ray/rllib/tests/data/cartpole/small.json

3 lines
29 KiB
JSON

{"type": "SampleBatch", "weights": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "eps_id": [241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760], "dones": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "infos": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], "prev_rewards": [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "t": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55], "agent_index": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "action_prob": [0.4979577958583832, 0.5745141506195068, 0.5042742490768433, 0.5248998403549194, 0.5048907995223999, 0.5254997611045837, 0.4930223524570465, 0.5723332166671753, 0.5071576237678528, 0.5262983441352844, 0.5075111389160156, 0.4721700847148895, 0.4541035294532776, 0.5691784024238586, 0.45002007484436035, 0.42802754044532776, 0.5951988697052002, 0.5743389129638672, 0.44297751784324646, 0.5751434564590454, 0.4427056908607483, 0.575354278087616, 0.5583169460296631, 0.5349109768867493, 0.49323225021362305, 0.42819857597351074, 0.6240300536155701, 0.42723774909973145, 0.6247843503952026, 0.4268564283847809, 0.6255699396133423, 0.5718400478363037, 0.49357253313064575, 0.5718478560447693, 0.506999135017395, 0.4627947509288788, 0.44369709491729736, 0.42281273007392883, 0.40176495909690857, 0.6177492141723633, 0.6000679731369019, 0.4211883246898651, 0.5995147228240967, 0.578464925289154, 0.5586039423942566, 0.5260810256004333, 0.4879906177520752, 0.42811155319213867, 0.6308852434158325, 0.5760338306427002, 0.5073276162147522, 0.46694710850715637, 0.43938523530960083, 0.5832104086875916, 0.5628215670585632, 0.5309032201766968], "actions": [0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0], "q_values": [[-0.005643954500555992, 0.0025248583406209946], [-0.04723002016544342, 0.2530632019042969], [-0.004162287805229425, 0.012935103848576546], [0.05779631435871124, -0.041885510087013245], [-0.0001599406823515892, 0.019403917714953423], [0.05187809467315674, -0.05020952224731445], [-6.351247429847717e-05, 0.027848877012729645], [-0.03533334285020828, 0.2560437023639679], [0.005023432895541191, 0.03365574777126312], [0.04525064304471016, -0.06003996357321739], [0.002838471904397011, 0.032885171473026276], [0.03723599761724472, -0.07419878989458084], [0.09575563669204712, -0.0883483961224556], [0.16416001319885254, -0.11433979868888855], [0.09313704073429108, -0.10745253413915634], [0.16196757555007935, -0.12793570756912231], [0.23910409212112427, -0.1463954746723175], [0.15805242955684662, -0.14152376353740692], [0.09662380814552307, -0.1324627697467804], [0.1541520208120346, -0.14871598780155182], [0.0929112657904625, -0.1372770369052887], [0.1511463224887848, -0.15258446335792542], [0.0875367745757103, -0.14679750800132751], [0.08854943513870239, -0.05132210999727249], [0.018426118418574333, 0.045498818159103394], [-0.04996141046285629, 0.23924344778060913], [-0.09354546666145325, 0.4131438434123993], [-0.038044273853302, 0.255085825920105], [-0.09211604297161102, 0.4177895784378052], [-0.030748017132282257, 0.26394063234329224], [-0.09104493260383606, 0.4222134053707123], [-0.02319370210170746, 0.2661687135696411], [0.02133956551551819, 0.04705086350440979], [-0.021654099225997925, 0.2677402198314667], [0.01794305630028248, 0.04594135284423828], [0.05681019276380539, -0.0922863557934761], [0.11023147404193878, -0.1159394159913063], [0.16652457416057587, -0.14471273124217987], [0.23569053411483765, -0.16242587566375732], [0.31461724638938904, -0.165388286113739], [0.22523169219493866, -0.1805165857076645], [0.14499591290950775, -0.17290116846561432], [0.2126035839319229, -0.19084002077579498], [0.12525871396064758, -0.19121608138084412], [0.07890036702156067, -0.15659788250923157], [0.07070913910865784, -0.03370969370007515], [-0.0010413788259029388, 0.047005534172058105], [-0.05502410978078842, 0.2345360815525055], [-0.15737640857696533, 0.37863999605178833], [-0.09506852179765701, 0.21144413948059082], [-0.06340484321117401, -0.0340922586619854], [0.016717009246349335, -0.11568755656480789], [0.059842679649591446, -0.1838146150112152], [0.12809047102928162, -0.20787617564201355], [0.055311597883701324, -0.19730976223945618], [-0.022230863571166992, -0.14600159227848053]], "rewards": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "prev_actions": [0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0], "obs": [[0.040251147001981735, -0.009447001852095127, 0.04735473543405533, -0.00123753328807652], [0.040062207728624344, -0.2052149772644043, 0.04732998460531235, 0.30600231885910034], [0.03595791012048721, -0.010798314586281776, 0.05345002934336662, 0.028613731265068054], [0.03574194014072418, 0.18351800739765167, 0.054022304713726044, -0.24673765897750854], [0.039412301033735275, -0.012332209385931492, 0.04908755049109459, 0.06248391792178154], [0.03916565701365471, 0.1820528209209442, 0.050337228924036026, -0.2143164724111557], [0.0428067147731781, -0.013751287944614887, 0.04605090245604515, 0.09381057322025299], [0.04253168776631355, -0.20950199663639069, 0.04792711138725281, 0.4006595313549042], [0.03834164887666702, -0.015091483481228352, 0.055940303951501846, 0.12346379458904266], [0.03803981840610504, 0.17918626964092255, 0.05840957909822464, -0.1510591059923172], [0.041623543947935104, -0.01672130823135376, 0.055388398468494415, 0.1594637781381607], [0.04128911718726158, 0.17756572365760803, 0.05857767164707184, -0.11524398624897003], [0.044840432703495026, 0.37180155515670776, 0.05627279356122017, -0.3888860046863556], [0.05227646231651306, 0.5660815238952637, 0.04849507287144661, -0.6633091568946838], [0.06359809637069702, 0.3703196048736572, 0.03522888943552971, -0.3557596206665039], [0.0710044875741005, 0.5649234652519226, 0.028113696724176407, -0.6371290683746338], [0.08230295777320862, 0.7596423029899597, 0.015371114946901798, -0.9208275675773621], [0.09749580174684525, 0.5643160343170166, -0.003045437391847372, -0.623353898525238], [0.10878212004899979, 0.36923670768737793, -0.015512514859437943, -0.3316316306591034], [0.116166852414608, 0.5645759701728821, -0.022145148366689682, -0.6291658282279968], [0.1274583786725998, 0.36976999044418335, -0.03472846373915672, -0.3435385525226593], [0.1348537802696228, 0.5653683543205261, -0.04159923642873764, -0.6469672918319702], [0.14616113901138306, 0.3708499073982239, -0.054538581520318985, -0.3676687479019165], [0.15357813239097595, 0.17654363811016083, -0.06189195439219475, -0.09266908466815948], [0.15710900723934174, -0.01763911545276642, -0.06374533474445343, 0.17986272275447845], [0.1567562371492386, -0.2117937058210373, -0.06014808267354965, 0.4517746567726135], [0.15252035856246948, -0.4060157239437103, -0.0511125884950161, 0.7249079942703247], [0.14440004527568817, -0.21022562682628632, -0.03661442920565605, 0.4165858030319214], [0.14019553363323212, -0.4048100411891937, -0.028282713145017624, 0.6975045800209045], [0.13209933042526245, -0.20930756628513336, -0.014332621358335018, 0.39605414867401123], [0.12791317701339722, -0.4042232632637024, -0.006411538925021887, 0.6841840147972107], [0.1198287084698677, -0.20901288092136383, 0.007272141519933939, 0.38948947191238403], [0.11564845591783524, -0.013994891196489334, 0.015061930753290653, 0.09910821169614792], [0.11536855250597, -0.20932942628860474, 0.01704409532248974, 0.39650481939315796], [0.11118196696043015, -0.014453399926424026, 0.024974191561341286, 0.1092439591884613], [0.1108928993344307, 0.18030193448066711, 0.0271590705960989, -0.17545630037784576], [0.11449893563985825, 0.3750248849391937, 0.023649943992495537, -0.45944923162460327], [0.12199943512678146, 0.5698046684265137, 0.014460960403084755, -0.7445847988128662], [0.13339552283287048, 0.7647241353988647, -0.000430735235568136, -1.032681941986084], [0.14869001507759094, 0.9598518013954163, -0.02108437567949295, -1.3255001306533813], [0.16788704693317413, 0.7650023102760315, -0.047594375908374786, -1.0394892692565918], [0.1831870973110199, 0.5705440044403076, -0.06838416308164597, -0.762119472026825], [0.1945979744195938, 0.7665379047393799, -0.08362655341625214, -1.0755125284194946], [0.2099287360906601, 0.5726144313812256, -0.10513680428266525, -0.8102014064788818], [0.2213810235261917, 0.3790779709815979, -0.12134082615375519, -0.552353024482727], [0.22896258533000946, 0.1858503371477127, -0.1323878914117813, -0.30022940039634705], [0.2326795905828476, -0.007160619366914034, -0.13839247822761536, -0.05205482989549637], [0.23253637552261353, -0.2000548243522644, -0.1394335776567459, 0.1939624696969986], [0.22853527963161469, -0.3929353952407837, -0.13555432856082916, 0.4396146833896637], [0.22067657113075256, -0.1961815357208252, -0.1267620325088501, 0.10746019333600998], [0.21675294637680054, 0.0005075104418210685, -0.12461283057928085, -0.22237446904182434], [0.21676309406757355, 0.19716985523700714, -0.1290603131055832, -0.5516219735145569], [0.2207064926624298, 0.39384564757347107, -0.14009276032447815, -0.8820206522941589], [0.22858339548110962, 0.5905638933181763, -0.15773317217826843, -1.2152597904205322], [0.2403946816921234, 0.39778846502304077, -0.18203836679458618, -0.9758678674697876], [0.24835044145584106, 0.20551282167434692, -0.20155572891235352, -0.745444118976593]], "new_obs": [[0.040062207728624344, -0.2052149772644043, 0.04732998460531235, 0.30600231885910034], [0.03595791012048721, -0.010798314586281776, 0.05345002934336662, 0.028613731265068054], [0.03574194014072418, 0.18351800739765167, 0.054022304713726044, -0.24673765897750854], [0.039412301033735275, -0.012332209385931492, 0.04908755049109459, 0.06248391792178154], [0.03916565701365471, 0.1820528209209442, 0.050337228924036026, -0.2143164724111557], [0.0428067147731781, -0.013751287944614887, 0.04605090245604515, 0.09381057322025299], [0.04253168776631355, -0.20950199663639069, 0.04792711138725281, 0.4006595313549042], [0.03834164887666702, -0.015091483481228352, 0.055940303951501846, 0.12346379458904266], [0.03803981840610504, 0.17918626964092255, 0.05840957909822464, -0.1510591059923172], [0.041623543947935104, -0.01672130823135376, 0.055388398468494415, 0.1594637781381607], [0.04128911718726158, 0.17756572365760803, 0.05857767164707184, -0.11524398624897003], [0.044840432703495026, 0.37180155515670776, 0.05627279356122017, -0.3888860046863556], [0.05227646231651306, 0.5660815238952637, 0.04849507287144661, -0.6633091568946838], [0.06359809637069702, 0.3703196048736572, 0.03522888943552971, -0.3557596206665039], [0.0710044875741005, 0.5649234652519226, 0.028113696724176407, -0.6371290683746338], [0.08230295777320862, 0.7596423029899597, 0.015371114946901798, -0.9208275675773621], [0.09749580174684525, 0.5643160343170166, -0.003045437391847372, -0.623353898525238], [0.10878212004899979, 0.36923670768737793, -0.015512514859437943, -0.3316316306591034], [0.116166852414608, 0.5645759701728821, -0.022145148366689682, -0.6291658282279968], [0.1274583786725998, 0.36976999044418335, -0.03472846373915672, -0.3435385525226593], [0.1348537802696228, 0.5653683543205261, -0.04159923642873764, -0.6469672918319702], [0.14616113901138306, 0.3708499073982239, -0.054538581520318985, -0.3676687479019165], [0.15357813239097595, 0.17654363811016083, -0.06189195439219475, -0.09266908466815948], [0.15710900723934174, -0.01763911545276642, -0.06374533474445343, 0.17986272275447845], [0.1567562371492386, -0.2117937058210373, -0.06014808267354965, 0.4517746567726135], [0.15252035856246948, -0.4060157239437103, -0.0511125884950161, 0.7249079942703247], [0.14440004527568817, -0.21022562682628632, -0.03661442920565605, 0.4165858030319214], [0.14019553363323212, -0.4048100411891937, -0.028282713145017624, 0.6975045800209045], [0.13209933042526245, -0.20930756628513336, -0.014332621358335018, 0.39605414867401123], [0.12791317701339722, -0.4042232632637024, -0.006411538925021887, 0.6841840147972107], [0.1198287084698677, -0.20901288092136383, 0.007272141519933939, 0.38948947191238403], [0.11564845591783524, -0.013994891196489334, 0.015061930753290653, 0.09910821169614792], [0.11536855250597, -0.20932942628860474, 0.01704409532248974, 0.39650481939315796], [0.11118196696043015, -0.014453399926424026, 0.024974191561341286, 0.1092439591884613], [0.1108928993344307, 0.18030193448066711, 0.0271590705960989, -0.17545630037784576], [0.11449893563985825, 0.3750248849391937, 0.023649943992495537, -0.45944923162460327], [0.12199943512678146, 0.5698046684265137, 0.014460960403084755, -0.7445847988128662], [0.13339552283287048, 0.7647241353988647, -0.000430735235568136, -1.032681941986084], [0.14869001507759094, 0.9598518013954163, -0.02108437567949295, -1.3255001306533813], [0.16788704693317413, 0.7650023102760315, -0.047594375908374786, -1.0394892692565918], [0.1831870973110199, 0.5705440044403076, -0.06838416308164597, -0.762119472026825], [0.1945979744195938, 0.7665379047393799, -0.08362655341625214, -1.0755125284194946], [0.2099287360906601, 0.5726144313812256, -0.10513680428266525, -0.8102014064788818], [0.2213810235261917, 0.3790779709815979, -0.12134082615375519, -0.552353024482727], [0.22896258533000946, 0.1858503371477127, -0.1323878914117813, -0.30022940039634705], [0.2326795905828476, -0.007160619366914034, -0.13839247822761536, -0.05205482989549637], [0.23253637552261353, -0.2000548243522644, -0.1394335776567459, 0.1939624696969986], [0.22853527963161469, -0.3929353952407837, -0.13555432856082916, 0.4396146833896637], [0.22067657113075256, -0.1961815357208252, -0.1267620325088501, 0.10746019333600998], [0.21675294637680054, 0.0005075104418210685, -0.12461283057928085, -0.22237446904182434], [0.21676309406757355, 0.19716985523700714, -0.1290603131055832, -0.5516219735145569], [0.2207064926624298, 0.39384564757347107, -0.14009276032447815, -0.8820206522941589], [0.22858339548110962, 0.5905638933181763, -0.15773317217826843, -1.2152597904205322], [0.2403946816921234, 0.39778846502304077, -0.18203836679458618, -0.9758678674697876], [0.24835044145584106, 0.20551282167434692, -0.20155572891235352, -0.745444118976593], [0.2524607181549072, 0.01365789957344532, -0.21646460890769958, -0.5223444700241089]]}
{"type": "SampleBatch", "weights": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "eps_id": [1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020], "dones": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "infos": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], "prev_rewards": [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "t": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], "agent_index": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "action_prob": [0.5135254263877869, 0.4770704507827759, 0.5442214012145996, 0.47627949714660645, 0.5454674363136292, 0.5253314971923828, 0.48434364795684814, 0.5828204154968262, 0.48531463742256165, 0.5827109813690186, 0.5136748552322388, 0.4766709804534912, 0.45407694578170776, 0.4279625415802002, 0.5955550074577332, 0.5748928189277649, 0.5481062531471252, 0.4735119938850403, 0.5489782094955444, 0.47440415620803833, 0.5505622625350952, 0.5247683525085449, 0.5148704051971436, 0.4746163487434387, 0.4442490339279175, 0.4205590784549713], "actions": [1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1], "q_values": [[-0.015597449615597725, 0.038517292588949203], [0.04316295310854912, -0.04861947521567345], [0.09876783937215805, -0.0785810723900795], [0.03863132745027542, -0.05632191151380539], [0.09450361132621765, -0.08787006139755249], [0.033118072897195816, -0.06829479336738586], [-0.011613234877586365, 0.0510326623916626], [-0.08389873802661896, 0.25046348571777344], [-0.021378351375460625, 0.0373799204826355], [-0.08555285632610321, 0.24835921823978424], [-0.028901388868689537, 0.025811681523919106], [0.02785981446504593, -0.0655241534113884], [0.0917566642165184, -0.09245472401380539], [0.1692613959312439, -0.12090739607810974], [0.25693047046661377, -0.1300475001335144], [0.1545487344264984, -0.14729353785514832], [0.055337414145469666, -0.13768470287322998], [0.00671960785984993, -0.09933169186115265], [0.05141502618789673, -0.14512820541858673], [-0.008995093405246735, -0.1114681214094162], [0.0450827032327652, -0.15785999596118927], [-0.02486952394247055, -0.12402410060167313], [-0.15750475227832794, -0.09800545871257782], [-0.04371977970004082, -0.14534175395965576], [0.03489668667316437, -0.1890382468700409], [0.1171964704990387, -0.20328232645988464]], "rewards": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "prev_actions": [0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1], "obs": [[0.0450199730694294, -0.03486160933971405, 0.016064710915088654, 0.011697827838361263], [0.04432274028658867, 0.16002631187438965, 0.01629866659641266, -0.2758735120296478], [0.047523267567157745, 0.3549119830131531, 0.010781196877360344, -0.5633715987205505], [0.05462150648236275, 0.15964041650295258, -0.0004862352798227221, -0.2673116624355316], [0.05781431496143341, 0.3547693192958832, -0.0058324686251580715, -0.5601479411125183], [0.06490969657897949, 0.1597297042608261, -0.017035426571965218, -0.2693082094192505], [0.06810429692268372, -0.035145051777362823, -0.022421590983867645, 0.01795332506299019], [0.06740139424800873, -0.22993838787078857, -0.022062525153160095, 0.30347850918769836], [0.06280262768268585, -0.03450907766819, -0.01599295437335968, 0.00392001261934638], [0.06211244314908981, -0.22939805686473846, -0.0159145537763834, 0.2915143668651581], [0.057524483650922775, -0.03405284881591797, -0.010084266774356365, -0.006145021412521601], [0.05684342607855797, 0.1612122654914856, -0.010207167826592922, -0.3019925057888031], [0.06006767228245735, 0.35647818446159363, -0.016247017309069633, -0.597877025604248], [0.06719723343849182, 0.5518236756324768, -0.028204558417201042, -0.8956329822540283], [0.07823371142148972, 0.7473164796829224, -0.04611721634864807, -1.1970465183258057], [0.09318003803491592, 0.5528207421302795, -0.07005815207958221, -0.9191668629646301], [0.10423645377159119, 0.35871216654777527, -0.08844148367643356, -0.6492984294891357], [0.11141069233417511, 0.16492627561092377, -0.10142745822668076, -0.38572362065315247], [0.11470922082662582, 0.3613308370113373, -0.10914192348718643, -0.7085849642753601], [0.12193583697080612, 0.16787634789943695, -0.12331362813711166, -0.45215386152267456], [0.12529335916042328, 0.36450672149658203, -0.1323567032814026, -0.7810221314430237], [0.13258349895477295, 0.1714283674955368, -0.14797714352607727, -0.5327370762825012], [0.13601206243038177, -0.021336432546377182, -0.15863189101219177, -0.29009655117988586], [0.13558533787727356, 0.17564991116523743, -0.1644338220357895, -0.6283085346221924], [0.13909833133220673, 0.3726385235786438, -0.1769999861717224, -0.9679317474365234], [0.14655110239982605, 0.5696383714675903, -0.19635862112045288, -1.3105814456939697]], "new_obs": [[0.04432274028658867, 0.16002631187438965, 0.01629866659641266, -0.2758735120296478], [0.047523267567157745, 0.3549119830131531, 0.010781196877360344, -0.5633715987205505], [0.05462150648236275, 0.15964041650295258, -0.0004862352798227221, -0.2673116624355316], [0.05781431496143341, 0.3547693192958832, -0.0058324686251580715, -0.5601479411125183], [0.06490969657897949, 0.1597297042608261, -0.017035426571965218, -0.2693082094192505], [0.06810429692268372, -0.035145051777362823, -0.022421590983867645, 0.01795332506299019], [0.06740139424800873, -0.22993838787078857, -0.022062525153160095, 0.30347850918769836], [0.06280262768268585, -0.03450907766819, -0.01599295437335968, 0.00392001261934638], [0.06211244314908981, -0.22939805686473846, -0.0159145537763834, 0.2915143668651581], [0.057524483650922775, -0.03405284881591797, -0.010084266774356365, -0.006145021412521601], [0.05684342607855797, 0.1612122654914856, -0.010207167826592922, -0.3019925057888031], [0.06006767228245735, 0.35647818446159363, -0.016247017309069633, -0.597877025604248], [0.06719723343849182, 0.5518236756324768, -0.028204558417201042, -0.8956329822540283], [0.07823371142148972, 0.7473164796829224, -0.04611721634864807, -1.1970465183258057], [0.09318003803491592, 0.5528207421302795, -0.07005815207958221, -0.9191668629646301], [0.10423645377159119, 0.35871216654777527, -0.08844148367643356, -0.6492984294891357], [0.11141069233417511, 0.16492627561092377, -0.10142745822668076, -0.38572362065315247], [0.11470922082662582, 0.3613308370113373, -0.10914192348718643, -0.7085849642753601], [0.12193583697080612, 0.16787634789943695, -0.12331362813711166, -0.45215386152267456], [0.12529335916042328, 0.36450672149658203, -0.1323567032814026, -0.7810221314430237], [0.13258349895477295, 0.1714283674955368, -0.14797714352607727, -0.5327370762825012], [0.13601206243038177, -0.021336432546377182, -0.15863189101219177, -0.29009655117988586], [0.13558533787727356, 0.17564991116523743, -0.1644338220357895, -0.6283085346221924], [0.13909833133220673, 0.3726385235786438, -0.1769999861717224, -0.9679317474365234], [0.14655110239982605, 0.5696383714675903, -0.19635862112045288, -1.3105814456939697], [0.15794387459754944, 0.7666289806365967, -0.2225702553987503, -1.6577483415603638]]}
{"type": "SampleBatch", "weights": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "eps_id": [464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363], "dones": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "infos": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], "prev_rewards": [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "t": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], "agent_index": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "action_prob": [0.49811699986457825, 0.5603018999099731, 0.4948766827583313, 0.5607614517211914, 0.4922669231891632, 0.43934890627861023, 0.6127749681472778, 0.438413143157959, 0.38857191801071167, 0.6461699604988098, 0.6107516288757324, 0.43830615282058716, 0.608411967754364, 0.5631444454193115, 0.518650472164154, 0.5026047825813293, 0.48087823390960693, 0.5650154948234558, 0.4770132005214691, 0.5669832229614258], "actions": [0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1], "q_values": [[0.034373246133327484, 0.041905246675014496], [-0.040324486792087555, 0.20206278562545776], [0.03108956664800644, 0.0515836626291275], [-0.03812238574028015, 0.20613068342208862], [0.016220448538661003, 0.047155141830444336], [-0.03483893722295761, 0.20896606147289276], [-0.10473792254924774, 0.3542538285255432], [-0.02594645321369171, 0.22165822982788086], [-0.10031923651695251, 0.35299989581108093], [-0.1714298129081726, 0.430816113948822], [-0.09505866467952728, 0.3554142117500305], [0.0006859749555587769, 0.2487252801656723], [-0.08787457644939423, 0.35276734828948975], [0.004122734069824219, 0.25805625319480896], [0.038704317063093185, 0.11334069073200226], [-0.01853189617395401, -0.028951097279787064], [0.025288723409175873, 0.10181311517953873], [-0.020684152841567993, 0.24085858464241028], [0.013561476022005081, 0.10557354986667633], [-0.03565507382154465, 0.23389792442321777]], "rewards": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "prev_actions": [0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0], "obs": [[-0.03543581813573837, 0.03231120854616165, 0.04250812903046608, -0.04545578733086586], [-0.03478959575295448, -0.16339369118213654, 0.04159901291131973, 0.2603300213813782], [-0.038057468831539154, 0.0311104916036129, 0.04680561274290085, -0.018947282806038857], [-0.03743525967001915, -0.16465036571025848, 0.046426668763160706, 0.28812822699546814], [-0.04072826728224754, 0.02977983094751835, 0.052189234644174576, 0.010441737249493599], [-0.04013266786932945, -0.1660502403974533, 0.052398066967725754, 0.3191235661506653], [-0.043453674763441086, -0.36187776923179626, 0.05878053978085518, 0.6278597116470337], [-0.05069122835993767, -0.1676233410835266, 0.0713377296924591, 0.35425281524658203], [-0.05404369533061981, -0.36368328332901, 0.07842279225587845, 0.6685502529144287], [-0.061317361891269684, -0.5598031282424927, 0.09179379791021347, 0.9848584532737732], [-0.07251342386007309, -0.36602261662483215, 0.11149096488952637, 0.7223610281944275], [-0.07983388006687164, -0.17260494828224182, 0.12593817710876465, 0.4667462706565857], [-0.08328597247600555, -0.3692602813243866, 0.13527311384677887, 0.7963211536407471], [-0.09067118167877197, -0.17622822523117065, 0.15119953453540802, 0.5490673184394836], [-0.09419574588537216, 0.01648259162902832, 0.16218088567256927, 0.30758246779441833], [-0.09386609494686127, 0.20896689593791962, 0.1683325320482254, 0.0701172798871994], [-0.08968675881624222, 0.011881737969815731, 0.1697348803281784, 0.4108228385448456], [-0.08944912254810333, -0.18518869578838348, 0.17795133590698242, 0.751843273639679], [-0.09315289556980133, 0.00709147984161973, 0.19298820197582245, 0.5200196504592896], [-0.09301106631755829, -0.1901485174894333, 0.20338858664035797, 0.8667741417884827]], "new_obs": [[-0.03478959575295448, -0.16339369118213654, 0.04159901291131973, 0.2603300213813782], [-0.038057468831539154, 0.0311104916036129, 0.04680561274290085, -0.018947282806038857], [-0.03743525967001915, -0.16465036571025848, 0.046426668763160706, 0.28812822699546814], [-0.04072826728224754, 0.02977983094751835, 0.052189234644174576, 0.010441737249493599], [-0.04013266786932945, -0.1660502403974533, 0.052398066967725754, 0.3191235661506653], [-0.043453674763441086, -0.36187776923179626, 0.05878053978085518, 0.6278597116470337], [-0.05069122835993767, -0.1676233410835266, 0.0713377296924591, 0.35425281524658203], [-0.05404369533061981, -0.36368328332901, 0.07842279225587845, 0.6685502529144287], [-0.061317361891269684, -0.5598031282424927, 0.09179379791021347, 0.9848584532737732], [-0.07251342386007309, -0.36602261662483215, 0.11149096488952637, 0.7223610281944275], [-0.07983388006687164, -0.17260494828224182, 0.12593817710876465, 0.4667462706565857], [-0.08328597247600555, -0.3692602813243866, 0.13527311384677887, 0.7963211536407471], [-0.09067118167877197, -0.17622822523117065, 0.15119953453540802, 0.5490673184394836], [-0.09419574588537216, 0.01648259162902832, 0.16218088567256927, 0.30758246779441833], [-0.09386609494686127, 0.20896689593791962, 0.1683325320482254, 0.0701172798871994], [-0.08968675881624222, 0.011881737969815731, 0.1697348803281784, 0.4108228385448456], [-0.08944912254810333, -0.18518869578838348, 0.17795133590698242, 0.751843273639679], [-0.09315289556980133, 0.00709147984161973, 0.19298820197582245, 0.5200196504592896], [-0.09301106631755829, -0.1901485174894333, 0.20338858664035797, 0.8667741417884827], [-0.09681403636932373, 0.0017116105882450938, 0.22072407603263855, 0.6443008184432983]]}