ray/rllib/tests/data/cartpole/small.json

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

4 lines
29 KiB
JSON
Raw Normal View History

{"type": "SampleBatch", "weights": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "eps_id": [241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760, 241561760], "dones": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "infos": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], "prev_rewards": [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "t": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55], "agent_index": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "action_prob": [0.4979577958583832, 0.5745141506195068, 0.5042742490768433, 0.5248998403549194, 0.5048907995223999, 0.5254997611045837, 0.4930223524570465, 0.5723332166671753, 0.5071576237678528, 0.5262983441352844, 0.5075111389160156, 0.4721700847148895, 0.4541035294532776, 0.5691784024238586, 0.45002007484436035, 0.42802754044532776, 0.5951988697052002, 0.5743389129638672, 0.44297751784324646, 0.5751434564590454, 0.4427056908607483, 0.575354278087616, 0.5583169460296631, 0.5349109768867493, 0.49323225021362305, 0.42819857597351074, 0.6240300536155701, 0.42723774909973145, 0.6247843503952026, 0.4268564283847809, 0.6255699396133423, 0.5718400478363037, 0.49357253313064575, 0.5718478560447693, 0.506999135017395, 0.4627947509288788, 0.44369709491729736, 0.42281273007392883, 0.40176495909690857, 0.6177492141723633, 0.6000679731369019, 0.4211883246898651, 0.5995147228240967, 0.578464925289154, 0.5586039423942566, 0.5260810256004333, 0.4879906177520752, 0.42811155319213867, 0.6308852434158325, 0.5760338306427002, 0.5073276162147522, 0.46694710850715637, 0.43938523530960083, 0.5832104086875916, 0.5628215670585632, 0.5309032201766968], "actions": [0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0], "q_values": [[-0.005643954500555992, 0.0025248583406209946], [-0.04723002016544342, 0.2530632019042969], [-0.004162287805229425, 0.012935103848576546], [0.05779631435871124, -0.041885510087013245], [-0.0001599406823515892, 0.019403917714953423], [0.05187809467315674, -0.05020952224731445], [-6.351247429847717e-05, 0.027848877012729645], [-0.03533334285020828, 0.2560437023639679], [0.005023432895541191, 0.03365574777126312], [0.04525064304471016, -0.06003996357321739], [0.00283
{"type": "SampleBatch", "weights": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "eps_id": [1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020, 1238833020], "dones": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "infos": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], "prev_rewards": [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "t": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], "agent_index": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "action_prob": [0.5135254263877869, 0.4770704507827759, 0.5442214012145996, 0.47627949714660645, 0.5454674363136292, 0.5253314971923828, 0.48434364795684814, 0.5828204154968262, 0.48531463742256165, 0.5827109813690186, 0.5136748552322388, 0.4766709804534912, 0.45407694578170776, 0.4279625415802002, 0.5955550074577332, 0.5748928189277649, 0.5481062531471252, 0.4735119938850403, 0.5489782094955444, 0.47440415620803833, 0.5505622625350952, 0.5247683525085449, 0.5148704051971436, 0.4746163487434387, 0.4442490339279175, 0.4205590784549713], "actions": [1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1], "q_values": [[-0.015597449615597725, 0.038517292588949203], [0.04316295310854912, -0.04861947521567345], [0.09876783937215805, -0.0785810723900795], [0.03863132745027542, -0.05632191151380539], [0.09450361132621765, -0.08787006139755249], [0.033118072897195816, -0.06829479336738586], [-0.011613234877586365, 0.0510326623916626], [-0.08389873802661896, 0.25046348571777344], [-0.021378351375460625, 0.0373799204826355], [-0.08555285632610321, 0.24835921823978424], [-0.028901388868689537, 0.025811681523919106], [0.02785981446504593, -0.0655241534113884], [0.0917566642165184, -0.09245472401380539], [0.1692613959312439, -0.12090739607810974], [0.25693047046661377, -0.1300475001335144], [0.1545487344264984, -0.14729353785514832], [0.055337414145469666, -0.13768470287322998], [0.00671960785984993, -0.09933169186115265], [0.05141502618789673, -0.14512820541858673], [-0.008995093405246735, -0.1114681214094162], [0.0450827032327652, -0.15785999596118927], [-0.02486952394247055, -0.12402410060167313], [-0.15750475227832794, -0.09800545871257782], [-0.04371977970004082, -0.14534175395965576], [0.03489668667316437, -0.1890382468700409], [0.1171964704990387, -0.20328232645988464]], "rewards": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "prev_actions": [0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1], "obs": [[0.0450199730694294, -0.03486160933971405, 0.016064710915088654, 0.011697827838361263], [0.04432274028658867, 0.16002631187438965, 0.01629866659641266, -0.2758735120296478], [0.047523267567157745, 0.3549119830131531, 0.010781196877360344, -0.5633715987205505], [0.05462150648236275, 0.15964041650295258, -0.0004862352798227221, -0.2673116624355316], [0.05781431496143341, 0.3547693192958832, -0.0058324686251580715, -0.5601479411125183], [0.06490969657897949, 0.1597297042608261, -0.017035426571965218, -0.2693082094192505], [0.06810429692268372, -0.035145051777362823, -0.022421590983867645, 0.01795332506299019], [0.06740139424800873, -0.22993838787078857, -0.022062525153160095, 0.30347850918769836], [0.06280262768268585, -0.03450907766819, -0.01599295437335968, 0.00392001261934638], [0.06211244314908981, -0.22939805686473846, -0.0159145537763834, 0.2915143668651581], [0.0575244836
{"type": "SampleBatch", "weights": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "eps_id": [464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363, 464626363], "dones": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "infos": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], "prev_rewards": [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "t": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], "agent_index": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "action_prob": [0.49811699986457825, 0.5603018999099731, 0.4948766827583313, 0.5607614517211914, 0.4922669231891632, 0.43934890627861023, 0.6127749681472778, 0.438413143157959, 0.38857191801071167, 0.6461699604988098, 0.6107516288757324, 0.43830615282058716, 0.608411967754364, 0.5631444454193115, 0.518650472164154, 0.5026047825813293, 0.48087823390960693, 0.5650154948234558, 0.4770132005214691, 0.5669832229614258], "actions": [0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1], "q_values": [[0.034373246133327484, 0.041905246675014496], [-0.040324486792087555, 0.20206278562545776], [0.03108956664800644, 0.0515836626291275], [-0.03812238574028015, 0.20613068342208862], [0.016220448538661003, 0.047155141830444336], [-0.03483893722295761, 0.20896606147289276], [-0.10473792254924774, 0.3542538285255432], [-0.02594645321369171, 0.22165822982788086], [-0.10031923651695251, 0.35299989581108093], [-0.1714298129081726, 0.430816113948822], [-0.09505866467952728, 0.3554142117500305], [0.0006859749555587769, 0.2487252801656723], [-0.08787457644939423, 0.35276734828948975], [0.004122734069824219, 0.25805625319480896], [0.038704317063093185, 0.11334069073200226], [-0.01853189617395401, -0.028951097279787064], [0.025288723409175873, 0.10181311517953873], [-0.020684152841567993, 0.24085858464241028], [0.013561476022005081, 0.10557354986667633], [-0.03565507382154465, 0.23389792442321777]], "rewards": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "prev_actions": [0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0], "obs": [[-0.03543581813573837, 0.03231120854616165, 0.04250812903046608, -0.04545578733086586], [-0.03478959575295448, -0.16339369118213654, 0.04159901291131973, 0.2603300213813782], [-0.038057468831539154, 0.0311104916036129, 0.04680561274290085, -0.018947282806038857], [-0.03743525967001915, -0.16465036571025848, 0.046426668763160706, 0.28812822699546814], [-0.04072826728224754, 0.02977983094751835, 0.052189234644174576, 0.010441737249493599], [-0.04013266786932945, -0.1660502403974533, 0.052398066967725754, 0.3191235661506653], [-0.043453674763441086, -0.36187776923179626, 0.05878053978085518, 0.6278597116470337], [-0.05069122835993767, -0.1676233410835266, 0.0713377296924591, 0.35425281524658203], [-0.05404369533061981, -0.36368328332901, 0.07842279225587845, 0.6685502529144287], [-0.061317361891269684, -0.5598031282424927, 0.09179379791021347, 0.9848584532737732], [-0.07251342386007309, -0.36602261662483215, 0.11149096488952637, 0.7223610281944275], [-0.07983388006687164, -0.17260494828224182, 0.12593817710876465, 0.4667462706565857], [-0.08328597247600555, -0.3692602813243866, 0.13527311384677887, 0.7963211536407471], [-0.09067118167877197, -0.17622822523117065, 0.15119953453540802, 0.5490673184394836], [-0.09419574588537216, 0.01648259162902832, 0.16218088567256927, 0.30758246779441833], [-0.09386609494686127, 0.20896689593791962, 0.1683325320482254, 0.0701172798871994], [-0.08968675881624222, 0.011881737969815731, 0.1697348803281784, 0.4108228385448456], [-0.08944912254810333, -0.18518869578838348, 0.17795133590698242, 0.751843273639679], [-0.09315289556980133, 0.00709147984161973,