{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:22:48Z","timestamp":1740100968062,"version":"3.37.3"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["CCF-2006591"],"award-info":[{"award-number":["CCF-2006591"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,7,18]]},"DOI":"10.1109\/ijcnn55064.2022.9892004","type":"proceedings-article","created":{"date-parts":[[2022,9,30]],"date-time":"2022-09-30T19:56:04Z","timestamp":1664567764000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["Meta Proximal Policy Optimization for Cooperative Multi-Agent Continuous Control"],"prefix":"10.1109","author":[{"given":"Boli","family":"Fang","sequence":"first","affiliation":[{"name":"Indiana University,Department of Computer Science,Bloomington,IN,USA"}]},{"given":"Zhenghao","family":"Peng","sequence":"additional","affiliation":[{"name":"Chinese University of Hong Kong,Department of Information Engineering,Hong Kong,China"}]},{"given":"Hao","family":"Sun","sequence":"additional","affiliation":[{"name":"University of Cambridge,Department of Applied Mathematics and Theoretical Physics,UK"}]},{"given":"Qin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Indiana University,Department of Computer Science,Bloomington,IN,USA"}]}],"member":"263","reference":[{"key":"ref38","article-title":"Rllib: Abstractions for distributed reinforcement learning","author":"liang","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref33","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"0","journal-title":"Proceedings of the International Conference on Learning Representations (ICLR)"},{"key":"ref32","volume":"abs 2009 10897","author":"hsu","year":"2020","journal-title":"Revisiting design choices in proximal policy optimization"},{"key":"ref31","article-title":"Meta-gradient reinforcement learning with an objective discovered online","author":"xu","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref30","article-title":"Meta-gradient reinforcement learning","author":"xu","year":"2018","journal-title":"NeurIPS"},{"key":"ref37","article-title":"The cross entropy method for fast policy search","author":"mannor","year":"2003","journal-title":"ser ICML'03"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref35","article-title":"Multi-agent reinforcement learning in sequential social dilemmas","author":"leibo","year":"0","journal-title":"Proceedings of the 16th Conference on Autonomous Agents and MultiAgent Systems ser AAMAS '17"},{"key":"ref34","volume":"abs 2003 6709","author":"de witt","year":"2020","journal-title":"Deep multi-agent reinforcement learning for decentralized continuous cooperative control"},{"key":"ref10","first-page":"6382","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","author":"lowe","year":"0","journal-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems ser NIPS'17"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6220"},{"journal-title":"Proximal policy optimization algorithms","year":"0","author":"schulman","key":"ref13"},{"key":"ref14","article-title":"Deterministic policy gradient algorithms","author":"silver","year":"0","journal-title":"Proceedings of the 31st International Conference on Machine Learning"},{"key":"ref15","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref16","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"0","journal-title":"Proceedings of the 35th International Conference on Machine Learning"},{"key":"ref17","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"0","journal-title":"Proceedings of the 33rd International Conference on Machine Learning"},{"key":"ref18","article-title":"Trust region policy optimization","author":"schulman","year":"0","journal-title":"Proceedings of The 32nd International Conference on Machine Learning"},{"key":"ref19","volume":"abs 2009 4416","author":"cobbe","year":"2020","journal-title":"Phasic policy gradient"},{"key":"ref28","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-319-28929-8","author":"oliehoek","year":"2016","journal-title":"A Concise Introduction to Decentralized POMDPs"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.3390\/s150510026"},{"key":"ref27","article-title":"Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation","author":"kulkarni","year":"2016","journal-title":"NIPS"},{"key":"ref3","article-title":"Deep reinforcement learning for swarm systems","author":"h\u00fcttenrauch","year":"2019","journal-title":"Journal of Machine Learning Research"},{"key":"ref6","article-title":"Value-decomposition networks for cooperative multi-agent learning","author":"sunehag","year":"2018","journal-title":"AAMAS"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref29"},{"journal-title":"Optimal and approximate q-value functions for decentralized pomdps","year":"2008","author":"oliehoek","key":"ref5"},{"key":"ref8","article-title":"QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","author":"son","year":"0","journal-title":"Proceedings of the 36th International Conference on Machine Learning"},{"key":"ref7","article-title":"QMIX: Monotonic value function factorisation for deep multi-agent reinforcement learning","author":"rashid","year":"0","journal-title":"Proceedings of the 35th International Conference on Machine Learning"},{"key":"ref2","article-title":"Deep reinforcement learning for autonomous driving: A survey","author":"kiran","year":"2021","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"key":"ref9","article-title":"Weighted qmix: Expanding monotonic value function factorisation","author":"rashid","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2016.7860433"},{"key":"ref20","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"0","journal-title":"Proceedings of the 35th International Conference on Machine Learning"},{"key":"ref22","article-title":"Liir: Learning individual intrinsic reward in multi-agent reinforcement learning","author":"du","year":"2019","journal-title":"NeurIPS"},{"key":"ref21","article-title":"Learning implicit credit assignment for cooperative multi-agent reinforcement learning","author":"zhou","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref24","volume":"abs 2103 3662","author":"willemsen","year":"2021","journal-title":"MAMBPO sample-efficient multi-robot reinforcement learning using learned world models"},{"key":"ref23","article-title":"Actor-attention-critic for multi-agent reinforcement learning","author":"iqbal","year":"0","journal-title":"Proceedings of the 36th International Conference on Machine Learning"},{"key":"ref26","article-title":"On learning intrinsic rewards for policy gradient methods","author":"zheng","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref25","article-title":"The surprising effectiveness of MAPPO in cooperative, multi-agent games","volume":"abs 2103 1955","author":"yu","year":"2021","journal-title":"CoRR"}],"event":{"name":"2022 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2022,7,18]]},"location":"Padua, Italy","end":{"date-parts":[[2022,7,23]]}},"container-title":["2022 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/http\/xplorestaging.ieee.org\/ielx7\/9891857\/9889787\/09892004.pdf?arnumber=9892004","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,14]],"date-time":"2022-10-14T20:51:59Z","timestamp":1665780719000},"score":1,"resource":{"primary":{"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/ieeexplore.ieee.org\/document\/9892004\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,18]]},"references-count":38,"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/doi.org\/10.1109\/ijcnn55064.2022.9892004","relation":{},"subject":[],"published":{"date-parts":[[2022,7,18]]}}}