added mujoco ant (Denys88#151)
Co-authored-by: Viktor Makoviichuk <[email protected]>
Denys88 and DenSumy authored Apr 30, 2022
1 parent 1d7f3e5 commit bad2ecd
Showing 3 changed files with 131 additions and 2 deletions.
64 changes: 64 additions & 0 deletions rl_games/configs/mujoco/ant.yaml
@@ -0,0 +1,64 @@
params:
  seed: 5
  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [256, 128, 64]
      activation: elu
      initializer:
        name: default
  config:
    reward_shaper:
      scale_value: 0.1
    normalize_advantage: True
    gamma: 0.995
    tau: 0.95

    learning_rate: 3e-4
    name: Ant-v3
    score_to_win: 10000

    grad_norm: 0.5
    entropy_coef: 0.0
    truncate_grads: True
    env_name: openai_gym
    ppo: true
    e_clip: 0.2
    clip_value: False
    num_actors: 16
    horizon_length: 128
    minibatch_size: 512
    mini_epochs: 4
    critic_coef: 1
    lr_schedule: adaptive
    kl_threshold: 0.008
    schedule_type: 'standard'
    normalize_input: True
    normalize_value: True
    value_bootstrap: True
    bounds_loss_coef: 0.000
    max_epochs: 5000
    env_config:
      name: Ant-v3
      seed: 5
      #flat_observation: True

    player:
      render: True
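With this config in place, training can typically be launched through the rl_games runner; the invocation below is a sketch, and the exact entry point depends on how the package is installed:

    python runner.py --train --file rl_games/configs/mujoco/ant.yaml

For reference, num_actors: 16 with horizon_length: 128 collects 2048 transitions per update, which minibatch_size: 512 splits into 4 minibatches, each revisited mini_epochs: 4 times.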
64 changes: 64 additions & 0 deletions rl_games/configs/mujoco/ant_envpool.yaml
@@ -0,0 +1,64 @@
params:
  seed: 5
  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [256, 128, 64]
      activation: elu
      initializer:
        name: default
  config:
    reward_shaper:
      scale_value: 0.1
    normalize_advantage: True
    gamma: 0.995
    tau: 0.95

    learning_rate: 3e-4
    name: Ant-v4
    score_to_win: 10000

    grad_norm: 0.5
    entropy_coef: 0.0
    truncate_grads: True
    env_name: envpool
    ppo: true
    e_clip: 0.2
    clip_value: False
    num_actors: 64
    horizon_length: 64
    minibatch_size: 1024
    mini_epochs: 4
    critic_coef: 1
    lr_schedule: adaptive
    kl_threshold: 0.008
    schedule_type: 'standard'
    normalize_input: True
    normalize_value: True
    value_bootstrap: True
    bounds_loss_coef: 0.000
    max_epochs: 5000
    env_config:
      env_name: Ant-v4
      seed: 5
      #flat_observation: True

    player:
      render: True
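This variant runs the same PPO setup against Ant-v4 through the envpool backend instead of openai_gym, so envpool has to be installed separately. A sketch of the usual steps, assuming the standard PyPI package name:

    pip install envpool
    python runner.py --train --file rl_games/configs/mujoco/ant_envpool.yaml

Here num_actors: 64 with horizon_length: 64 yields 4096 transitions per update, split by minibatch_size: 1024 into 4 minibatches.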
5 changes: 3 additions & 2 deletions rl_games/envs/envpool.py
@@ -14,8 +14,7 @@ def __init__(self, config_name, num_actors, **kwargs):
             env_type=kwargs.pop('env_type', 'gym'),
             num_envs=num_actors,
             batch_size=self.batch_size,
-            episodic_life=kwargs.pop('episodic_life', True),
-            reward_clip=kwargs.pop('reward_clip', False) # thread_affinity=False,
+            **kwargs
         )

         self.observation_space = self.env.observation_space
@@ -26,6 +25,8 @@ def __init__(self, config_name, num_actors, **kwargs):

     def _set_scores(self, infos, dones):
         # thanks to cleanrl: https://github.com/vwxyzjn/cleanrl/blob/3d20d11f45a5f1d764934e9851b816d0b03d2d10/cleanrl/ppo_atari_envpool.py#L111
+        if 'reward' not in infos:
+            return
         self.scores += infos["reward"]
         self.returned_scores[:] = self.scores
         infos["scores"] = self.returned_scores
