I am trying to create a custom OpenAI Gym environment to use with Ray's RLlib.
Currently I have a (6, 94) observation space. However, when I try to train in the custom environment, I get the following error:
```
ValueError: No default configuration for obs shape [6, 94], you must specify `conv_filters` manually as a model option. Default configurations are only available for inputs of shape [42, 42, K] and [84, 84, K]. You may alternatively want to use a custom model or preprocessor.
```
The documentation says:
```
Note that you’ll probably have to configure conv_filters if your environment observations have custom sizes, e.g., "model": {"dim": 42, "conv_filters": [[16, [4, 4], 2], [32, [4, 4], 2], [512, [11, 11], 1]]} for 42x42 observations.
```
It does not spell out what needs to be implemented, and there is no link to any documentation explaining what "model" and "conv_filters" are supposed to do.
For a (6, 94) observation, should "dim" be 6 or 94? Can conv_filters be left as in the example, or does every observation shape need its own network architecture? If so, where is that documented?
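In case it helps, this is roughly the setup that triggers the error. The environment below is an illustrative stand-in for my actual one (the names, spaces bounds, and dynamics are placeholders):

```python
import gym
import numpy as np
import ray
from gym import spaces
from ray.rllib.agents.ppo import PPOTrainer

class MyEnv(gym.Env):
    """Minimal stand-in environment with a (6, 94) observation space."""

    def __init__(self, env_config):
        self.observation_space = spaces.Box(
            low=0.0, high=1.0, shape=(6, 94), dtype=np.float32)
        self.action_space = spaces.Discrete(4)

    def reset(self):
        return np.zeros((6, 94), dtype=np.float32)

    def step(self, action):
        # Placeholder dynamics: terminate immediately with zero reward.
        return np.zeros((6, 94), dtype=np.float32), 0.0, True, {}

ray.init()
trainer = PPOTrainer(env=MyEnv, config={"num_workers": 1})  # raises the ValueError below
```

Full output: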
```
2019-07-22 12:52:30,562 INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-07-22_12-52-30_528225_14/logs.
2019-07-22 12:52:31,285 INFO services.py:409 -- Waiting for redis server at 127.0.0.1:47266 to respond...
2019-07-22 12:52:31,650 INFO services.py:409 -- Waiting for redis server at 127.0.0.1:56261 to respond...
2019-07-22 12:52:31,655 INFO services.py:806 -- Starting Redis shard with 3.42 GB max memory.
2019-07-22 12:52:32,488 INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-07-22_12-52-30_528225_14/logs.
2019-07-22 12:52:32,493 INFO services.py:1446 -- Starting the Plasma object store with 5.13 GB memory using /dev/shm.
W0722 12:52:47.568245 140718767867648 deprecation_wrapper.py:119] From /home/my_user/.local/lib/python3.5/site-packages/ray/tune/logger.py:136: The name tf.VERSION is deprecated. Please use tf.version.VERSION instead.
W0722 12:52:47.570142 140718767867648 deprecation_wrapper.py:119] From /home/my_user/.local/lib/python3.5/site-packages/ray/tune/logger.py:141: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.
2019-07-22 12:52:49,538 WARNING ppo.py:151 -- FYI: By default, the value function will not share layers with the policy model ('vf_share_layers': False).
2019-07-22 12:52:54,630 INFO rollout_worker.py:301 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-6-8da763475e1e> in <module>
23 }
24 #resources = PPOTrainer.default_resource_request(config).to_json()
---> 25 trainer = PPOTrainer(env = my_env, config = config)
26 for _ in range(10):
27 result = trainer.train()
~/.local/lib/python3.5/site-packages/ray/rllib/agents/trainer_template.py in __init__(self, config, env, logger_creator)
85
86 def __init__(self, config=None, env=None, logger_creator=None):
---> 87 Trainer.__init__(self, config, env, logger_creator)
88
89 def _init(self, config, env_creator):
~/.local/lib/python3.5/site-packages/ray/rllib/agents/trainer.py in __init__(self, config, env, logger_creator)
321 logger_creator = default_logger_creator
322
--> 323 Trainable.__init__(self, config, logger_creator)
324
325 @classmethod
~/.local/lib/python3.5/site-packages/ray/tune/trainable.py in __init__(self, config, logger_creator)
85 self._iterations_since_restore = 0
86 self._restored = False
---> 87 self._setup(copy.deepcopy(self.config))
88 self._local_ip = ray.services.get_node_ip_address()
89
~/.local/lib/python3.5/site-packages/ray/rllib/agents/trainer.py in _setup(self, config)
436
437 with get_scope():
--> 438 self._init(self.config, self.env_creator)
439
440 # Evaluation related
~/.local/lib/python3.5/site-packages/ray/rllib/agents/trainer_template.py in _init(self, config, env_creator)
104 else:
105 self.workers = self._make_workers(env_creator, policy, config,
--> 106 self.config["num_workers"])
107 if make_policy_optimizer:
108 self.optimizer = make_policy_optimizer(self.workers, config)
~/.local/lib/python3.5/site-packages/ray/rllib/agents/trainer.py in _make_workers(self, env_creator, policy, config, num_workers)
481 config,
482 num_workers=num_workers,
--> 483 logdir=self.logdir)
484
485 @DeveloperAPI
~/.local/lib/python3.5/site-packages/ray/rllib/evaluation/worker_set.py in __init__(self, env_creator, policy, trainer_config, num_workers, logdir, _setup)
62 # Always create a local worker
63 self._local_worker = self._make_worker(
---> 64 RolloutWorker, env_creator, policy, 0, self._local_config)
65
66 # Create a number of remote workers
~/.local/lib/python3.5/site-packages/ray/rllib/evaluation/worker_set.py in _make_worker(self, cls, env_creator, policy, worker_index, config)
212 remote_env_batch_wait_ms=config["remote_env_batch_wait_ms"],
213 soft_horizon=config["soft_horizon"],
--> 214 _fake_sampler=config.get("_fake_sampler", False))
~/.local/lib/python3.5/site-packages/ray/rllib/evaluation/rollout_worker.py in __init__(self, env_creator, policy, policy_mapping_fn, policies_to_train, tf_session_creator, batch_steps, batch_mode, episode_horizon, preprocessor_pref, sample_async, compress_observations, num_envs, observation_filter, clip_rewards, clip_actions, env_config, model_config, policy_config, worker_index, monitor_path, log_dir, log_level, callbacks, input_creator, input_evaluation, output_creator, remote_worker_envs, remote_env_batch_wait_ms, soft_horizon, _fake_sampler)
309 with self.tf_sess.as_default():
310 self.policy_map, self.preprocessors = \
--> 311 self._build_policy_map(policy_dict, policy_config)
312 else:
313 self.policy_map, self.preprocessors = self._build_policy_map(
~/.local/lib/python3.5/site-packages/ray/rllib/evaluation/rollout_worker.py in _build_policy_map(self, policy_dict, policy_config)
713 if tf:
714 with tf.variable_scope(name):
--> 715 policy_map[name] = cls(obs_space, act_space, merged_conf)
716 else:
717 policy_map[name] = cls(obs_space, act_space, merged_conf)
~/.local/lib/python3.5/site-packages/ray/rllib/policy/tf_policy_template.py in __init__(self, obs_space, action_space, config, existing_inputs)
126 make_action_sampler=make_action_sampler,
127 existing_inputs=existing_inputs,
--> 128 obs_include_prev_action_reward=obs_include_prev_action_reward)
129
130 if after_init:
~/.local/lib/python3.5/site-packages/ray/rllib/policy/dynamic_tf_policy.py in __init__(self, obs_space, action_space, config, loss_fn, stats_fn, update_ops_fn, grad_stats_fn, before_loss_init, make_action_sampler, existing_inputs, get_batch_divisibility_req, obs_include_prev_action_reward)
137 self.config["model"],
138 state_in=existing_state_in,
--> 139 seq_lens=existing_seq_lens)
140 self.action_dist = self.dist_class(self.model.outputs)
141 action_sampler = self.action_dist.sample()
~/.local/lib/python3.5/site-packages/ray/rllib/models/catalog.py in get_model(input_dict, obs_space, action_space, num_outputs, options, state_in, seq_lens)
226 model = ModelCatalog._get_model(input_dict, obs_space, action_space,
227 num_outputs, options, state_in,
--> 228 seq_lens)
229
230 if options.get("use_lstm"):
~/.local/lib/python3.5/site-packages/ray/rllib/models/catalog.py in _get_model(input_dict, obs_space, action_space, num_outputs, options, state_in, seq_lens)
263 if obs_rank > 1:
264 return VisionNetwork(input_dict, obs_space, action_space,
--> 265 num_outputs, options)
266
267 return FullyConnectedNetwork(input_dict, obs_space, action_space,
~/.local/lib/python3.5/site-packages/ray/rllib/models/model.py in __init__(self, input_dict, obs_space, action_space, num_outputs, options, state_in, seq_lens)
83 input_dict["obs"], obs_space)
84 self.outputs, self.last_layer = self._build_layers_v2(
---> 85 restored, num_outputs, options)
86 except NotImplementedError:
87 self.outputs, self.last_layer = self._build_layers(
~/.local/lib/python3.5/site-packages/ray/rllib/models/visionnet.py in _build_layers_v2(self, input_dict, num_outputs, options)
19 filters = options.get("conv_filters")
20 if not filters:
---> 21 filters = _get_filter_config(inputs.shape.as_list()[1:])
22
23 activation = get_activation_fn(options.get("conv_activation"))
~/.local/lib/python3.5/site-packages/ray/rllib/models/visionnet.py in _get_filter_config(shape)
70 raise ValueError(
71 "No default configuration for obs shape {}".format(shape) +
---> 72 ", you must specify `conv_filters` manually as a model option. "
73 "Default configurations are only available for inputs of shape "
74 "[42, 42, K] and [84, 84, K]. You may alternatively want "
ValueError: No default configuration for obs shape [6, 94], you must specify `conv_filters` manually as a model option. Default configurations are only available for inputs of shape [42, 42, K] and [84, 84, K]. You may alternatively want to use a custom model or preprocessor.
```
Edit: it turns out it wants the parameters of a CNN. I am now trying to adjust the values so that the network maps down to the required output space.
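For anyone else hitting this, below is the kind of model config I am now experimenting with. It is a sketch, assuming the (6, 94) observation is treated as a 6x94 single-channel image, and assuming the VisionNetwork semantics in 0.7.2: each filter entry is [out_channels, [kernel_height, kernel_width], stride], every layer except the last uses SAME padding (output = ceil(input / stride)), and the last layer uses VALID padding and must reduce the spatial dimensions to 1x1:

```python
config = {
    "model": {
        "conv_filters": [
            [16, [4, 4], 2],    # 6x94 -> 3x47  (SAME padding)
            [32, [4, 4], 2],    # 3x47 -> 2x24  (SAME padding)
            [256, [2, 24], 1],  # 2x24 -> 1x1   (final layer, VALID padding)
        ],
    },
}
```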
Edit 2:
The catalog.py available on GitHub (master) is not in sync with the latest release, 0.7.2:
https://github.com/ray-project/ray/blob/master/python/ray/rllib/models/catalog.py
Using the `no_final_linear` key from master fails with:
```
Unknown config key `no_final_linear`, all keys: ['free_log_std', 'grayscale', 'framestack', 'conv_activation', 'squash_to_range', 'zero_mean', 'custom_model', 'fcnet_activation', 'custom_options', 'conv_filters', 'fcnet_hiddens', 'lstm_use_prev_action_reward', 'custom_preprocessor', 'max_seq_len', 'use_lstm', 'lstm_cell_size', 'dim']
```
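To check which model config keys the locally installed version actually accepts, rather than reading catalog.py from master, one option is to print the defaults dict the error is validating against. A sketch, assuming MODEL_DEFAULTS is defined in the catalog module of 0.7.2 (the key list in the error above suggests it is):

```python
# Print the model config keys accepted by the installed RLlib version,
# instead of trusting catalog.py on the master branch.
from ray.rllib.models.catalog import MODEL_DEFAULTS

print(sorted(MODEL_DEFAULTS.keys()))
```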