0

我正在尝试将 TensorFlow 的 StagingArea(暂存区)与 Dataset API 结合使用。

# Build the op that puts the iterator's next element into the compute stage.
compute_stage_put_op = compute_stage.put(iterator.get_next())
# Only collect the op when its op type is 'Stage'; any other op type is
# skipped here (presumably a wrapper op that the session cannot fetch the
# same way -- TODO confirm against the StagingArea implementation in use).
if compute_stage_put_op.type == 'Stage':
   compute_stage_ops.append(compute_stage_put_op)

训练运行了几百步之后,出现了以下错误。

ValueError: Fetch argument <tf.Operation 'group_deps' type=NoOp> 
            cannot be interpreted as a Tensor. (Operation name: 
            "group_deps" op: "NoOp")

堆栈跟踪:

Traceback (most recent call last):
  File "timit_trainer.py", line 5, in <module>
    timit_trainer.train()
  File "/mnt/sdc/nlp/workspace/hci/nlp/mapc/core/model/model.py", line 43, in train
    hparams=self.hyper_params  # HParams
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/learn/python/learn/learn_runner.py", line 218, in run
    return _execute_schedule(experiment, schedule)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/learn/python/learn/learn_runner.py", line 46, in _execute_schedule
    return task()
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 625, in train_and_evaluate
    self.train(delay_secs=0)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 367, in train
    hooks=self._train_monitors + extra_hooks)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 807, in _call_train
    hooks=hooks)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/estimator/estimator.py", line 302, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/estimator/estimator.py", line 783, in _train_model
    _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/monitored_session.py", line 521, in run
    run_metadata=run_metadata)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/monitored_session.py", line 892, in run
    run_metadata=run_metadata)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/monitored_session.py", line 967, in run
    raise six.reraise(*original_exc_info)
  File "/usr/local/lib/python3.5/dist-packages/six.py", line 693, in reraise
    raise value
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/monitored_session.py", line 952, in run
    return self._sess.run(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/monitored_session.py", line 1032, in run
    run_metadata=run_metadata))
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/learn/python/learn/monitors.py", line 1196, in after_run
    induce_stop = m.step_end(self._last_step, result)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/learn/python/learn/monitors.py", line 356, in step_end
    return self.every_n_step_end(step, output)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/learn/python/learn/monitors.py", line 694, in every_n_step_end
    validation_outputs = self._evaluate_estimator()
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/learn/python/learn/monitors.py", line 665, in _evaluate_estimator
    name=self.name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/estimator/estimator.py", line 355, in evaluate
    name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/estimator/estimator.py", line 839, in _evaluate_model
    config=self._session_config)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/evaluation.py", line 206, in _evaluate_once
    session.run(eval_ops, feed_dict)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/monitored_session.py", line 521, in run
    run_metadata=run_metadata)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/monitored_session.py", line 892, in run
    run_metadata=run_metadata)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/monitored_session.py", line 967, in run
    raise six.reraise(*original_exc_info)
  File "/usr/local/lib/python3.5/dist-packages/six.py", line 693, in reraise
    raise value
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/monitored_session.py", line 952, in run
    return self._sess.run(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/monitored_session.py", line 1024, in run
    run_metadata=run_metadata)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/monitored_session.py", line 827, in run
    return self._sess.run(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 889, in run
    run_metadata_ptr)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1105, in _run
    self._graph, fetches, feed_dict_tensor, feed_handles=feed_handles)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 414, in __init__
    self._fetch_mapper = _FetchMapper.for_fetch(fetches)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 236, in for_fetch
    return _DictFetchMapper(fetch)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 374, in __init__
    for fetch in fetches.values()]
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 374, in <listcomp>
    for fetch in fetches.values()]
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 234, in for_fetch
    return _ListFetchMapper(fetch)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 341, in __init__
    self._mappers = [_FetchMapper.for_fetch(fetch) for fetch in fetches]
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 341, in <listcomp>
    self._mappers = [_FetchMapper.for_fetch(fetch) for fetch in fetches]
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 234, in for_fetch
    return _ListFetchMapper(fetch)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 341, in __init__
    self._mappers = [_FetchMapper.for_fetch(fetch) for fetch in fetches]
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 341, in <listcomp>
    self._mappers = [_FetchMapper.for_fetch(fetch) for fetch in fetches]
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 242, in for_fetch
    return _ElementFetchMapper(fetches, contraction_fn)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 278, in __init__
    'Tensor. (%s)' % (fetch, str(e)))
ValueError: Fetch argument cannot be interpreted as a Tensor. (Operation name: "StagingArea_put" ...

代码:

def read(self, category: DatasetCategory, devices: list, proc_device: str, shuffle=False):
    """Build a staged, multi-device input pipeline over a category's TFRecord store.

    If the record store for ``category`` is reported as missing,
    ``self.__process()`` is run and the store info is looked up again.

    Args:
        category: Dataset category; its ``name`` is part of the store file name.
        devices: Device strings. For each one a ``put``/``get`` pair on the
            compute stage is created under ``tf.device(device)`` and the
            resulting tensors are keyed by that device string.
        proc_device: Device under which the iterator output is put into the
            copy stage.
        shuffle: When true, shuffle with a buffer of ``1000 * ds.BATCH_SIZE``
            elements before batching.

    Returns:
        A tuple ``(input_fn, hooks)``. ``input_fn`` takes no arguments and
        returns ``(features_dict, labels_dict)``; both are keyed by device and
        hold ``{'source', 'source_len'}`` and ``{'target', 'target_len'}``
        respectively. ``hooks`` is
        ``[iterator_init_hook, copy_stage_hook, compute_stage_hook]``.
    """
    batch_size = ds.BATCH_SIZE

    store_path = fu.join_path(self.store_dir, self.store_name + '_' + category.name + '.tfrecord')
    record_store_exists, record_store = self.__get_store_info(store_path=store_path, create_new=False)
    logger.info('Reading records. category: {}, store_exists:{}, store;{}'.format(category.name, str(record_store_exists), record_store))
    if not record_store_exists:
        # Generate the records, then re-resolve the store that was just written.
        self.__process()
        record_store_exists, record_store = self.__get_store_info(store_path=store_path, create_new=False)

    iterator_init_hook = SessionRunHook()
    map_fn = self.__parse_function

    # These lists are shared by reference with the hooks created below; they
    # are empty now and are filled when input_fn builds the graph.
    gpu_copy_stage_ops = []
    gpu_compute_stage_ops = []

    def input_fn():
        # input_fn is invoked once per train()/evaluate() call, each time in a
        # new graph. Without clearing, ops from earlier graphs pile up in these
        # lists and the hooks later try to fetch ops that do not belong to the
        # current session's graph ("cannot be interpreted as a Tensor").
        # Clear in place so the hooks keep seeing the same list objects.
        gpu_copy_stage_ops.clear()
        gpu_compute_stage_ops.clear()

        file_names = tf.placeholder(dtype=tf.string, shape=[None], name='data_store')
        dataset = tf.data.TFRecordDataset(filenames=file_names, buffer_size=2000000000)  # 2.0GB
        dataset = dataset.map(map_func=map_fn, num_parallel_calls=tf.constant(value=20000, dtype=tf.int32))
        if shuffle:
            dataset = dataset.shuffle(buffer_size=tf.constant(value=1000 * batch_size, dtype=tf.int64))

        dataset = dataset.repeat(None)  # Infinite iterations
        dataset = dataset.padded_batch(batch_size=tf.constant(value=batch_size, dtype=tf.int64), padded_shapes=([None, ds.NUM_INPUT_FEATURES], [], [None], []))

        iterator = dataset.make_initializable_iterator()
        # Rebound on every call so the hook initializes the iterator of the
        # graph that was built most recently.
        iterator_init_hook.run_func = lambda session: session.run(iterator.initializer, feed_dict={file_names: [record_store]})

        stage_dtypes = [tf.float32, tf.int32, tf.int32, tf.int32]
        stage_shapes = [[batch_size, None, ds.NUM_INPUT_FEATURES], [batch_size], [batch_size, None], [batch_size]]

        # Two staging areas with identical signatures: iterator -> copy stage
        # (on proc_device) -> compute stage (on each device) -> model inputs.
        gpu_copy_stage = StagingArea(dtypes=stage_dtypes, shapes=stage_shapes)
        gpu_compute_stage = StagingArea(dtypes=stage_dtypes, shapes=stage_shapes)

        features_dict = {}
        labels_dict = {}
        for device in devices:
            with tf.device(proc_device):
                gpu_copy_stage_ops.append(gpu_copy_stage.put(iterator.get_next()))
            with tf.device(device):
                gpu_compute_stage_ops.append(gpu_compute_stage.put(gpu_copy_stage.get()))
                source, source_len, target, target_len = gpu_compute_stage.get()
                if ds.USE_WARP_CTC:
                    # Join the per-example target rows end to end along axis 0.
                    target = tf.concat([target[bi] for bi in range(batch_size)], axis=0)

                features_dict[device] = {'source': source, 'source_len': source_len}
                labels_dict[device] = {'target': target, 'target_len': target_len}

        return features_dict, labels_dict

    # NOTE(review): the copy hook is configured with every_n_secs while the
    # compute hook uses every_n_steps -- confirm that the mismatch is intended.
    copy_stage_hook = StepOpsRunHook(ops=[gpu_copy_stage_ops], every_n_secs=1)
    compute_stage_hook = StepOpsRunHook(ops=[gpu_compute_stage_ops], every_n_steps=1)

    return input_fn, [iterator_init_hook, copy_stage_hook, compute_stage_hook]
4

0 回答 0