I'm building a pipeline with the Azure ML SDK. After the pipeline had been running for a while, it reported that I had reached the 300 MB snapshot limit. I followed some of the suggested fixes:
- moved each step script into its own subfolder
- added a datastore to the pipeline
- added this override:

import azureml._restclient.snapshots_client
# SNAPSHOT_MAX_SIZE_BYTES is, as the name says, expressed in bytes.
azureml._restclient.snapshots_client.SNAPSHOT_MAX_SIZE_BYTES = 1000
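
For reference, def_blob_store, which I later pass as default_datastore, is obtained roughly like this (a sketch; it could equally be a registered Datastore fetched via Datastore.get):

from azureml.core import Workspace

ws = Workspace.from_config()                  # assumes a config.json next to the notebook
def_blob_store = ws.get_default_datastore()   # default blob store, passed to the Pipeline below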
But when I now build and submit the pipeline, a new snapshot error is raised (during the Pipeline construction itself):
pipeline1 = Pipeline(default_source_directory=".", default_datastore=def_blob_store, workspace=ws, steps=[prep_step, hd_step, register_model_step])
The error message:
WARNING:root:If 'script' has been provided here and a script file name has been specified in 'run_config', 'script' provided in ScriptRunConfig initialization will take precedence.
---------------------------------------------------------------------------
SnapshotException Traceback (most recent call last)
<ipython-input-14-05c5aa4991aa> in <module>
----> 1 pipeline1 = Pipeline(default_source_directory=".", default_datastore=def_blob_store, workspace=ws, steps=[prep_step, hd_step, register_model_step])
2 pipeline1.validate()
3 pipeline_run = Experiment(ws, 'health_insuarance').submit(pipeline1, regenerate_outputs=False)
4 RunDetails(pipeline_run).show()
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/core/_experiment_method.py in wrapper(self, *args, **kwargs)
95 """
96 ExperimentSubmitRegistrar.register_submit_function(self.__class__, submit_function)
---> 97 return init_func(self, *args, **kwargs)
98 return wrapper
99 return real_decorator
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/pipeline/core/pipeline.py in __init__(self, workspace, steps, description, default_datastore, default_source_directory, resolve_closure, _workflow_provider, _service_endpoint, **kwargs)
175 raise ValueError('parameter %s is not recognized for Pipeline ' % key)
176 self._enable_email_notification = enable_email_notification
--> 177 self._graph = self._graph_builder.build(self._name, steps, finalize=False)
178
179 def _set_experiment_name(self, name):
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/pipeline/core/builder.py in build(self, name, steps, finalize, regenerate_outputs)
1479 pass
1480
-> 1481 graph = self.construct(name, steps)
1482 if finalize:
1483 graph.finalize(regenerate_outputs=regenerate_outputs)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/pipeline/core/builder.py in construct(self, name, steps)
1501 self._graph = Graph(name, self._context)
1502 self._nodeStack.append([])
-> 1503 self.process_collection(steps)
1504 for builder in self._builderStack[::-1]:
1505 builder.apply_rules()
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/pipeline/core/builder.py in process_collection(self, collection)
1537 self._nodeStack.append([])
1538 self._builderStack.append(builder)
-> 1539 builder.process_collection(collection)
1540 added_nodes = self._nodeStack.pop()
1541 self._nodeStack[-1].extend(added_nodes)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/pipeline/core/builder.py in process_collection(self, collection)
1828 """
1829 for item in collection:
-> 1830 self._base_builder.process_collection(item)
1831
1832 def apply_rules(self):
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/pipeline/core/builder.py in process_collection(self, collection)
1531 # just a step?
1532 if isinstance(collection, PipelineStep):
-> 1533 return self.process_step(collection)
1534
1535 # delegate to correct builder
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/pipeline/core/builder.py in process_step(self, step)
1575 return self._step2node[step]
1576
-> 1577 node = step.create_node(self._graph, self._default_datastore, self._context)
1578 self.assert_node_valid(step, self._graph, node)
1579
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/pipeline/steps/hyper_drive_step.py in create_node(self, graph, default_datastore, context)
247 """
248 hyperdrive_config, reuse_hashable_config = self._get_hyperdrive_config(context._workspace,
--> 249 context._experiment_name)
250 self._params[HyperDriveStep._run_config_param_name] = json.dumps(hyperdrive_config)
251 self._params[HyperDriveStep._run_reuse_hashable_config] = json.dumps(reuse_hashable_config)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/pipeline/steps/hyper_drive_step.py in _get_hyperdrive_config(self, workspace, experiment_name)
323
324 hyperdrive_dto = _search._create_experiment_dto(self._hyperdrive_config, workspace,
--> 325 experiment_name, telemetry_values)
326
327 hyperdrive_config = hyperdrive_dto.as_dict()
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/hyperdrive/_search.py in _create_experiment_dto(hyperdrive_config, workspace, experiment_name, telemetry_values, activity_logger, **kwargs)
41 if hyperdrive_config.source_directory is not None:
42 snapshot_client = SnapshotsClient(workspace.service_context)
---> 43 snapshot_id = snapshot_client.create_snapshot(hyperdrive_config.source_directory)
44
45 if activity_logger is not None:
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/_restclient/snapshots_client.py in create_snapshot(self, file_or_folder_path, retry_on_failure, raise_on_validation_failure)
83 exclude_function = ignore_file.is_file_excluded
84
---> 85 self._validate_snapshot_size(file_or_folder_path, exclude_function, raise_on_validation_failure)
86
87 # Get the previous snapshot for this project
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/_restclient/snapshots_client.py in _validate_snapshot_size(self, file_or_folder_path, exclude_function, raise_on_validation_failure)
61 "\n".format(file_or_folder_path, SNAPSHOT_MAX_SIZE_BYTES / ONE_MB)
62 if raise_on_validation_failure:
---> 63 raise SnapshotException(error_message)
64 else:
65 self._logger.warning(error_message)
SnapshotException: SnapshotException:
Message: ====================================================================
While attempting to take snapshot of ./train/
Your total snapshot size exceeds the limit of 0.00095367431640625 MB.
Please see http://aka.ms/aml-largefiles on how to work with large files.
====================================================================
InnerException None
ErrorResponse
{
"error": {
"message": "====================================================================\n\nWhile attempting to take snapshot of ./train/\nYour total snapshot size exceeds the limit of 0.00095367431640625 MB.\nPlease see http://aka.ms/aml-largefiles on how to work with large files.\n\n====================================================================\n\n"
}
}
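
For reference, the folder being snapshotted is the HyperDrive step's source_directory (./train/ in the message above). That step is wired up roughly like the sketch below; the script name, cluster, environment and hyperparameters are placeholders, and the real code is in the script linked underneath:

from azureml.core import ScriptRunConfig, Environment
from azureml.train.hyperdrive import HyperDriveConfig, RandomParameterSampling, PrimaryMetricGoal, choice
from azureml.pipeline.steps import HyperDriveStep

# The HyperDrive step snapshots everything under its source_directory (./train/).
src = ScriptRunConfig(
    source_directory="./train",                        # the folder being snapshotted
    script="train.py",                                 # hypothetical script name
    compute_target=ws.compute_targets["cpu-cluster"],  # hypothetical cluster name
    environment=Environment.get(ws, "AzureML-Minimal"),
)

hd_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=RandomParameterSampling({"--C": choice(0.1, 1.0, 10.0)}),  # placeholder
    primary_metric_name="accuracy",                    # placeholder metric
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=4,
)

hd_step = HyperDriveStep(name="hd_step", hyperdrive_config=hd_config)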
Any idea how I can fix this?
The full script is here: the script on GitHub