我在 AWS DMS 中运行 8-10 个正在进行的复制任务,每个任务中有多个表。但是一项特定的任务会失败~每周两次。它有一个高度事务性的表,其中每 10 分钟发生约 100 万次更新。
错误-
Reading from source endpoint temporary paused as total storage used by swap files exceeded the limit for task: abc-xyz
Cloudwatch 中的信息 -
Reading from source is paused. Total storage used by swap files exceeded the limit 1048576000 bytes
所有移民前评估通过。
所有表都有一个主键,并且在失败的任务中没有 LOB 或 CLOB 列。任务详情:
- 来源-甲骨文
- 目标 - 红移
- 复制实例 - dms.c5.2xlarge,单可用区
- 平均 CPU 利用率 - ~35 %
- 使用 LOG 矿工技术。
任务设置 JSON:
{
"Logging": {
"EnableLogging": true,
"LogComponents": [
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "TRANSFORMATION"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "SOURCE_UNLOAD"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "IO"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "TARGET_LOAD"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "PERFORMANCE"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "SOURCE_CAPTURE"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "SORTER"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "REST_SERVER"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "VALIDATOR_EXT"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "TARGET_APPLY"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "TASK_MANAGER"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "TABLES_MANAGER"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "METADATA_MANAGER"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "FILE_FACTORY"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "COMMON"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "ADDONS"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "DATA_STRUCTURE"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "COMMUNICATION"
},
{
"Severity": "LOGGER_SEVERITY_DEFAULT",
"Id": "FILE_TRANSFER"
}
],
"CloudWatchLogGroup": "dms-tasks-rep-instance-poc-3",
"CloudWatchLogStream": "dms-task-abc-ongoing-task"
},
"StreamBufferSettings": {
"StreamBufferCount": 3,
"CtrlStreamBufferSizeInMB": 5,
"StreamBufferSizeInMB": 8
},
"ErrorBehavior": {
"FailOnNoTablesCaptured": false,
"ApplyErrorUpdatePolicy": "LOG_ERROR",
"FailOnTransactionConsistencyBreached": false,
"RecoverableErrorThrottlingMax": 1800,
"DataErrorEscalationPolicy": "SUSPEND_TABLE",
"ApplyErrorEscalationCount": 0,
"RecoverableErrorStopRetryAfterThrottlingMax": false,
"RecoverableErrorThrottling": true,
"ApplyErrorFailOnTruncationDdl": false,
"DataTruncationErrorPolicy": "LOG_ERROR",
"ApplyErrorInsertPolicy": "LOG_ERROR",
"ApplyErrorEscalationPolicy": "LOG_ERROR",
"RecoverableErrorCount": -1,
"DataErrorEscalationCount": 0,
"TableErrorEscalationPolicy": "STOP_TASK",
"RecoverableErrorInterval": 5,
"ApplyErrorDeletePolicy": "IGNORE_RECORD",
"TableErrorEscalationCount": 0,
"FullLoadIgnoreConflicts": true,
"DataErrorPolicy": "LOG_ERROR",
"TableErrorPolicy": "SUSPEND_TABLE"
},
"TTSettings": {
"TTS3Settings": null,
"TTRecordSettings": null,
"EnableTT": false
},
"FullLoadSettings": {
"CommitRate": 10000,
"StopTaskCachedChangesApplied": false,
"StopTaskCachedChangesNotApplied": false,
"MaxFullLoadSubTasks": 8,
"TransactionConsistencyTimeout": 600,
"CreatePkAfterFullLoad": false,
"TargetTablePrepMode": "DO_NOTHING"
},
"TargetMetadata": {
"ParallelApplyBufferSize": 500,
"ParallelApplyQueuesPerThread": 0,
"ParallelApplyThreads": 32,
"TargetSchema": "",
"InlineLobMaxSize": 0,
"ParallelLoadQueuesPerThread": 0,
"SupportLobs": true,
"LobChunkSize": 0,
"TaskRecoveryTableEnabled": false,
"ParallelLoadThreads": 8,
"LobMaxSize": 32,
"BatchApplyEnabled": true,
"FullLobMode": false,
"LimitedSizeLobMode": true,
"LoadMaxFileSize": 0,
"ParallelLoadBufferSize": 500
},
"BeforeImageSettings": null,
"ControlTablesSettings": {
"historyTimeslotInMinutes": 5,
"HistoryTimeslotInMinutes": 5,
"StatusTableEnabled": false,
"SuspendedTablesTableEnabled": false,
"HistoryTableEnabled": false,
"ControlSchema": "",
"FullLoadExceptionTableEnabled": false
},
"LoopbackPreventionSettings": null,
"CharacterSetSettings": null,
"FailTaskWhenCleanTaskResourceFailed": false,
"ChangeProcessingTuning": {
"StatementCacheSize": 50,
"CommitTimeout": 1,
"BatchApplyPreserveTransaction": true,
"BatchApplyTimeoutMin": 1,
"BatchSplitSize": 0,
"BatchApplyTimeoutMax": 30,
"MinTransactionSize": 1000,
"MemoryKeepTime": 60,
"BatchApplyMemoryLimit": 500,
"MemoryLimitTotal": 1024
},
"ChangeProcessingDdlHandlingPolicy": {
"HandleSourceTableDropped": true,
"HandleSourceTableTruncated": true,
"HandleSourceTableAltered": true
},
"PostProcessingRules": null
}
它是由于单个 AZ 而不是多 AZ CPU 造成的,还是有其他主要原因?