6

我们有一个大型 MongoDB 数据库(约 140 万个集合),运行 MongoDB 3.0,存储引擎为 RocksDB,操作系统为 Ubuntu 14.04。

该数据库运行在一台拥有 16 个内核和 108 GB RAM 的虚拟机(VMware vCloud)上(目前 MongoDB 占用 70 GB 内存,未使用交换分区)。

生产设置选项:

  • 专用分区上的数据 - XFS 文件系统
  • transparent_hugepage enabled 设置为 never
  • transparent_hugepage defrag 设置为 never

数据库统计:

{
    "db" : "ctp",
    "collections" : 1369486,
    "objects" : 20566852,
    "avgObjSize" : 1126.82749999854,
    "dataSize" : 23175294422,
    "storageSize" : 23231888384,
    "numExtents" : 0,
    "indexes" : 6686175,
    "indexSize" : 685981393,
    "ok" : 1
}

样本集合大小:

{
    "ns" : "ctp._cf123_ct49_dfc-r_dtc-r_tof2_groupat",
    "count" : 33,
    "size" : 38172,
    "avgObjSize" : 1156,
    "storageSize" : 38144,
    "capped" : false,
    "nindexes" : 5,
    "totalIndexSize" : 6312,
    "indexSizes" : {
        "_id_" : 18,
        "exAt" : 16,
        "unique" : 6246,
        "_smp" : 10,
        "_smpdf" : 22
    },
    "ok" : 1
}

{
    "ns" : "ctp._afpoznan123_atlondyn49_df2016-09_dt2016-09_tof2_groupdfdt",
    "count" : 188,
    "size" : 208677,
    "avgObjSize" : 1109,
    "storageSize" : 208640,
    "capped" : false,
    "nindexes" : 5,
    "totalIndexSize" : 7945,
    "indexSizes" : {
        "_id_" : 2845,
        "exAt" : 256,
        "_smp" : 160,
        "_smpdf" : 352,
        "unique" : 4332
    },
    "ok" : 1
}
{
    "ns" : "ctp._cf123_ct42_dfc-r_dtc-r_tof2_groupat",
    "count" : 27,
    "size" : 30400,
    "avgObjSize" : 1125,
    "storageSize" : 30208,
    "capped" : false,
    "nindexes" : 5,
    "totalIndexSize" : 84,
    "indexSizes" : {
        "_id_" : 18,
        "exAt" : 16,
        "unique" : 18,
        "_smp" : 10,
        "_smpdf" : 22
    },
    "ok" : 1
}

我们有一个每 5 分钟运行一次的脚本,它会向这些集合写入数据;如果目标集合不存在,则会创建新集合(集合名称取决于集合中的数据)并为其创建索引。

我们注意到,在向集合写入数据期间,该服务器会出现冻结,每次冻结可能持续 5 到 60 秒。

有没有人遇到过这个问题并可以帮助我们?

以下是冻结时刻的一些日志:

db.currentOP();

  "opid" : 22717868,
        "active" : false,
        "op" : "query",
        "ns" : "ctp._cf115_atboma25_dfc-r_dtc-r_tof2_groupdfym",
        "query" : {
            "$query" : {
                "T#df" : {
                    "$lt" : "2017-02-28"
                }
            },
            "$orderby" : {
                "T#mp" : NumberLong(1)
            }
        },
        "client" : "192.168.1.33:33832",
        "numYields" : 0,
        "locks" : {
            "Global" : "r",
            "Database" : "r"
        },
        "waitingForLock" : true,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(2)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(1)
                },
                "acquireWaitCount" : {
                    "r" : NumberLong(1)
                },
                "timeAcquiringMicros" : {
                    "r" : NumberLong(22004831)
                }
            }
        }
    }, 
    {
        "desc" : "conn135907",
        "threadId" : "0xc3e5d64e0",
        "connectionId" : 135907,
        "opid" : 22719375,
        "active" : true,
        "secs_running" : 0,
        "microsecs_running" : NumberLong(223601),
        "op" : "query",
        "ns" : "top_search.top_searches",
        "query" : {
            "$msg" : "query not recording (too large)"
        },
        "planSummary" : "IXSCAN { T#df: 1, T#dt: 1 }",
        "client" : "192.168.1.33:33648",
        "numYields" : 170,
        "locks" : {
            "Global" : "r",
            "Database" : "r",
            "Collection" : "r"
        },
        "waitingForLock" : false,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(342)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(171)
                }
            },
            "Collection" : {
                "acquireCount" : {
                    "r" : NumberLong(171)
                }
            }
        }
    }, 
    {
        "desc" : "conn135959",
        "threadId" : "0x10d4445260",
        "connectionId" : 135959,
        "opid" : 22718533,
        "active" : false,
        "op" : "query",
        "ns" : "ctp._afoxford-house23_attamarindo32_dfc-r_dtc-r_tof2_groupdfdt",
        "query" : {
            "$query" : {},
            "$orderby" : {
                "T#mp" : NumberLong(1),
                "T#df" : NumberLong(1)
            }
        },
        "client" : "192.168.1.33:34022",
        "numYields" : 0,
        "locks" : {
            "Global" : "r",
            "Database" : "r"
        },
        "waitingForLock" : true,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(2)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(1)
                },
                "acquireWaitCount" : {
                    "r" : NumberLong(1)
                },
                "timeAcquiringMicros" : {
                    "r" : NumberLong(15003580)
                }
            }
        }
    }, 
    {
        "desc" : "conn135829",
        "threadId" : "0x10d4445740",
        "connectionId" : 135829,
        "opid" : 22717923,
        "active" : false,
        "op" : "query",
        "ns" : "ctp._ct123_dfc-r_dtc-r_tof2_groupdfym",
        "query" : {
            "$query" : {
                "T#df" : {
                    "$lt" : "2017-02-28"
                }
            },
            "$orderby" : {
                "T#mp" : NumberLong(1)
            }
        },
        "client" : "192.168.1.33:33026",
        "numYields" : 0,
        "locks" : {
            "Global" : "r",
            "Database" : "r"
        },
        "waitingForLock" : true,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(2)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(1)
                },
                "acquireWaitCount" : {
                    "r" : NumberLong(1)
                },
                "timeAcquiringMicros" : {
                    "r" : NumberLong(21004810)
                }
            }
        }
    }, 
    {
        "desc" : "conn135781",
        "threadId" : "0x2d678e0",
        "connectionId" : 135781,
        "opid" : 22718920,
        "active" : false,
        "op" : "query",
        "ns" : "ctp._cf1147_atrostock36_df2016-06_dtc-r_tof2_groupaf",
        "query" : {
            "$query" : {},
            "$orderby" : {
                "T#mp" : NumberLong(1)
            }
        },
        "client" : "192.168.1.33:60874",
        "numYields" : 0,
        "locks" : {
            "Global" : "r",
            "Database" : "r"
        },
        "waitingForLock" : true,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(2)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(1)
                },
                "acquireWaitCount" : {
                    "r" : NumberLong(1)
                },
                "timeAcquiringMicros" : {
                    "r" : NumberLong(12002770)
                }
            }
        }
    }, 
    {
        "desc" : "conn135870",
        "threadId" : "0xd04ed5d40",
        "connectionId" : 135870,
        "opid" : 22719172,
        "active" : false,
        "op" : "query",
        "ns" : "ctp._cf61_atpristina131_dfc-r_dtc-r_tof2_groupaf",
        "query" : {
            "$query" : {},
            "$orderby" : {
                "T#mp" : NumberLong(1)
            }
        },
        "client" : "192.168.1.33:33369",
        "numYields" : 0,
        "locks" : {
            "Global" : "r",
            "Database" : "r"
        },
        "waitingForLock" : true,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(2)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(1)
                },
                "acquireWaitCount" : {
                    "r" : NumberLong(1)
                },
                "timeAcquiringMicros" : {
                    "r" : NumberLong(7001590)
                }
            }
        }
    }, 
    {
        "desc" : "conn135687",
        "threadId" : "0xc3e5d7380",
        "connectionId" : 135687,
        "opid" : 22717925,
        "active" : false,
        "op" : "query",
        "ns" : "ctp._cf105_athana156_df2016-06_dt2016-06_tof2_groupaf",
        "query" : {
            "$query" : {},
            "$orderby" : {
                "T#mp" : NumberLong(1)
            }
        },
        "client" : "192.168.1.33:60022",
        "numYields" : 0,
        "locks" : {
            "Global" : "r",
            "Database" : "r"
        },
        "waitingForLock" : true,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(2)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(1)
                },
                "acquireWaitCount" : {
                    "r" : NumberLong(1)
                },
                "timeAcquiringMicros" : {
                    "r" : NumberLong(21003871)
                }
            }
        }
    }, 
    {
        "desc" : "conn135754",
        "threadId" : "0xd04ed5860",
        "connectionId" : 135754,
        "opid" : 22718485,
        "active" : false,
        "op" : "query",
        "ns" : "ctp._cf5_atdhaka1113_dfc-r_dtc-r_tof2_groupaf",
        "query" : {
            "$query" : {},
            "$orderby" : {
                "T#mp" : NumberLong(1)
            }
        },
        "client" : "192.168.1.33:60603",
        "numYields" : 0,
        "locks" : {
            "Global" : "r",
            "Database" : "r"
        },
        "waitingForLock" : true,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(2)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(1)
                },
                "acquireWaitCount" : {
                    "r" : NumberLong(1)
                },
                "timeAcquiringMicros" : {
                    "r" : NumberLong(15503084)
                }
            }
        }
    }, 
    {
        "desc" : "conn135644",
        "threadId" : "0xc3e5d9c20",
        "connectionId" : 135644,
        "opid" : 22719073,
        "active" : false,
        "op" : "query",
        "ns" : "ctp._afgenua71_ataarhus37_dfc-r_dtc-r_tof2_groupdfdt",
        "query" : {
            "$query" : {},
            "$orderby" : {
                "T#mp" : NumberLong(1),
                "T#df" : NumberLong(1)
            }
        },
        "client" : "192.168.1.33:59698",
        "numYields" : 0,
        "locks" : {
            "Global" : "r",
            "Database" : "r"
        },
        "waitingForLock" : true,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(2)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(1)
                },
                "acquireWaitCount" : {
                    "r" : NumberLong(1)
                },
                "timeAcquiringMicros" : {
                    "r" : NumberLong(7501602)
                }
            }
        }
    }, 
    {
        "desc" : "conn135891",
        "threadId" : "0xd04ed7a80",
        "connectionId" : 135891,
        "opid" : 22719284,
        "active" : false,
        "op" : "query",
        "ns" : "ctp._attianjin30_dfc-r_dtc-r_tof2_groupcf",
        "query" : {
            "$query" : {},
            "$orderby" : {
                "T#mp" : NumberLong(1)
            }
        },
        "client" : "192.168.1.33:33530",
        "numYields" : 0,
        "locks" : {
            "Global" : "r",
            "Database" : "r"
        },
        "waitingForLock" : true,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(2)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(1)
                },
                "acquireWaitCount" : {
                    "r" : NumberLong(1)
                },
                "timeAcquiringMicros" : {
                    "r" : NumberLong(3000658)
                }
            }
        }
    }, 
    {
        "desc" : "conn135673",
        "threadId" : "0xd04ed6220",
        "connectionId" : 135673,
        "opid" : 22718185,
        "active" : false,
        "op" : "query",
        "ns" : "ctp._afwroclaw123_atlondyn49_df2016-06_dt2016-06_tof2_groupdfdt",
        "query" : {
            "$query" : {},
            "$orderby" : {
                "T#mp" : NumberLong(1),
                "T#df" : NumberLong(1)
            }
        },
        "client" : "192.168.1.33:59925",
        "numYields" : 0,
        "locks" : {
            "Global" : "r",
            "Database" : "r"
        },
        "waitingForLock" : true,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(2)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(1)
                },
                "acquireWaitCount" : {
                    "r" : NumberLong(1)
                },
                "timeAcquiringMicros" : {
                    "r" : NumberLong(16503737)
                }
            }
        }
    }, 
    {
        "desc" : "conn135989",
        "threadId" : "0x10d44443c0",
        "connectionId" : 135989,
        "opid" : 22719240,
        "active" : false,
        "op" : "query",
        "ns" : "ctp._cf28_atmarakesz93_dfc-r_dtc-r_tof2_groupaf",
        "query" : {
            "$query" : {},
            "$orderby" : {
                "T#mp" : NumberLong(1)
            }
        },
        "client" : "192.168.1.33:34367",
        "numYields" : 0,
        "locks" : {
            "Global" : "r",
            "Database" : "r"
        },
        "waitingForLock" : true,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(2)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(1)
                },
                "acquireWaitCount" : {
                    "r" : NumberLong(1)
                },
                "timeAcquiringMicros" : {
                    "r" : NumberLong(4500947)
                }
            }
        }
    }, 
    {
        "desc" : "conn135410",
        "threadId" : "0x2d66220",
        "connectionId" : 135410,
        "opid" : 22717853,
        "active" : true,
        "secs_running" : 22,
        "microsecs_running" : NumberLong(22406019),
        "op" : "query",
        "ns" : "ctp.$cmd",
        "query" : {
            "createIndexes" : "_cf71_df2016-07_dt2016-11_tof2_groupct",
            "indexes" : [ 
                {
                    "key" : {
                        "expireAt" : 1
                    },
                    "name" : "exAt",
                    "background" : true,
                    "expireAfterSeconds" : 0
                }
            ]
        },
        "client" : "0.0.0.0:0",
        "numYields" : 0,
        "locks" : {
            "Global" : "w",
            "Database" : "W"
        },
        "waitingForLock" : true,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(376227),
                    "w" : NumberLong(15477)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(180375),
                    "w" : NumberLong(15476),
                    "W" : NumberLong(1)
                },
                "acquireWaitCount" : {
                    "W" : NumberLong(1)
                },
                "timeAcquiringMicros" : {
                    "W" : NumberLong(22004935)
                }
            },
            "Collection" : {
                "acquireCount" : {
                    "r" : NumberLong(180375),
                    "w" : NumberLong(15476)
                }
            }
        }
    }, 
    {
        "desc" : "conn135961",
        "threadId" : "0x10d4442b60",
        "connectionId" : 135961,
        "opid" : 22718537,
        "active" : false,
        "op" : "query",
        "ns" : "ctp._cf5_atattawapiskat23_dfc-r_dtc-r_tof2_groupaf",
        "query" : {
            "$query" : {},
            "$orderby" : {
                "T#mp" : NumberLong(1)
            }
        },
        "client" : "192.168.1.33:34029",
        "numYields" : 0,
        "locks" : {
            "Global" : "r",
            "Database" : "r"
        },
        "waitingForLock" : true,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(2)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(1)
                },
                "acquireWaitCount" : {
                    "r" : NumberLong(1)
                },
                "timeAcquiringMicros" : {
                    "r" : NumberLong(15002978)
                }
            }
        }
    }, 
    {
        "desc" : "conn135905",
        "threadId" : "0xc3e5d6000",
        "connectionId" : 135905,
        "opid" : 22718186,
        "active" : false,
        "op" : "query",
        "ns" : "ctp._afwarszawa123_atdubrownik61_df2016-08_dt2016-08_tof2_groupdfdt",
        "query" : {
            "$query" : {},
            "$orderby" : {
                "T#mp" : NumberLong(1),
                "T#df" : NumberLong(1)
            }
        },
        "client" : "192.168.1.33:33638",
        "numYields" : 0,
        "locks" : {
            "Global" : "r",
            "Database" : "r"
        },
        "waitingForLock" : true,
        "lockStats" : {
            "Global" : {
                "acquireCount" : {
                    "r" : NumberLong(2)
                }
            },
            "Database" : {
                "acquireCount" : {
                    "r" : NumberLong(1)
                },
                "acquireWaitCount" : {
                    "r" : NumberLong(1)
                },
                "timeAcquiringMicros" : {
                    "r" : NumberLong(16503305)
                }
            }
        }
    }
]

db.serverStatus()["rocksdb"];

{
    "stats" : [ 
        "", 
        "** Compaction Stats [default] **", 
        "Level    Files   Size(MB) Score Read(GB)  Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) Comp(cnt) Avg(sec) Stall(cnt)  KeyIn KeyDrop", 
        "---------------------------------------------------------------------------------------------------------------------------------------------------------------------", 
        "  L0      0/0       0.00   0.0      0.0     0.0      0.0       1.4      1.4       0.0   0.0      0.0    120.1        12        39    0.312          0       0      0", 
        "  L4      0/0       0.00   0.0      1.8     1.8      0.0       1.7      1.7       0.0   1.0    102.0     99.7        18        11    1.606          7     21M   153K", 
        "  L5     15/0     620.47   1.0      6.6     1.4      5.2       5.5      0.3       0.0   3.9     44.4     37.0       152        25    6.086          0    110M   840K", 
        "  L6    106/0    6401.43   0.0      3.5     0.3      3.3       3.3     -0.0       0.0  12.6     25.9     23.7       140         7   20.057          0    162M    14M", 
        " Sum    121/0    7021.90   0.0     11.9     3.4      8.5      11.9      3.4       0.0   8.3     37.8     37.8       322        82    3.932          7    295M    15M", 
        " Int      0/0       0.00   0.0      0.0     0.0      0.0       0.0      0.0       0.0   0.0      0.0      0.0         0         0    0.000          0       0      0", 
        "Flush(GB): cumulative 1.429, interval 0.000", 
        "Stalls(count): 0 level0_slowdown, 0 level0_slowdown_with_compaction, 0 level0_numfiles, 0 level0_numfiles_with_compaction, 0 pending_compaction_bytes, 0 memtable_compaction, 7 leveln_slowdown_soft, 0 leveln_slowdown_hard", 
        "", 
        "** DB Stats **", 
        "Uptime(secs): 34952.0 total, 0.2 interval", 
        "Cumulative writes: 4990K writes, 17M keys, 4989K batches, 1.0 writes per batch, ingest: 2.02 GB, 0.06 MB/s", 
        "Cumulative WAL: 4990K writes, 0 syncs, 4990122.00 writes per sync, written: 2.02 GB, 0.06 MB/s", 
        "Cumulative compaction: 11.90 GB write, 0.35 MB/s write, 11.90 GB read, 0.35 MB/s read, 322.4 seconds", 
        "Cumulative stall: 00:00:3.548 H:M:S, 0.0 percent", 
        "Interval writes: 0 writes, 0 keys, 0 batches, 0.0 writes per batch, ingest: 0.00 MB, 0.00 MB/s", 
        "Interval WAL: 0 writes, 0 syncs, 0.00 writes per sync, written: 0.00 MB, 0.00 MB/s", 
        "Interval compaction: 0.00 GB write, 0.00 MB/s write, 0.00 GB read, 0.00 MB/s read, 0.0 seconds", 
        "Interval stall: 00:00:0.000 H:M:S, 0.0 percent"
    ],
    "num-immutable-mem-table" : "0",
    "mem-table-flush-pending" : "0",
    "compaction-pending" : "0",
    "background-errors" : "0",
    "cur-size-active-mem-table" : "33MB",
    "cur-size-all-mem-tables" : "33MB",
    "num-entries-active-mem-table" : "185495",
    "num-entries-imm-mem-tables" : "0",
    "estimate-table-readers-mem" : "91MB",
    "num-snapshots" : "1",
    "oldest-snapshot-time" : "1465911051",
    "num-live-versions" : "1",
    "total-live-recovery-units" : 60,
    "block-cache-usage" : "34GB",
    "transaction-engine-keys" : NumberLong(4210),
    "transaction-engine-snapshots" : NumberLong(1),
    "thread-status" : []
}

db.serverStatus()['globalLock'];

{
    "totalTime" : NumberLong(34952090000),
    "currentQueue" : {
        "total" : 57,
        "readers" : 56,
        "writers" : 1
    },
    "activeClients" : {
        "total" : 124,
        "readers" : 57,
        "writers" : 1
    }
}

mongostat 的屏幕截图:(图片:mongostat 输出截图)

此致

4

1 回答 1

1

不断创建新集合毫无意义,这是一个工程设计缺陷。如此数量的集合肯定会导致数据库服务器冻结,因为服务器不得不遍历现有集合的列表,检查目标集合是否在其中。另外,我相信这还涉及元数据更新之类的操作。

从操作日志中可以清楚地看出,构建索引耗费了大量时间:

"query" : {
        "createIndexes" : "_cf71_df2016-07_dt2016-11_tof2_groupct",
        "indexes" : [ 
            {
                "key" : {
                    "expireAt" : 1
                },
                "name" : "exAt",
                "background" : true,
                "expireAfterSeconds" : 0
            }
        ]
    }

此外,创建新的命名空间和构建索引都需要获取锁,而获取锁同样需要时间。同样,在这样的规模下使用 "expireAfterSeconds" : 0 这样的参数也并不明智。基本上,您必须每 60 秒运行 140 万个计时器来查找并清理过期记录,还要确保索引被重建(请参阅 mongostat 中的那些删除操作)。

请考虑做出调整:减少集合数量,或将集合拆分到多个部署中。您还可以删除非活动集合上的索引。此外,明智的做法是删除所有空的和未使用的集合。

要立即缓解问题,您必须先找到当前的瓶颈:RAM、CPU 还是 IOPS。您还有空闲内存,可以将其分配给 MongoDB,这有助于缓存更多数据,避免不必要的读取。请使用 iostat 测量 IOPS,以查看磁盘的繁忙程度。

于 2017-08-14T16:12:38.313 回答