我们有大型 MongoDB 数据库(大约 140 万个集合)、MongoDB 3.0、引擎 RocksDB、操作系统 Ubuntu 14.04。
该数据库位于具有 16 个内核和 108 GB RAM 的虚拟机 (VmWare vCloud) 上(当前 mongoDB 使用 70GB 内存而没有交换)。
生产设置选项:
- 专用分区上的数据 - XFS 文件系统
- 已启用 transparent_hugepage - 从不
- transparent_hugepage 碎片整理 - 从不
数据库统计:
{
"db" : "ctp",
"collections" : 1369486,
"objects" : 20566852,
"avgObjSize" : 1126.82749999854,
"dataSize" : 23175294422,
"storageSize" : 23231888384,
"numExtents" : 0,
"indexes" : 6686175,
"indexSize" : 685981393,
"ok" : 1
}
样本集合大小:
{
"ns" : "ctp._cf123_ct49_dfc-r_dtc-r_tof2_groupat",
"count" : 33,
"size" : 38172,
"avgObjSize" : 1156,
"storageSize" : 38144,
"capped" : false,
"nindexes" : 5,
"totalIndexSize" : 6312,
"indexSizes" : {
"_id_" : 18,
"exAt" : 16,
"unique" : 6246,
"_smp" : 10,
"_smpdf" : 22
},
"ok" : 1
}
{
"ns" : "ctp._afpoznan123_atlondyn49_df2016-09_dt2016-09_tof2_groupdfdt",
"count" : 188,
"size" : 208677,
"avgObjSize" : 1109,
"storageSize" : 208640,
"capped" : false,
"nindexes" : 5,
"totalIndexSize" : 7945,
"indexSizes" : {
"_id_" : 2845,
"exAt" : 256,
"_smp" : 160,
"_smpdf" : 352,
"unique" : 4332
},
"ok" : 1
}
{
"ns" : "ctp._cf123_ct42_dfc-r_dtc-r_tof2_groupat",
"count" : 27,
"size" : 30400,
"avgObjSize" : 1125,
"storageSize" : 30208,
"capped" : false,
"nindexes" : 5,
"totalIndexSize" : 84,
"indexSizes" : {
"_id_" : 18,
"exAt" : 16,
"unique" : 18,
"_smp" : 10,
"_smpdf" : 22
},
"ok" : 1
}
每隔 5 分钟定期运行脚本,该脚本会写入这些集合并在此集合不存在时创建新集合(集合名称基于这些集合中的数据)并创建索引。
我们注意到,在将数据写入集合期间,该服务器出现了一些冻结。这种冻结可能需要 5 到 60 秒。
有没有人遇到过这个问题并可以帮助我们?
以下是冻结时刻的一些日志:
db.currentOP();
"opid" : 22717868,
"active" : false,
"op" : "query",
"ns" : "ctp._cf115_atboma25_dfc-r_dtc-r_tof2_groupdfym",
"query" : {
"$query" : {
"T#df" : {
"$lt" : "2017-02-28"
}
},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33832",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(22004831)
}
}
}
},
{
"desc" : "conn135907",
"threadId" : "0xc3e5d64e0",
"connectionId" : 135907,
"opid" : 22719375,
"active" : true,
"secs_running" : 0,
"microsecs_running" : NumberLong(223601),
"op" : "query",
"ns" : "top_search.top_searches",
"query" : {
"$msg" : "query not recording (too large)"
},
"planSummary" : "IXSCAN { T#df: 1, T#dt: 1 }",
"client" : "192.168.1.33:33648",
"numYields" : 170,
"locks" : {
"Global" : "r",
"Database" : "r",
"Collection" : "r"
},
"waitingForLock" : false,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(342)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(171)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(171)
}
}
}
},
{
"desc" : "conn135959",
"threadId" : "0x10d4445260",
"connectionId" : 135959,
"opid" : 22718533,
"active" : false,
"op" : "query",
"ns" : "ctp._afoxford-house23_attamarindo32_dfc-r_dtc-r_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:34022",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(15003580)
}
}
}
},
{
"desc" : "conn135829",
"threadId" : "0x10d4445740",
"connectionId" : 135829,
"opid" : 22717923,
"active" : false,
"op" : "query",
"ns" : "ctp._ct123_dfc-r_dtc-r_tof2_groupdfym",
"query" : {
"$query" : {
"T#df" : {
"$lt" : "2017-02-28"
}
},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33026",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(21004810)
}
}
}
},
{
"desc" : "conn135781",
"threadId" : "0x2d678e0",
"connectionId" : 135781,
"opid" : 22718920,
"active" : false,
"op" : "query",
"ns" : "ctp._cf1147_atrostock36_df2016-06_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:60874",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(12002770)
}
}
}
},
{
"desc" : "conn135870",
"threadId" : "0xd04ed5d40",
"connectionId" : 135870,
"opid" : 22719172,
"active" : false,
"op" : "query",
"ns" : "ctp._cf61_atpristina131_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33369",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(7001590)
}
}
}
},
{
"desc" : "conn135687",
"threadId" : "0xc3e5d7380",
"connectionId" : 135687,
"opid" : 22717925,
"active" : false,
"op" : "query",
"ns" : "ctp._cf105_athana156_df2016-06_dt2016-06_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:60022",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(21003871)
}
}
}
},
{
"desc" : "conn135754",
"threadId" : "0xd04ed5860",
"connectionId" : 135754,
"opid" : 22718485,
"active" : false,
"op" : "query",
"ns" : "ctp._cf5_atdhaka1113_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:60603",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(15503084)
}
}
}
},
{
"desc" : "conn135644",
"threadId" : "0xc3e5d9c20",
"connectionId" : 135644,
"opid" : 22719073,
"active" : false,
"op" : "query",
"ns" : "ctp._afgenua71_ataarhus37_dfc-r_dtc-r_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:59698",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(7501602)
}
}
}
},
{
"desc" : "conn135891",
"threadId" : "0xd04ed7a80",
"connectionId" : 135891,
"opid" : 22719284,
"active" : false,
"op" : "query",
"ns" : "ctp._attianjin30_dfc-r_dtc-r_tof2_groupcf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33530",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(3000658)
}
}
}
},
{
"desc" : "conn135673",
"threadId" : "0xd04ed6220",
"connectionId" : 135673,
"opid" : 22718185,
"active" : false,
"op" : "query",
"ns" : "ctp._afwroclaw123_atlondyn49_df2016-06_dt2016-06_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:59925",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(16503737)
}
}
}
},
{
"desc" : "conn135989",
"threadId" : "0x10d44443c0",
"connectionId" : 135989,
"opid" : 22719240,
"active" : false,
"op" : "query",
"ns" : "ctp._cf28_atmarakesz93_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:34367",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(4500947)
}
}
}
},
{
"desc" : "conn135410",
"threadId" : "0x2d66220",
"connectionId" : 135410,
"opid" : 22717853,
"active" : true,
"secs_running" : 22,
"microsecs_running" : NumberLong(22406019),
"op" : "query",
"ns" : "ctp.$cmd",
"query" : {
"createIndexes" : "_cf71_df2016-07_dt2016-11_tof2_groupct",
"indexes" : [
{
"key" : {
"expireAt" : 1
},
"name" : "exAt",
"background" : true,
"expireAfterSeconds" : 0
}
]
},
"client" : "0.0.0.0:0",
"numYields" : 0,
"locks" : {
"Global" : "w",
"Database" : "W"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(376227),
"w" : NumberLong(15477)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(180375),
"w" : NumberLong(15476),
"W" : NumberLong(1)
},
"acquireWaitCount" : {
"W" : NumberLong(1)
},
"timeAcquiringMicros" : {
"W" : NumberLong(22004935)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(180375),
"w" : NumberLong(15476)
}
}
}
},
{
"desc" : "conn135961",
"threadId" : "0x10d4442b60",
"connectionId" : 135961,
"opid" : 22718537,
"active" : false,
"op" : "query",
"ns" : "ctp._cf5_atattawapiskat23_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:34029",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(15002978)
}
}
}
},
{
"desc" : "conn135905",
"threadId" : "0xc3e5d6000",
"connectionId" : 135905,
"opid" : 22718186,
"active" : false,
"op" : "query",
"ns" : "ctp._afwarszawa123_atdubrownik61_df2016-08_dt2016-08_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:33638",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(16503305)
}
}
}
}
]
db.serverStatus()["rocksdb"];
{
"stats" : [
"",
"** Compaction Stats [default] **",
"Level Files Size(MB) Score Read(GB) Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) Comp(cnt) Avg(sec) Stall(cnt) KeyIn KeyDrop",
"---------------------------------------------------------------------------------------------------------------------------------------------------------------------",
" L0 0/0 0.00 0.0 0.0 0.0 0.0 1.4 1.4 0.0 0.0 0.0 120.1 12 39 0.312 0 0 0",
" L4 0/0 0.00 0.0 1.8 1.8 0.0 1.7 1.7 0.0 1.0 102.0 99.7 18 11 1.606 7 21M 153K",
" L5 15/0 620.47 1.0 6.6 1.4 5.2 5.5 0.3 0.0 3.9 44.4 37.0 152 25 6.086 0 110M 840K",
" L6 106/0 6401.43 0.0 3.5 0.3 3.3 3.3 -0.0 0.0 12.6 25.9 23.7 140 7 20.057 0 162M 14M",
" Sum 121/0 7021.90 0.0 11.9 3.4 8.5 11.9 3.4 0.0 8.3 37.8 37.8 322 82 3.932 7 295M 15M",
" Int 0/0 0.00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 0.000 0 0 0",
"Flush(GB): cumulative 1.429, interval 0.000",
"Stalls(count): 0 level0_slowdown, 0 level0_slowdown_with_compaction, 0 level0_numfiles, 0 level0_numfiles_with_compaction, 0 pending_compaction_bytes, 0 memtable_compaction, 7 leveln_slowdown_soft, 0 leveln_slowdown_hard",
"",
"** DB Stats **",
"Uptime(secs): 34952.0 total, 0.2 interval",
"Cumulative writes: 4990K writes, 17M keys, 4989K batches, 1.0 writes per batch, ingest: 2.02 GB, 0.06 MB/s",
"Cumulative WAL: 4990K writes, 0 syncs, 4990122.00 writes per sync, written: 2.02 GB, 0.06 MB/s",
"Cumulative compaction: 11.90 GB write, 0.35 MB/s write, 11.90 GB read, 0.35 MB/s read, 322.4 seconds",
"Cumulative stall: 00:00:3.548 H:M:S, 0.0 percent",
"Interval writes: 0 writes, 0 keys, 0 batches, 0.0 writes per batch, ingest: 0.00 MB, 0.00 MB/s",
"Interval WAL: 0 writes, 0 syncs, 0.00 writes per sync, written: 0.00 MB, 0.00 MB/s",
"Interval compaction: 0.00 GB write, 0.00 MB/s write, 0.00 GB read, 0.00 MB/s read, 0.0 seconds",
"Interval stall: 00:00:0.000 H:M:S, 0.0 percent"
],
"num-immutable-mem-table" : "0",
"mem-table-flush-pending" : "0",
"compaction-pending" : "0",
"background-errors" : "0",
"cur-size-active-mem-table" : "33MB",
"cur-size-all-mem-tables" : "33MB",
"num-entries-active-mem-table" : "185495",
"num-entries-imm-mem-tables" : "0",
"estimate-table-readers-mem" : "91MB",
"num-snapshots" : "1",
"oldest-snapshot-time" : "1465911051",
"num-live-versions" : "1",
"total-live-recovery-units" : 60,
"block-cache-usage" : "34GB",
"transaction-engine-keys" : NumberLong(4210),
"transaction-engine-snapshots" : NumberLong(1),
"thread-status" : []
}
db.serverStatus()['globalLock'];
{
"totalTime" : NumberLong(34952090000),
"currentQueue" : {
"total" : 57,
"readers" : 56,
"writers" : 1
},
"activeClients" : {
"total" : 124,
"readers" : 57,
"writers" : 1
}
}
此致