0

我有 Go 代码,我已经为它编写了单元测试。测试运行 6.5 秒,它会产生巨大的 CPU 负载。我在 docker 容器中运行测试,它的 CPU 使用率达到 400-500%。我得到了 cpuprofile,它显示了以下结果:

(pprof) top
Showing nodes accounting for 4260ms, 69.49% of 6130ms total
Dropped 80 nodes (cum <= 30.65ms)
Showing top 10 nodes out of 79
      flat  flat%   sum%        cum   cum%
    1250ms 20.39% 20.39%     1250ms 20.39%  runtime._ExternalCode /usr/local/go/src/runtime/proc.go
     640ms 10.44% 30.83%      960ms 15.66%  runtime.scanobject /usr/local/go/src/runtime/mgcmark.go
     580ms  9.46% 40.29%      580ms  9.46%  runtime.memmove /usr/local/go/src/runtime/memmove_amd64.s
     420ms  6.85% 47.15%      730ms 11.91%  runtime.mapassign_faststr /usr/local/go/src/runtime/hashmap_fast.go
     310ms  5.06% 52.20%      310ms  5.06%  runtime.duffcopy /usr/local/go/src/runtime/duff_amd64.s
     260ms  4.24% 56.44%      260ms  4.24%  runtime.heapBitsForObject /usr/local/go/src/runtime/mbitmap.go
     220ms  3.59% 60.03%      220ms  3.59%  runtime.scanobject /usr/local/go/src/runtime/mbitmap.go
     200ms  3.26% 63.30%     1630ms 26.59%  runtime.mallocgc /usr/local/go/src/runtime/malloc.go
     200ms  3.26% 66.56%      200ms  3.26%  runtime.memclrNoHeapPointers /usr/local/go/src/runtime/memclr_amd64.s
     180ms  2.94% 69.49%     2250ms 36.70%  runtime.systemstack /usr/local/go/src/runtime/asm_amd64.s

我不明白这里的 ExternalCode 是什么。在这个结果中也没有我的实际代码的功能。我知道我的一些函数可能会调用这些内置的 go 东西,最终可能会在那里花费大量时间。但是它并没有给我方向来改进我的代码的性能。这个结果有什么明显的地方需要注意吗?

我尝试list在 pprof 交互模式下使用命令来显示 CPU 使用率高的功能。

(pprof) list _ExternalCode
Total: 6.13s
ROUTINE ======================== runtime._ExternalCode in /usr/local/go/src/runtime/proc.go
     1.25s      1.25s (flat, cum) 20.39% of Total
         .          .   3231:   signalLock uint32
         .          .   3232:   hz         int32
         .          .   3233:}
         .          .   3234:
         .          .   3235:func _System()                    { _System() }
     1.25s      1.25s   3236:func _ExternalCode()              { _ExternalCode() }
         .          .   3237:func _LostExternalCode()          { _LostExternalCode() }
         .          .   3238:func _GC()                        { _GC() }
         .          .   3239:func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() }
         .          .   3240:
         .          .   3241:// Counts SIGPROFs received while in atomic64 critical section, on mips{,le}
(pprof) 
(pprof) 
(pprof) 
(pprof) list scanobject
Total: 6.13s
ROUTINE ======================== runtime.scanobject in /usr/local/go/src/runtime/mbitmap.go
     220ms      220ms (flat, cum)  3.59% of Total
         .          .    479:
         .          .    480:// The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer.
         .          .    481:// The result includes in its higher bits the bits for subsequent words
         .          .    482:// described by the same bitmap byte.
         .          .    483:func (h heapBits) bits() uint32 {
         .          .    484:   // The (shift & 31) eliminates a test and conditional branch
         .          .    485:   // from the generated code.
     200ms      200ms    486:   return uint32(*h.bitp) >> (h.shift & 31)
         .          .    487:}
         .          .    488:
         .          .    489:// morePointers returns true if this word and all remaining words in this object
         .          .    490:// are scalars.
         .          .    491:// h must not describe the second word of the object.
ROUTINE ======================== runtime.scanobject in /usr/local/go/src/runtime/mgcmark.go
     640ms      960ms (flat, cum) 15.66% of Total
         .          .   1155:           n = maxObletBytes
         .          .   1156:       }
         .          .   1157:   }
         .          .   1158:
         .          .   1159:   var i uintptr
     100ms      100ms   1160:   for i = 0; i < n; i += sys.PtrSize {
         .          .   1161:       // Find bits for this word.
      10ms       10ms   1162:       if i != 0 {
         .          .   1163:           // Avoid needless hbits.next() on last iteration.
      10ms       10ms   1164:           hbits = hbits.next()
         .          .   1165:       }
         .          .   1166:       // Load bits once. See CL 22712 and issue 16973 for discussion.
         .          .   1167:       bits := hbits.bits()
         .          .   1168:       // During checkmarking, 1-word objects store the checkmark
         .          .   1169:       // in the type bit for the one word. The only one-word objects
         .          .   1170:       // are pointers, or else they'd be merged with other non-pointer
         .          .   1171:       // data into larger allocations.
      80ms       80ms   1172:       if i != 1*sys.PtrSize && bits&bitScan == 0 {
         .          .   1173:           break // no more pointers in this object
         .          .   1174:       }
      10ms       10ms   1175:       if bits&bitPointer == 0 {
         .          .   1176:           continue // not a pointer
         .          .   1177:       }
         .          .   1178:
         .          .   1179:       // Work here is duplicated in scanblock and above.
         .          .   1180:       // If you make changes here, make changes there too.
      20ms       20ms   1181:       obj := *(*uintptr)(unsafe.Pointer(b + i))
         .          .   1182:
         .          .   1183:       // At this point we have extracted the next potential pointer.
         .          .   1184:       // Check if it points into heap and not back at the current object.
     340ms      340ms   1185:       if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n {
         .          .   1186:           // Mark the object.
      60ms      300ms   1187:           if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i); obj != 0 {
      10ms       90ms   1188:               greyobject(obj, b, i, hbits, span, gcw, objIndex)
         .          .   1189:           }
         .          .   1190:       }
         .          .   1191:   }
         .          .   1192:   gcw.bytesMarked += uint64(n)
         .          .   1193:   gcw.scanWork += int64(i)
4

0 回答 0