1

我正在尝试解决 leetcode 问题排列。但是当我用 -benchmem 进行测试时,我发现它分配太多,达到 1957 年分配/操作时permute([]int{1,2,3,4,5,6})

我发现它在生成 sub-nums 目标时逃到了堆上。即使我尝试分配 [6]int,并使用 unsafe 包来构建切片,它仍然是moved to heap

我的问题是,为什么切片逃逸到堆,我怎么能在堆栈上分配切片?

这是我的代码:

package main

import (
    "fmt"
    "reflect"
    "unsafe"
)


func permute(nums []int) [][]int {
    resLen := 1
    for i := 1; i<= len(nums);i ++{
        resLen *= i
    }
    // pre allocate
    res := make([][]int, resLen)
    for i := range res{
        res[i] = make([]int, 0, len(nums))
    }

    build(res, nums)

    return res
}

func build(res [][]int,targets []int){
    step := len(res) / len(targets)
    for i := range targets{
        for j := i*step; j < (i+1) * step; j ++{
            res[j] = append(res[j], targets[i])
        }
        if len(targets) != 1{
            var ab = [6]int{}
            var buff []int
            var bp  *reflect.SliceHeader
            bp = (*reflect.SliceHeader)(unsafe.Pointer(&buff))
            bp.Data = uintptr(unsafe.Pointer(&ab))
            bp.Cap = 6
            buff = append(buff, targets[:i]...)
            buff = append(buff, targets[i+1:]...)
            build(res[i*step:(i+1)*step], buff)
        }
    }
    return
}

func main() {
    nums := []int{1,2,3}
    res := permute(nums)
    fmt.Println(res)
}

build没有不安全但逃逸到堆的函数:

func build(res [][]int, targets []int) {
    step := len(res) / len(targets)
    for i := range targets {
        for j := i * step; j < (i+1)*step; j++ {
            res[j] = append(res[j], targets[i])
        }
        if len(targets) != 1 {
            buff := make([]int, 0, 6) //  make([]int, 0, 6) escapes to heap
            buff = append(buff, targets[:i]...)
            buff = append(buff, targets[i+1:]...)
            build(res[i*step:(i+1)*step], buff)
        }
    }
    return
}

还有我的测试用例:

package main

import "testing"

func Benchmark(b *testing.B){
    for i:=0;i<b.N;i++{
        permute([]int{1,2,3,4,5,6})
    }
}

当我运行时go build -gcflags="-m",它会报告./main.go:32:8: moved to heap: ab

4

1 回答 1

5

试图颠覆编译器使用unsafe.Pointer只会使转义分析更难完成其工作,从而阻止切片被堆栈分配。只需分配一个切片并在每次循环迭代中重用它:

func build(res [][]int, targets []int) {
    buff := make([]int, 0, 6)
    step := len(res) / len(targets)
    for i := range targets {
        buff = buff[:0]
        for j := i * step; j < (i+1)*step; j++ {
            res[j] = append(res[j], targets[i])
        }
        if len(targets) != 1 {
            buff = append(buff, targets[:i]...)
            buff = append(buff, targets[i+1:]...)
            build(res[i*step:(i+1)*step], buff)
        }
    }
    return
}

这可以由编译器正确优化

./main.go:26:17: make([]int, 0, 6) does not escape

并且只会产生所需的分配:

Benchmark-8  44607  26838 ns/op  52992 B/op  721 allocs/op
于 2021-07-12T18:25:26.110 回答