python - 与 PyTorch 相比，Numpy 有限差分计算速度非常慢

Question

我的目标是通过有限差分模拟反应扩散微分方程，但从二维波动方程开始。由于这需要相当长的时间，我想尽可能快地完成基本实现，并且一直在尝试使用 numpy/cupy/numba/pytorch 来查看哪个是最快的。让我感到非常奇怪的是，我使用 PyTorch 的实现在迭代运行所需的时间内几乎是恒定的，但是 numpy 变得越来越慢。我假设 torch 在后台运行某种智能垃圾收集，并且 numpy 不会删除未使用的数组（在内存使用量增加中可见，而 torch 是恒定的）。然而，即使在手动优化 numpy 代码以使其就地运行并删除未使用的数组之后，它仍然没有

下面是 numpy 代码，以及注释中的火炬代码。

import matplotlib.pyplot as plt
import numpy as np
from scipy.ndimage import laplace

# import torch
# import torch.nn.functional as F
import matplotlib.animation as animation
import tqdm


def I(X, Y):
    d = (X - 0.5)**2 + (Y - 0.5)**2
    inner = (d < 0.01)
    # inner = (d < 0.01).float()
    return 0 * X * (1 - inner) + (0 * X + 1) * inner


def laplace(u):
    # u_pad = F.pad(u.unsqueeze(0).unsqueeze_(0), (1, 1, 1, 1), mode='replicate').squeeze()
    u_pad = np.pad(u, (1, 1), mode='edge')
    u = u_pad[2:, 1:-1] + u_pad[1:-1, 2:] + u_pad[:-2, 1:-1] + u_pad[1:-1, :-2] - 4 * u
    u[:, 0] = u[0, :] = u[:, -1] = u[-1, :] = 0
    return u


def initial_step(u, C):
    return u - laplace(u) * C**2 / 2

def step(u, u_prev, C):
    return -u_prev + 2 * u + laplace(u) * C**2


class Simulate2d():
    def __init__(self, init_fn, initial_step_fn, step_fn, c=0.5, T=1, n_grid=1000, n_steps=200, n_levels=30):
        # x = torch.linspace(0, 1, n_grid)
        x = np.linspace(0, 1, n_grid)
        dx = x[1] - x[0]
        dt = T / n_steps

        self.t = 0
        self.n_steps = n_steps

        # self.mesh = torch.meshgrid(x, x)
        # self.cvals = torch.linspace(-1.1, 1.1, n_levels)
        self.mesh = np.meshgrid(x, x)
        self.cvals = np.linspace(-1.1, 1.1, n_levels)

        self.C = (c * dt / dx)

        self.U_prev = None
        self.U = init_fn(*self.mesh)
        self.cont = ax.contourf(*self.mesh, self.U, self.cvals)

        self.step_fn = step_fn
        self.initial_step_fn = initial_step_fn
        self.pbar = None

    def forward(self, u, u_prev):
        return self.initial_step_fn(u, self.C) if self.t == 0 else self.step_fn(u, u_prev, self.C)

    def animation_step(self, t):
        self.t = t
        for c in self.cont.collections:
            c.remove()

        u_next = self.forward(self.U, self.U_prev)
        # del self.U_prev
        self.U, self.U_prev = u_next, self.U
        self.cont = plt.contourf(*self.mesh, self.U, self.cvals)

        plt.title(f't={t}')
        if self.pbar is not None:
            self.pbar.update(1)

        return self.cont

    def test(self):
        for t in tqdm.trange(self.n_steps):
            self.animation_step(t)

    def create_animation(self, file, dpi=300):
        self.pbar = tqdm.tqdm(total=self.n_steps)
        ani = animation.FuncAnimation(fig, self.animation_step, frames=self.n_steps)
        ani.save(file, dpi=dpi)
        print(f'{file} saved')
        return ani


fig, ax = plt.subplots(figsize=(15, 15))
plt.axis('equal')
plt.xlim(0, 1)
plt.ylim(0, 1)

simu = Simulate2d(init_fn=I, initial_step_fn=initial_step, step_fn=step)
# simu.create_animation('reaction-diffusion.gif')
simu.test()

> Executing task: python -m IPython /home/fe/willis/git/reaction-diffusion/reaction_diffusion_torch.py --no-banner --no-confirm-exit <

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [01:02<00:00,  3.20it/s]

Terminal will be reused by tasks, press any key to close it.

> Executing task: python -m IPython /home/fe/willis/git/reaction-diffusion/reaction_diffusion.py --no-banner --no-confirm-exit <

...[hiding numpy runtime warnings]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [02:54<00:00,  1.15it/s]

Terminal will be reused by tasks, press any key to close it.

这是经过修改以使操作就地的部分。

def laplace(u):
    u_pad = np.pad(u, (1, 1), mode='edge')
    u = - 4 * u
    u += u_pad[2:, 1:-1]
    u += u_pad[1:-1, 2:]
    u += u_pad[:-2, 1:-1]
    u += u_pad[1:-1, :-2]
    u[:, 0] = u[0, :] = u[:, -1] = u[-1, :] = 0
    del u_pad
    return u

def step(u, u_prev, C):
    lap = laplace(u)
    lap *= C**2
    out = 2 * u
    out += lap
    out -= u_prev
    del lap
    return out

    ...
    def animation_step(self, t):
        self.t = t
        for c in self.cont.collections:
            c.remove()

        u_next = self.forward(self.U, self.U_prev)
        del self.U_prev
        ...

基本上我在问：为了让 numpy 代码与 PyTorch 实现一样快，我需要做什么？

python - 与 PyTorch 相比，Numpy 有限差分计算速度非常慢

0 回答 0

Related

Reference