我的目标是通过有限差分模拟反应扩散微分方程,但从二维波动方程开始。由于这需要相当长的时间,我想尽可能快地完成基本实现,并且一直在尝试使用 numpy/cupy/numba/pytorch 来查看哪个是最快的。让我感到非常奇怪的是,我使用 PyTorch 的实现在迭代运行所需的时间内几乎是恒定的,但是 numpy 变得越来越慢。我假设 torch 在后台运行某种智能垃圾收集,并且 numpy 不会删除未使用的数组(在内存使用量增加中可见,而 torch 是恒定的)。然而,即使在手动优化 numpy 代码以使其就地运行并删除未使用的数组之后,它仍然没有
下面是 numpy 代码,以及注释中的火炬代码。
import matplotlib.pyplot as plt
import numpy as np
from scipy.ndimage import laplace
# import torch
# import torch.nn.functional as F
import matplotlib.animation as animation
import tqdm
def I(X, Y):
d = (X - 0.5)**2 + (Y - 0.5)**2
inner = (d < 0.01)
# inner = (d < 0.01).float()
return 0 * X * (1 - inner) + (0 * X + 1) * inner
def laplace(u):
# u_pad = F.pad(u.unsqueeze(0).unsqueeze_(0), (1, 1, 1, 1), mode='replicate').squeeze()
u_pad = np.pad(u, (1, 1), mode='edge')
u = u_pad[2:, 1:-1] + u_pad[1:-1, 2:] + u_pad[:-2, 1:-1] + u_pad[1:-1, :-2] - 4 * u
u[:, 0] = u[0, :] = u[:, -1] = u[-1, :] = 0
return u
def initial_step(u, C):
return u - laplace(u) * C**2 / 2
def step(u, u_prev, C):
return -u_prev + 2 * u + laplace(u) * C**2
class Simulate2d():
def __init__(self, init_fn, initial_step_fn, step_fn, c=0.5, T=1, n_grid=1000, n_steps=200, n_levels=30):
# x = torch.linspace(0, 1, n_grid)
x = np.linspace(0, 1, n_grid)
dx = x[1] - x[0]
dt = T / n_steps
self.t = 0
self.n_steps = n_steps
# self.mesh = torch.meshgrid(x, x)
# self.cvals = torch.linspace(-1.1, 1.1, n_levels)
self.mesh = np.meshgrid(x, x)
self.cvals = np.linspace(-1.1, 1.1, n_levels)
self.C = (c * dt / dx)
self.U_prev = None
self.U = init_fn(*self.mesh)
self.cont = ax.contourf(*self.mesh, self.U, self.cvals)
self.step_fn = step_fn
self.initial_step_fn = initial_step_fn
self.pbar = None
def forward(self, u, u_prev):
return self.initial_step_fn(u, self.C) if self.t == 0 else self.step_fn(u, u_prev, self.C)
def animation_step(self, t):
self.t = t
for c in self.cont.collections:
c.remove()
u_next = self.forward(self.U, self.U_prev)
# del self.U_prev
self.U, self.U_prev = u_next, self.U
self.cont = plt.contourf(*self.mesh, self.U, self.cvals)
plt.title(f't={t}')
if self.pbar is not None:
self.pbar.update(1)
return self.cont
def test(self):
for t in tqdm.trange(self.n_steps):
self.animation_step(t)
def create_animation(self, file, dpi=300):
self.pbar = tqdm.tqdm(total=self.n_steps)
ani = animation.FuncAnimation(fig, self.animation_step, frames=self.n_steps)
ani.save(file, dpi=dpi)
print(f'{file} saved')
return ani
fig, ax = plt.subplots(figsize=(15, 15))
plt.axis('equal')
plt.xlim(0, 1)
plt.ylim(0, 1)
simu = Simulate2d(init_fn=I, initial_step_fn=initial_step, step_fn=step)
# simu.create_animation('reaction-diffusion.gif')
simu.test()
> Executing task: python -m IPython /home/fe/willis/git/reaction-diffusion/reaction_diffusion_torch.py --no-banner --no-confirm-exit <
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [01:02<00:00, 3.20it/s]
Terminal will be reused by tasks, press any key to close it.
> Executing task: python -m IPython /home/fe/willis/git/reaction-diffusion/reaction_diffusion.py --no-banner --no-confirm-exit <
...[hiding numpy runtime warnings]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [02:54<00:00, 1.15it/s]
Terminal will be reused by tasks, press any key to close it.
这是经过修改以使操作就地的部分。
def laplace(u):
u_pad = np.pad(u, (1, 1), mode='edge')
u = - 4 * u
u += u_pad[2:, 1:-1]
u += u_pad[1:-1, 2:]
u += u_pad[:-2, 1:-1]
u += u_pad[1:-1, :-2]
u[:, 0] = u[0, :] = u[:, -1] = u[-1, :] = 0
del u_pad
return u
def step(u, u_prev, C):
lap = laplace(u)
lap *= C**2
out = 2 * u
out += lap
out -= u_prev
del lap
return out
...
def animation_step(self, t):
self.t = t
for c in self.cont.collections:
c.remove()
u_next = self.forward(self.U, self.U_prev)
del self.U_prev
...
基本上我在问:为了让 numpy 代码与 PyTorch 实现一样快,我需要做什么?