python - 在 n 维数组上使用带有 meshgrid 的 scipy interpn

Question

我正在尝试翻译大型 4D 数组的 Matlab“interpn”插值，但 Matlab 和 Python 之间的公式差异很大。几年前这里有一个很好的问题/答案，我一直在尝试使用。我想我快到了，但显然我的网格插值器仍然没有正确制定。

我尽可能按照上面链接答案中给出的示例对我的代码示例进行建模，同时使用我实际工作的尺寸。唯一的变化是我将 rollaxis 切换为 moveaxis，因为前者已被弃用。

本质上，给定 4D 数组 skyrad0（取决于第一个代码块中定义的四个元素）以及第三个块中定义的两个常量和两个 1D 数组，我想要插值的 2D 结果。

from scipy.interpolate import interpn
import numpy as np

# Define the data space in the 4D skyrad0 array
solzen = np.arange(0,70,10)     # 7
aod = np.arange(0,0.25,0.05)    # 5
index = np.arange(1,92477,1)    # 92476
wave = np.arange(350,1050,5)    # 140

# Simulated skyrad for the values above
skyrad0 = np.random.rand(
    solzen.size,aod.size,index.size,wave.size) # 7, 5, 92476, 140

# Data space for desired output values of skyrad 
# with interpolation between input data space
solzen0 = 30                    # 1
aod0 = 0.1                      # 1
index0 = index                  # 92476
wave0 = np.arange(350,1050,10)  # 70

# Matlab
# result = squeeze(interpn(solzen, aod, index, wave,
#                   skyrad0,
#                   solzen0, aod0, index0, wave0))

# Scipy
points = (solzen, aod, index, wave)             # 7, 5, 92476, 140
interp_mesh = np.array(
    np.meshgrid(solzen0, aod0, index0, wave0))  # 4, 1, 1, 92476, 70
interp_points = np.moveaxis(interp_mesh, 0, -1) # 1, 1, 92476, 70, 4
interp_points = interp_points.reshape(
    (interp_mesh.size // interp_mesh.shape[3], 
    interp_mesh.shape[3]))                      # 280, 92476

result = interpn(points, skyrad0, interp_points)

我期待一个 4D 数组“结果”，我可以 numpy.squeeze 到我需要的 2D 答案中，但是 interpn 会产生错误：

ValueError: The requested sample points xi have dimension 92476, but this RegularGridInterpolator has dimension 4

在这个例子中我最模糊的地方是查询点网格的结构，以及将第一个维度移动到末尾并重新塑造它。这里还有更多内容，但我仍然不清楚如何将其应用于这个问题。

如果有人能在我的配方中发现明显的低效率，那将是一个奖励。我需要在许多不同的结构上运行这种类型的插值数千次——甚至扩展到 6D——所以效率很重要。

更新下面的答案非常优雅地解决了这个问题。然而，随着计算和数组变得更加复杂，另一个问题开始出现，即数组中的元素似乎不是单调增加的问题。这是在 6D 中重构的问题：

# Data space in the 6D rad_boa array
azimuth = np.arange(0, 185, 5) # 37
senzen = np.arange(0, 185, 5) # 37
wave = np.arange(350,1050,5)    # 140
# wave = np.array([350, 360, 370, 380, 390, 410, 440, 470, 510, 550, 610, 670, 750, 865, 1040, 1240, 1640, 2250]) # 18
solzen = np.arange(0,65,5)     # 13
aod = np.arange(0,0.55,0.05)    # 11
wind = np.arange(0, 20, 5)      # 4

# Simulated rad_boa
rad_boa = np.random.rand(
    azimuth.size,senzen.size,wave.size,solzen.size,aod.size,wind.size,) # 37, 37, 140/18, 13, 11, 4

azimuth0 = 135              # 1
senzen0 = 140               # 1
wave0 = np.arange(350,1010,10) # 66
solzen0 = 30                # 1
aod0 = 0.1                  # 1
wind0 = 10                  # 1

da = xr.DataArray(name='Radiance_BOA',
                data=rad_boa,
                dims=['azimuth','senzen','wave','solzen','aod','wind'],
                coords=[azimuth,senzen,wave,solzen,aod,wind])

rad_inc_scaXR = da.loc[azimuth0,senzen0,wave0,solzen0,aod0,wind0].squeeze()

就目前而言，它会运行，但是如果将 wave 的定义更改为注释行，则会引发错误：

KeyError: "not all values found in index 'wave'"

最后，为了回应下面的评论（并帮助提高效率），我将包含实际构建这个“rad_boa”6D 数组的 HDF5 文件（在 Matlab 中创建）的结构（上面的这个例子只使用了模拟随机大批）。实际数据库读入 Xarray 如下：

sdb = xr.open_dataset(db_path, group='sdb')

生成的 Xarray 看起来像这样：

score 2 · Accepted Answer

为什么是 ValueError？

首先，scipy.interpolate.interpn要求interp_points.shape[-1]与问题中的维数相同。这就是为什么你ValueError从你的代码片段中得到一个 - 你interp_points有 92476 as n_dims，这与实际的暗淡数量（4）发生冲突。

快速解决

您可以通过简单地更改操作顺序来修复此代码段。你试图过早地挤压——如果你在 interp之后挤压：

points = (solzen, aod, index, wave)                 # 7, 5, 92476, 140
mg = np.meshgrid(solzen0, aod0, index0, wave0)      # 4, 1, 1, 92476, 70
interp_points = np.moveaxis(mg, 0, -1)              # 1, 1, 92476, 70, 4
result_presqueeze = interpn(points, 
                            skyrad0, interp_points) # 1, 1, 92476, 70
result = np.squeeze(result_presqueeze,
                    axis=(0,1))                     # 92476, 70

我已经替换interp_mesh为mghere，并删除了np.array（它不是必需的，因为np.meshgrid返回一个ndarray对象）。

对性能的评论

xarray我认为您的代码片段很好，但是如果您正在处理标记数据，您可能希望使用它，因为它是：

比未标记的numpy数组更具可读性
还可以使用dask处理一些后台工作（如果您正在检查 6D 中的大量数据，这很有用）

更新：哎呀！这应该是.interp，不是.loc。下面的代码片段有效，因为数据点实际上是原始数据点。作为对其他人的警告：

from scipy.interpolate import interpn
import numpy as np
from xarray import DataArray

# Define the data space in the 4D skyrad0 array
solzen = np.arange(0,70,10)     # 7
aod = np.arange(0,0.25,0.05)    # 5
index = np.arange(1,92477,1)    # 92476
wave = np.arange(350,1050,5)    # 140

# Simulated skyrad for the values above
skyrad0 = np.random.rand(
    solzen.size,aod.size,index.size,wave.size) # 7, 5, 92476, 140

# Data space for desired output values of skyrad 
# with interpolation between input data space
solzen0 = 30                    # 1
aod0 = 0.1                      # 1
index0 = index                  # 92476
wave0 = np.arange(350,1050,10)  # 70

def slow():
    points = (solzen, aod, index, wave)                 # 7, 5, 92476, 140
    mg = np.meshgrid(solzen0, aod0, index0, wave0)      # 4, 1, 1, 92476, 70
    interp_points = np.moveaxis(mg, 0, -1)              # 1, 1, 92476, 70, 4
    result_presqueeze = interpn(points, 
                                skyrad0, interp_points) # 1, 1, 92476, 70
    result = np.squeeze(result_presqueeze,
                        axis=(0,1))                     # 92476, 70
    return result

# This function uses .loc instead of .interp!
"""
def fast():
    da = DataArray(name='skyrad0',
                   data=skyrad0,
                   dims=['solzen','aod','index','wave'],
                   coords=[solzen, aod, index, wave])

    result = da.loc[solzen0, aod0, index0, wave0].squeeze()

    return result
"""

通过对 OP 给出的更新代码片段进行一些修改：

import numpy as np
import xarray as xr
from scipy.interpolate import interpn

azimuth = np.arange(0, 185, 5) # 37
senzen = np.arange(0, 185, 5) # 37
#wave = np.arange(350,1050,5)    # 140
wave = np.asarray([350, 360, 370, 380, 390, 410, 440, 470, 510,
                   550, 610, 670, 750, 865, 1040, 1240, 1640, 2250]) # 18
solzen = np.arange(0,65,5)     # 13
aod = np.arange(0,0.55,0.05)    # 11
wind = np.arange(0, 20, 5)      # 4

coords = [azimuth, senzen, wave, solzen, aod, wind]

azimuth0 = 135              # 1
senzen0 = 140               # 1
wave0 = np.arange(350,1010,10) # 66
solzen0 = 30                # 1
aod0 = 0.1                  # 1
wind0 = 10                  # 1

interp_coords = [azimuth0, senzen0, wave0, solzen0, aod0, wind0]

# Simulated rad_boa
rad_boa = np.random.rand(
    *map(lambda x: x.size, coords)) # 37, 37, 140/18, 13, 11, 4

def slow():
    mg = np.meshgrid(*interp_coords)
    interp_points = np.moveaxis(mg, 0, -1)
    result_presqueeze = interpn(coords, 
                                rad_boa, interp_points)
    result = np.squeeze(result_presqueeze)
    return result

def fast():
    da = xr.DataArray(name='Radiance_BOA',
                    data=rad_boa,
                    dims=['azimuth','senzen','wave','solzen','aod','wind'],
                    coords=coords)

    interp_dict = dict(zip(da.dims, interp_coords))

    rad_inc_scaXR = da.interp(**interp_dict).squeeze()
    return rad_inc_scaXR

这是相当快的：

>>> %timeit slow()
2.09 ms ± 85.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
>>> %timeit fast()
343 ms ± 6.77 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
>>> np.array_equal(slow(),fast())
True

您可以在此处找到有关xarray插值的更多信息。数据集实例具有非常相似的语法。

也可以根据需要更改插值方法（也许，可能希望为离散插值问题提供关键字参数method='nearest'）.interp。

更高级的东西

如果您希望实现更高级的东西，我建议您使用 MARS（多元自适应回归样条）的实现之一。它介于标准回归和插值之间，适用于多维情况。在 Python 3 中，最好的选择是pyearth.

python - 在 n 维数组上使用带有 meshgrid 的 scipy interpn

1 回答 1

Related

Reference