matplotlib - 想要从掩码数组中平滑轮廓

Question

我有一个掩码数组，matplotlib.plt.contourf 使用它在 glabal 地图上投影温度等高线。我试图平滑轮廓，但不幸的是，所提出的解决方案似乎都无法处理蒙面数组。我测试了这些解决方案：

scipy.ndimage.zoom

它们都不起作用（它们也计入掩码值）。有什么办法可以在 maskedArray 上平滑我的轮廓

在尝试了建议的“修复”解决方案后，我添加了这部分，结果没有改变。这是代码（如果有帮助）

import Scientific.IO.NetCDF as S
import mpl_toolkits.basemap as bm
import numpy.ma as MA
import numpy as np
import matplotlib.pyplot as plt
import inpaint

def main():

    fileobj = S.NetCDFFile('Bias.ANN.tas_A1_1.nc', mode='r')

    # take the values
    set1 = {'time', 'lat', 'lon'}
    set2 = set(fileobj.variables.keys())
    set3 = set2 - set1
    datadim = set3.pop()
    print "******************datadim: "+datadim
    data = fileobj.variables[datadim].getValue()[0,:,:]


    lon = fileobj.variables['lon'].getValue()
    lat = fileobj.variables['lat'].getValue()
    fileobj.close()


    data, lon = bm.shiftgrid(180.,data, lon,start=False)
    data = MA.masked_equal(data, 1.0e20)
    #data2 = inpaint.replace_nans(data, 10, 0.25, 2, 'idw')
    #- Make 2-D longitude and latitude arrays:

    [lon2d, lat2d] =np.meshgrid(lon, lat)


    #- Set up map:

    mapproj = bm.Basemap(projection='cyl', 
                       llcrnrlat=-90.0, llcrnrlon=-180.00,
                       urcrnrlat=90.0, urcrnrlon=180.0)
    mapproj.drawcoastlines(linewidth=.5)
    mapproj.drawmapboundary(fill_color='.8')
    #mapproj.drawparallels(N.array([-90, -45, 0, 45, 90]), labels=[1,0,0,0])
    #mapproj.drawmeridians(N.array([0, 90, 180, 270, 360]), labels=[0,0,0,1])
    lonall, latall = mapproj(lon2d, lat2d)

    cmap=plt.cm.Spectral
    #- Make a contour plot of the temperature:
    mymapf = plt.contourf(lonall, latall, data, 20, cmap=cmap)
    #plt.clabel(mymapf, fontsize=12)
    plt.title(cmap.name)
    plt.colorbar(mymapf, orientation='horizontal')

    plt.savefig('sample2.png', dpi=150, edgecolor='red', format='png', bbox_inches='tight', pad_inches=.2)
    plt.close()
if __name__ == "__main__":
  main()

我正在将此代码的输出（第一个图）与 Panoply 的相同数据文件的输出进行比较。放大并更精确地看起来似乎不是平滑度问题，但是 pyplot 模型提供了一个更细的条纹，或者轮廓被更早地切割（外边界清楚地显示了这一点，而内轮廓由于这个事实而不同）。这使得 pyplot 模型看起来不像 Panoply 模型那样平滑。我怎样才能获得（几乎）相同的模型？我区分对了吗？

pyplo 的结果

PanoPly 的结果

score 4 · Accepted Answer

我有类似的问题，谷歌向我指出了这一点：博客文章。基本上，他使用 inpaint 算法来插入缺失值并生成有效的数组进行过滤。

代码在文章末尾，您可以将其保存到站点包（或其他）并将其加载为模块（即inpaint.py）：

import inpaint

filled = inpaint.replace_nans(NANMask, 5, 0.5, 2, 'idw')

我对结果很满意，我想它会很好地适应缺少的温度值。这里还有下一个版本：github，但是代码需要一些清理以供一般使用，因为它是项目的一部分。

为了参考，易于使用和保存，我将在此处发布代码（初始版本）：

# -*- coding: utf-8 -*-

"""A module for various utilities and helper functions"""

import numpy as np
#cimport numpy as np
#cimport cython

DTYPEf = np.float64
#ctypedef np.float64_t DTYPEf_t

DTYPEi = np.int32
#ctypedef np.int32_t DTYPEi_t

#@cython.boundscheck(False) # turn of bounds-checking for entire function
#@cython.wraparound(False) # turn of bounds-checking for entire function
def replace_nans(array, max_iter, tol,kernel_size=1,method='localmean'):
    """Replace NaN elements in an array using an iterative image inpainting algorithm.
The algorithm is the following:
1) For each element in the input array, replace it by a weighted average
of the neighbouring elements which are not NaN themselves. The weights depends
of the method type. If ``method=localmean`` weight are equal to 1/( (2*kernel_size+1)**2 -1 )
2) Several iterations are needed if there are adjacent NaN elements.
If this is the case, information is "spread" from the edges of the missing
regions iteratively, until the variation is below a certain threshold.
Parameters
----------
array : 2d np.ndarray
an array containing NaN elements that have to be replaced
max_iter : int
the number of iterations
kernel_size : int
the size of the kernel, default is 1
method : str
the method used to replace invalid values. Valid options are
`localmean`, 'idw'.
Returns
-------
filled : 2d np.ndarray
a copy of the input array, where NaN elements have been replaced.
"""

#    cdef int i, j, I, J, it, n, k, l
#    cdef int n_invalids

    filled = np.empty( [array.shape[0], array.shape[1]], dtype=DTYPEf)
    kernel = np.empty( (2*kernel_size+1, 2*kernel_size+1), dtype=DTYPEf )

#    cdef np.ndarray[np.int_t, ndim=1] inans
#    cdef np.ndarray[np.int_t, ndim=1] jnans

    # indices where array is NaN
    inans, jnans = np.nonzero( np.isnan(array) )

    # number of NaN elements
    n_nans = len(inans)

    # arrays which contain replaced values to check for convergence
    replaced_new = np.zeros( n_nans, dtype=DTYPEf)
    replaced_old = np.zeros( n_nans, dtype=DTYPEf)

    # depending on kernel type, fill kernel array
    if method == 'localmean':

        print 'kernel_size', kernel_size       
        for i in range(2*kernel_size+1):
            for j in range(2*kernel_size+1):
                kernel[i,j] = 1
        print kernel, 'kernel'

    elif method == 'idw':
        kernel = np.array([[0, 0.5, 0.5, 0.5,0],
                  [0.5,0.75,0.75,0.75,0.5], 
                  [0.5,0.75,1,0.75,0.5],
                  [0.5,0.75,0.75,0.5,1],
                  [0, 0.5, 0.5 ,0.5 ,0]])
        print kernel, 'kernel'      
    else:
        raise ValueError( 'method not valid. Should be one of `localmean`.')

    # fill new array with input elements
    for i in range(array.shape[0]):
        for j in range(array.shape[1]):
            filled[i,j] = array[i,j]

    # make several passes
    # until we reach convergence
    for it in range(max_iter):
        print 'iteration', it
        # for each NaN element
        for k in range(n_nans):
            i = inans[k]
            j = jnans[k]

            # initialize to zero
            filled[i,j] = 0.0
            n = 0

            # loop over the kernel
            for I in range(2*kernel_size+1):
                for J in range(2*kernel_size+1):

                    # if we are not out of the boundaries
                    if i+I-kernel_size < array.shape[0] and i+I-kernel_size >= 0:
                        if j+J-kernel_size < array.shape[1] and j+J-kernel_size >= 0:

                            # if the neighbour element is not NaN itself.
                            if filled[i+I-kernel_size, j+J-kernel_size] == filled[i+I-kernel_size, j+J-kernel_size] :

                                # do not sum itself
                                if I-kernel_size != 0 and J-kernel_size != 0:

                                    # convolve kernel with original array
                                    filled[i,j] = filled[i,j] + filled[i+I-kernel_size, j+J-kernel_size]*kernel[I, J]
                                    n = n + 1*kernel[I,J]

            # divide value by effective number of added elements
            if n != 0:
                filled[i,j] = filled[i,j] / n
                replaced_new[k] = filled[i,j]
            else:
                filled[i,j] = np.nan

        # check if mean square difference between values of replaced
        #elements is below a certain tolerance
        print 'tolerance', np.mean( (replaced_new-replaced_old)**2 )
        if np.mean( (replaced_new-replaced_old)**2 ) < tol:
            break
        else:
            for l in range(n_nans):
                replaced_old[l] = replaced_new[l]

    return filled


def sincinterp(image, x,  y, kernel_size=3 ):
    """Re-sample an image at intermediate positions between pixels.
This function uses a cardinal interpolation formula which limits
the loss of information in the resampling process. It uses a limited
number of neighbouring pixels.
The new image :math:`im^+` at fractional locations :math:`x` and :math:`y` is computed as:
.. math::
im^+(x,y) = \sum_{i=-\mathtt{kernel\_size}}^{i=\mathtt{kernel\_size}} \sum_{j=-\mathtt{kernel\_size}}^{j=\mathtt{kernel\_size}} \mathtt{image}(i,j) sin[\pi(i-\mathtt{x})] sin[\pi(j-\mathtt{y})] / \pi(i-\mathtt{x}) / \pi(j-\mathtt{y})
Parameters
----------
image : np.ndarray, dtype np.int32
the image array.
x : two dimensions np.ndarray of floats
an array containing fractional pixel row
positions at which to interpolate the image
y : two dimensions np.ndarray of floats
an array containing fractional pixel column
positions at which to interpolate the image
kernel_size : int
interpolation is performed over a ``(2*kernel_size+1)*(2*kernel_size+1)``
submatrix in the neighbourhood of each interpolation point.
Returns
-------
im : np.ndarray, dtype np.float64
the interpolated value of ``image`` at the points specified
by ``x`` and ``y``
"""

    # indices
#    cdef int i, j, I, J

    # the output array
    r = np.zeros( [x.shape[0], x.shape[1]], dtype=DTYPEf)

    # fast pi
    pi = 3.1419

    # for each point of the output array
    for I in range(x.shape[0]):
        for J in range(x.shape[1]):

            #loop over all neighbouring grid points
            for i in range( int(x[I,J])-kernel_size, int(x[I,J])+kernel_size+1 ):
                for j in range( int(y[I,J])-kernel_size, int(y[I,J])+kernel_size+1 ):
                    # check that we are in the boundaries
                    if i >= 0 and i <= image.shape[0] and j >= 0 and j <= image.shape[1]:
                        if (i-x[I,J]) == 0.0 and (j-y[I,J]) == 0.0:
                            r[I,J] = r[I,J] + image[i,j]
                        elif (i-x[I,J]) == 0.0:
                            r[I,J] = r[I,J] + image[i,j] * np.sin( pi*(j-y[I,J]) )/( pi*(j-y[I,J]) )
                        elif (j-y[I,J]) == 0.0:
                            r[I,J] = r[I,J] + image[i,j] * np.sin( pi*(i-x[I,J]) )/( pi*(i-x[I,J]) )
                        else:
                            r[I,J] = r[I,J] + image[i,j] * np.sin( pi*(i-x[I,J]) )*np.sin( pi*(j-y[I,J]) )/( pi*pi*(i-x[I,J])*(j-y[I,J]))
    return r


#cdef extern from "math.h":
 #   double sin(double)

score 3 · Accepted Answer

一个适用于掩码数据的简单平滑函数将解决此问题。然后可以避免涉及组成数据的方法（即插值、修复等）；并且应始终避免编造数据。

平滑屏蔽数据时出现的主要问题是，对于每个点，平滑使用相邻值来计算中心点的新值，但是当这些邻居被屏蔽时，中心点的新值也将被屏蔽，因为掩码数组的规则。因此，需要使用未屏蔽的数据进行计算，并明确说明屏蔽。这很容易做到，并且不在下面的函数smooth中。

from numpy import *
import pylab as plt

#  make a grid and a striped mask as test data
N = 100
x = linspace(0, 5, N, endpoint=True)
grid = 2. + 1.*(sin(2*pi*x)[:,newaxis]*sin(2*pi*x)>0.)
m = resize((sin(pi*x)>0), (N,N))

plt.imshow(grid.copy(), cmap='jet', interpolation='nearest')
plt.colorbar()
plt.title('original data')


def smooth(u, mask):
    m = ~mask
    r = u*m  # set all 'masked' points to 0. so they aren't used in the smoothing
    a = 4*r[1:-1,1:-1] + r[2:,1:-1] + r[:-2,1:-1] + r[1:-1,2:] + r[1:-1,:-2]
    b = 4*m[1:-1,1:-1] + m[2:,1:-1] + m[:-2,1:-1] + m[1:-1,2:] + m[1:-1,:-2]  # a divisor that accounts for masked points
    b[b==0] = 1.  # for avoiding divide by 0 error (region is masked so value doesn't matter)
    u[1:-1,1:-1] = a/b

# run the data through the smoothing filter a few times
for i in range(10):   
    smooth(grid, m)

mg = ma.array(grid, mask=m)  # put together the mask and the data

plt.figure()
plt.imshow(mg, cmap='jet', interpolation='nearest')
plt.colorbar()
plt.title('smoothed with mask')

plt.show()

在此处输入图像描述

主要的一点是，在掩码的边界处，被掩码的值没有用于平滑。（这也是网格方块切换值的地方，因此如果使用了被掩蔽的相邻值，图中会很清楚。）

score 0 · Accepted Answer

我们也遇到了这个问题，astropy 包已经涵盖了我们：

import numpy as np
import matplotlib.pyplot as plt

# Some Axes
x = np.arange(100)
y = np.arange(100)
#Some Interesting Shape
z = np.array(np.outer(np.sin((x+y)/10),np.sin(y/3)),dtype=float)

# some mask
mask = np.outer(np.sin((x+y)/20),np.sin(y/5))**2>.9

# masked data represent noise, so lets put in some trash into the masked points
z[mask] = (np.random.random(size = (100,100))*10)[mask]

# masked data
z_masked = np.ma.masked_array(z, mask)

# "Conventional" filter
filter_kernelsize = 2

import scipy.ndimage
z_filtered_bad = scipy.ndimage.gaussian_filter(z_masked,filter_kernelsize)

# Lets filter it
import astropy.convolution.convolve
from astropy.convolution import Gaussian2DKernel
k = Gaussian2DKernel(1.5)

z_filtered = astropy.convolution.convolve(z_masked, k, boundary='extend')

### Plots:
fig, axes = plt.subplots(2,2)
plt.sca(axes[0,0])
plt.title('Raw Data')
plt.imshow(z)
plt.colorbar()

plt.sca(axes[0,1])
plt.title('Raw Data Masked')
plt.imshow(z_masked)
plt.colorbar()

plt.sca(axes[1,0])
plt.title('ndimage filter (ignores mask)')
plt.imshow(z_filtered_bad)
plt.colorbar()

plt.sca(axes[1,1])
plt.title('astropy filter (uses mask)')
plt.imshow(z_filtered)
plt.colorbar()

plt.tight_layout()

代码的输出图

matplotlib - 想要从掩码数组中平滑轮廓

3 回答 3

Related

Reference