python - 迭代、计算和组合两个具有多个元组的列表

Question

我正在使用一个包含 x、y、z 数据的投影坐标数据集（一个带有 X、Y、Z 列标题的 432 行 csv 文件，未附上）。我希望导入这个数据集，根据用户输入计算出一个新网格，然后对落在新网格各单元内的点做一些统计。我已经得到了两个列表（raw_lst 含 431 个 (x,y,z) 元组，grid_lst 含 16 个 (x,y) 元组，代码中称为 n,e），但是当我尝试遍历它们来计算新网格的平均值和密度时，一切都乱了套。我想输出一个最终列表，其中包含 grid_lst 的 x 和 y 值，以及计算出的平均 z 值和密度值。

我查找过 numpy 和 scipy 库，以为它们可能已经实现了我想要的功能，但什么也没找到。如果你们有任何想法，请告诉我。

sample_xyz_reddot_is_newgrid_pictoral_representation

import pandas as pd
import math

# Load the sample points; the script reads the X, Y and Z columns of the CSV.
df = pd.read_csv("Sample_xyz.csv")
N, E, Z = df["X"], df["Y"], df["Z"]

# Cell size of the new grid. Fixed at 0.5 for quick testing; the interactive
# alternative is: grid = int(input("Specify grid value "))
grid = 0.5

# Extents of the input area, widened outward to whole units.
max_N, max_E = math.ceil(max(N)), math.ceil(max(E))
min_E, min_N = math.floor(min(E)), math.floor(min(N))

# Number of grid cells along each axis (int() truncates toward zero).
total_N = int((max_N - min_N) / grid)
total_E = int((max_E - min_E) / grid)

# Cell mid points along N, starting half a cell below the upper extent and
# stepping downward one cell at a time.
N_lst = []
n = float(max_N) - grid / 2
while len(N_lst) < total_N:
    N_lst.append(n)
    n -= grid

# Cell mid points along E, built the same way.
E_lst = []
e = float(max_E) - grid / 2
while len(E_lst) < total_E:
    E_lst.append(e)
    e -= grid

# Every (n, e) combination of mid points: the centres of the new grid cells.
grid_lst = [(n_mid, e_mid) for n_mid in N_lst for e_mid in E_lst]

# Convert the imported dataframe to a list of records, one per CSV row.
raw_lst = list(df.to_records(index=False))

# grid_lst holds the (n, e) tuples of the new grid coordinates
# (16 of them for the sample file, according to the original author).
# raw_lst holds the (n, e, z) tuples from the imported file - called x, y, z below.

# Per-cell statistics: for every grid cell centre (n, e), gather the z values of
# the raw points lying within half a cell of that centre along both axes, then
# record one (n, e, average z, density) row for the cell.
half_cell = grid / 2
average_lst = []
for n, e in grid_lst:
    # Build a fresh z list for each cell. Sharing one list across cells makes
    # every cell's average and count include the points of all earlier cells.
    # The comparisons are inclusive, so a point lying exactly on an edge shared
    # by two cells is counted in both of them.
    cell_z = [
        z
        for x, y, z in raw_lst
        if n >= x - half_cell and n <= x + half_cell
        and e >= y - half_cell and e <= y + half_cell
    ]
    # A cell without points has no meaningful mean; report NaN rather than
    # dividing by zero.
    average = sum(cell_z) / len(cell_z) if cell_z else float("nan")
    # NOTE(review): density is the point count divided by the cell edge length,
    # as in the original; use grid ** 2 if points per unit area is intended.
    density = len(cell_z) / grid
    average_lst.append((n, e, average, density))

# One output row per grid cell.
for row in average_lst:
    print(row)

运行示例代码

import random

# Self-contained example: 100 random integer points on a 10 x 10 area with a
# z value of 0 or 1, binned into four 5 x 5 cells given by their centres.
grid = 5
raw_lst = [
    (random.randrange(0, 10), random.randrange(0, 10), random.randrange(0, 2))
    for _ in range(100)
]
grid_lst = [(2.5, 2.5), (2.5, 7.5), (7.5, 2.5), (7.5, 7.5)]

# Per-cell statistics: one (n, e, average z, density) row for each cell centre.
half_cell = grid / 2
average_lst = []
for n, e in grid_lst:
    # Build a fresh z list for each cell. Sharing one list across cells makes
    # every cell's average and count include the points of all earlier cells.
    # The comparisons are inclusive, so a point lying exactly on an edge shared
    # by two cells is counted in both of them.
    cell_z = [
        z
        for x, y, z in raw_lst
        if n >= x - half_cell and n <= x + half_cell
        and e >= y - half_cell and e <= y + half_cell
    ]
    # A cell without points has no meaningful mean; report NaN rather than
    # dividing by zero.
    average = sum(cell_z) / len(cell_z) if cell_z else float("nan")
    # NOTE(review): density is the point count divided by the cell edge length,
    # as in the original; use grid ** 2 if points per unit area is intended.
    density = len(cell_z) / grid
    average_lst.append((n, e, average, density))

# One output row per grid cell.
for row in average_lst:
    print(row)

score 0 · Accepted Answer

一些建议

使用数组时，请使用 numpy，它提供了更多的功能。
使用网格时，将 x-coords、y-coords 用作单个数组通常更方便

对解决方案的评论

显然你有一个网格，或者更确切地说是一个盒子 grid_lst。我们将其生成为 numpy 的 meshgrid (gx,gy)。
你有很多点 raw_lst。我们将它的各个分量分别生成为一维 numpy 数组。
您要选择 g_box 中的 r_points。我们使用百分比公式：tx = (rx-gxMin)/(gxMax-gxMin)
如果 tx, ty 在 [0..1] 我们存储索引
作为中间结果，我们得到了位于 g_box 内的所有 raw_lst 点的索引
使用这些索引，您可以提取 g_box 中的 raw_lst 元素并进行一些统计
请注意，我省略了 z 坐标。您将不得不改进此解决方案。

--

import numpy as np
from matplotlib import pyplot as plt
import matplotlib.colors as mclr
from matplotlib import cm

# Output image file names: f10 for the first plot, f20 for the second one.
f10, f20 = 'C://gcg//picStack_10.jpg', 'C://gcg//picStack_20.jpg'

def plot_grid(gx,gy,rx,ry,Rx,Ry,fOut):
    """Draw the grid mesh with two point sets, save the figure, then show it.

    gx, gy -- mesh coordinate arrays handed to pcolormesh
    rx, ry -- coordinates scattered as red circles
    Rx, Ry -- coordinates scattered as gold squares
    fOut   -- file name the figure is saved to before it is displayed
    """
    two_tone = mclr.ListedColormap(['blue','lightgreen'])
    canvas = plt.figure(figsize=(5,5))
    axes = canvas.add_subplot(111)
    # gx is passed a second time as the colour values of the mesh.
    axes.pcolormesh(gx, gy, gx, edgecolors='b', cmap=two_tone, lw=1, alpha=0.3)
    axes.scatter(rx, ry, s=150, c='r', alpha=0.7)
    axes.scatter(Rx, Ry, marker='s', s=150, c='gold', alpha=0.5)
    # Equal scaling on both axes so the cells are not distorted.
    axes.set_aspect('equal')
    plt.savefig(fOut)
    plt.show()


def get_g_grid(nx,ny):
    """Return the mesh arrays (gx, gy) of an nx-by-ny node grid.

    The nodes are evenly spaced from 2.5 to 7.5 along each axis. Both returned
    arrays have shape (nx, ny) because the mesh uses 'ij' indexing.
    """
    x_nodes, y_nodes = (2.5 + 5*np.linspace(0, 1, count) for count in (nx, ny))
    gx, gy = np.meshgrid(x_nodes, y_nodes, indexing='ij')
    return gx, gy

def get_raw_points(N):
    """Return N random sample points as four parallel arrays (rx, ry, rz, rv).

    rx and ry are random integers from 0 to 9, rz is a random integer 0 or 1,
    and rv is a random float drawn uniformly between 0.0 and 1.0.
    """
    xs = np.random.randint(0, 10, N)
    ys = np.random.randint(0, 10, N)
    zs = np.random.randint(0, 2, N)
    vs = np.random.uniform(low=0.0, high=1.0, size=N)
    return xs, ys, zs, vs


# Demo set-up: a 2 x 2 node grid (a single cell) and 100 random raw points.
N = 100
nx, ny = 2, 2
# The grid builder defined in this file is get_g_grid; calling the undefined
# name get_base_grid raises NameError.
gx, gy = get_g_grid(nx, ny)
rx, ry, rz, rv = get_raw_points(N)
# First picture: grid and raw points only. No points are selected yet, so 0, 0
# are passed as placeholders for Rx, Ry.
# NOTE(review): scatter still draws one gold marker at the origin for these
# placeholders - confirm whether that is wanted.
plot_grid(gx, gy, rx, ry, 0, 0, f10)

def get_the_points_inside(gx,gy,rx,ry):
    """Return the indices of the raw points that lie inside the grid cells.

    gx, gy -- 2-D mesh arrays of the grid nodes, indexed [jx, jy]
    rx, ry -- coordinates of the raw points

    Each cell is spanned by node [jx, jy] and its neighbours [jx+1, jy] and
    [jx, jy+1]. A point is inside when its relative position within the cell
    is between 0 and 1 (inclusive) along both axes. Indices are collected cell
    by cell, so a point lying in more than one cell is listed more than once.
    """
    n_nodes_x, n_nodes_y = gx.shape
    inside = []
    # ----- walk over the cells of the g-grid -----
    for cx in range(n_nodes_x - 1):
        for cy in range(n_nodes_y - 1):
            x0, y0 = gx[cx, cy], gy[cx, cy]
            width = gx[cx + 1, cy] - x0
            height = gy[cx, cy + 1] - y0
            # --- walk over the r_points
            for k, px in enumerate(rx):
                # Relative position of the point within the cell.
                frac_x = (px - x0) / width
                frac_y = (ry[k] - y0) / height
                if 0.0 <= frac_x <= 1.0 and 0.0 <= frac_y <= 1.0:
                    inside.append(k)
    return inside


# Pick out the raw points that fall inside the grid and draw the second
# picture, which marks the selected points with gold squares.
index = get_the_points_inside(gx, gy, rx, ry)
Rx, Ry, Rz, Rv = (values[index] for values in (rx, ry, rz, rv))

plot_grid(gx, gy, rx, ry, Rx, Ry, f20)

python - 迭代、计算和组合两个具有多个元组的列表

1 回答 1

Related

Reference