有两点不清楚:
- 你是如何读取工作表的
- 当两张表都有值时,你希望如何处理合并。我假设你想对它们求和。
import numpy as np
import pyexcel as pe
# Load the cell grid of each workbook as a NumPy array.
a = np.array(pe.get_array(file_name='Sheet1.xlsx'))
b = np.array(pe.get_array(file_name='Sheet2.xlsx'))
c = np.array(pe.get_array(file_name='Sheet3.xlsx'))

# Named `grids` (not `all`) so the builtin all() is not shadowed.
grids = [a, b, c]
max_cols = max(grid.shape[1] for grid in grids)

# Normalise every grid to an integer array that is `max_cols` wide.
for idx, grid in enumerate(grids):
    if not np.issubdtype(grid.dtype, np.integer):
        # Cells equal to '' are treated as zero before the integer cast.
        grid[grid == ''] = 0
        grid = grid.astype('int')
    missing_cols = max_cols - grid.shape[1]
    if missing_cols:
        # Right-pad narrower grids with zeros so they can be stacked.
        padding = np.zeros((grid.shape[0], missing_cols), dtype=grid.dtype)
        grid = np.hstack([grid, padding])
    grids[idx] = grid

# Column-wise totals over the rows of all sheets; keep and show the result
# instead of discarding it in a bare expression.
column_totals = np.sum(np.vstack(grids), 0)
print(column_totals)
编辑
使用下面的方法,你将不需要 for 循环(循环仅用于遍历不同的工作表)。这才是借助 numpy 的 pythonic 写法!
def _sheet_grid(sheet, width):
    """Return the rows of *sheet* as an object array padded with '' to *width* columns."""
    rows = sheet.array
    # dtype=object keeps each cell's Python type and avoids NumPy's
    # fixed-width string dtype, which silently truncates longer text
    # on assignment.
    grid = np.full((len(rows), width), '', dtype=object)
    for row_index, row in enumerate(rows):
        grid[row_index, :len(row)] = row
    return grid


def join_sheets(a, b):
    """Merge sheet *b* into sheet *a* in place.

    Both sheets are padded with '' to the wider column count. In the rows
    the two sheets share, every cell of *b* that is not '' overwrites the
    cell at the same position in *a*. Rows of *b* beyond the last row of
    *a* are appended. The merged grid is written back to ``a.array``.

    Args:
        a: Destination sheet; must provide ``number_of_rows()``,
            ``number_of_columns()`` and a readable/writable ``array``.
        b: Source sheet providing the same methods and a readable ``array``.

    Returns:
        None. *a* is modified in place.
    """
    width = max(a.number_of_columns(), b.number_of_columns())
    shared_rows = min(a.number_of_rows(), b.number_of_rows())

    merged = _sheet_grid(a, width)
    incoming = _sheet_grid(b, width)

    # Overlay the non-blank cells of b on the rows both sheets have.
    overlap = incoming[:shared_rows]
    filled = overlap != ''
    merged[:shared_rows][filled] = overlap[filled]

    # Rows that exist only in b are appended below a's rows.
    if b.number_of_rows() > shared_rows:
        merged = np.vstack([merged, incoming[shared_rows:]])

    a.array = merged.tolist()
# Open the workbook and fold every later sheet into the first one.
sheets = pe.get_book(file_name='Sheet1.xlsx')
sheet_count = sheets.number_of_sheets()
for sheet_index in range(1, sheet_count):
    join_sheets(sheets[0], sheets[sheet_index])

# Save the merged book under the book's own path and filename.
output_path = sheets.path + '/' + sheets.filename
sheets.save_as(output_path)