可以考虑构建一个结构与 Excel 数据相同、且取值符合匹配条件的辅助数据框(即第 8 行的 D–J 列为这些特定值)。然后在循环中把每个读入的数据框与该辅助数据框合并;如果合并结果非空(即存在匹配),就把该数据框追加到数据框列表中。
注意:请把列名调整为实际的列名,即在 DataFrame() 和 merge() 调用中,将 list('ABCDEFGHIJ') 替换为实际的名称列表 ['col1','col2','col3',...]。
# Helper frame used as the merge target: seven all-zero filler rows followed
# by one marker row whose '91'..'97' columns hold the values 91..97.
# reset_index() exposes the row position as an 'index' column so a merge can
# match on row position as well as on the column values.
check_df = pd.DataFrame(
    [[0] * 9] * 7 + [[0, 0, 91, 92, 93, 94, 95, 96, 97]],
    columns=['Heading code', 'Heading name',
             '91', '92', '93', '94', '95', '96', '97'],
).reset_index()

print(check_df)
#    index  Heading code  Heading name  91  92  93  94  95  96  97
# 0      0             0             0   0   0   0   0   0   0   0
# 1      1             0             0   0   0   0   0   0   0   0
# 2      2             0             0   0   0   0   0   0   0   0
# 3      3             0             0   0   0   0   0   0   0   0
# 4      4             0             0   0   0   0   0   0   0   0
# 5      5             0             0   0   0   0   0   0   0   0
# 6      6             0             0   0   0   0   0   0   0   0
# 7      7             0             0  91  92  93  94  95  96  97
# Load every workbook in files_xlsx and keep only those whose "1. City" sheet
# contains the marker row described by check_df.
data_columns = ['Heading code', 'Heading name', '91', '92', '93', '94', '95',
                '96', '97', 'City name', 'City code']

dfs = []
for f in files_xlsx:
    # `usecols` replaces the `parse_cols` keyword, which newer pandas
    # versions no longer accept.
    # Single cell C2 (one row read after skipping one row, column C only).
    city_name = pd.read_excel(f, sheet_name="1. City", nrows=1, usecols="C",
                              header=None, skiprows=1)
    # Single cell C3, read the same way one row further down.
    country_code = pd.read_excel(f, sheet_name="1. City", nrows=1, usecols="C",
                                 header=None, skiprows=2)
    # Columns B:J after skipping the first 8 rows, tagged with the two cells.
    data = pd.read_excel(f, sheet_name="1. City", usecols="B:J", header=None,
                         skiprows=8)\
        .assign(city_name=city_name.iat[0, 0], city_code=country_code.iat[0, 0])
    data.columns = data_columns

    # INNER JOIN MERGE ON INDEX AND COLS, D-J: a non-empty result means the
    # sheet has the marker values at the same row position as check_df.
    tmp = data.reset_index().merge(check_df, on=['index','91','92','93','94','95','96','97'])

    # CONDITIONALLY APPEND
    if len(tmp) > 0:
        dfs.append(data)

# pd.concat() raises ValueError on an empty list, so fall back to an empty
# frame with the expected columns when no workbook matched.
if dfs:
    df = pd.concat(dfs, ignore_index=True)
else:
    df = pd.DataFrame(columns=data_columns)
下面用随机数据演示
# Seed the generator so the demonstration prints the same values every run.
np.random.seed(82118)

# Five frames of 10 rows x 9 random integers stand in for the Excel sheets.
rand_dfs = [
    pd.DataFrame(
        [np.random.randint(1, 100, 9) for _ in range(10)],
        columns=['Heading code','Heading name','91','92','93','94','95','96','97'],
    )
    for _ in range(5)
]

# Plant the marker row at index 7 of the third and fifth frames so that
# exactly those two frames match check_df.
for pos in (2, 4):
    rand_dfs[pos].loc[7] = [0, 0, 91, 92, 93, 94, 95, 96, 97]

# Keep only the frames whose inner join against check_df finds a match.
final_dfs = []
for d in rand_dfs:
    tmp = d.reset_index().merge(check_df, on=['index','91','92','93','94','95','96','97'])
    if tmp.empty:
        continue
    final_dfs.append(d)

final_df = pd.concat(final_dfs, ignore_index=True)
输出(索引为 7 和 17 的两行即符合匹配条件的行)
# Show the concatenated result of the two frames that matched check_df.
# Each frame contributes 10 rows, re-indexed 0..19 by ignore_index=True, so
# the planted marker rows (index 7 in each source frame) appear at 7 and 17.
print(final_df)
# Expected output:
# Heading code Heading name 91 92 93 94 95 96 97
# 0 53 98 67 8 86 33 65 56 62
# 1 61 9 40 14 18 9 53 30 24
# 2 89 88 80 91 91 49 8 39 84
# 3 15 99 49 92 63 96 11 95 29
# 4 13 62 82 12 34 92 54 29 47
# 5 44 18 67 61 52 71 52 25 12
# 6 56 25 52 10 82 12 59 63 15
# 7 0 0 91 92 93 94 95 96 97
# 8 51 50 27 38 34 11 57 92 3
# 9 49 99 46 87 46 5 63 24 8
# 10 31 62 8 23 19 66 60 10 66
# 11 51 98 30 44 45 39 32 74 82
# 12 88 19 54 28 38 71 3 31 34
# 13 58 13 89 17 96 35 12 52 85
# 14 93 67 13 13 28 43 24 7 4
# 15 34 26 73 20 44 37 18 17 22
# 16 59 1 99 9 11 6 4 99 95
# 17 0 0 91 92 93 94 95 96 97
# 18 88 6 23 20 35 26 37 56 51
# 19 21 67 19 63 77 98 41 9 22