我找不到解决方案。我需要知道:某个值(name)是否还出现在日期时间字段(m_dt)更晚的记录中。我知道我这段又脏又冗余的代码太慢了,我只想先找到一个可行的解决方案(对于几千条记录来说,运行时间并不那么重要)。
from io import StringIO
import pandas as pd
from pandas.core.frame import DataFrame
# Sample input: one row per player per match, '|'-separated.
#   m_num - match number
#   m_dt  - match start as 'MM.DD HH:MM' (no year in the data)
#   name  - player name
data='''m_num|m_dt|name
24|12.21 05:30|Duka A. (Rus)
24|12.21 05:30|Vologzhanin V. (Rus)
25|12.21 05:45|Arutiunyan A. (Rus)
25|12.21 05:45|Tsvetkov V. (Rus)
26|12.21 05:45|Shmakov A. (Rus)
26|12.21 05:45|Klimentev V. (Rus)
33|12.21 07:00|Belugin O. (Rus)
33|12.21 07:00|Duka A. (Rus)
34|12.21 07:15|Gerasimov O. (Ukr)
34|12.21 07:15|Shmakov A. (Rus)
35|12.21 07:15|Tolmachev I. (Rus)
35|12.21 07:15|Arutiunyan A. (Rus)
36|12.21 07:30|Duka A. (Rus)
36|12.21 07:30|Bogomolov A. (Rus)
37|12.21 07:45|Arutiunyan A. (Rus)
37|12.21 07:45|Olshakov K. (Rus)'''
df = pd.read_csv(StringIO(data), sep='|', header=0)
print(df)
print('--------------------')

# Parse the timestamps once instead of re-parsing the whole column on every
# row. The result is kept in a separate Series so the 'm_dt' column (and the
# printed frame) keeps its original text. Because the data carries no year,
# the ordering is only meaningful while all rows fall in the same year.
m_time = pd.to_datetime(df['m_dt'], format='%m.%d %H:%M')

# Number of rows with a strictly later timestamp than each row.
# rank(method='max') is the count of rows whose time is <= this row's time,
# so subtracting it from the row total leaves the strictly-later rows.
later_counts = (len(df) - m_time.rank(method='max')).astype(int)
for later_count in later_counts:
    print(later_count)

# A row is flagged when the same name (exact match) occurs again at a
# strictly later time, i.e. when this row is not that name's latest entry.
# The original assigned through `df['m_num'] == a`, which compares a column
# with a whole row and raises; the flag is now computed per row in one pass.
df['dupe'] = m_time < m_time.groupby(df['name']).transform('max')
下面这种写法可以正常工作:
# Flag every row whose name shows up again at a strictly later match time.
#
# The original called pd.to_datetime() here but threw the result away, so the
# loop that followed compared the raw 'MM.DD HH:MM' strings. That only orders
# correctly while every field is zero-padded. The parsed values are now kept
# and used for the comparison; 'm_dt' itself is left untouched so the printed
# frame still shows the original text.
match_time = pd.to_datetime(df['m_dt'], format='%m.%d %H:%M')

# Latest time at which each row's name occurs (names are matched exactly).
last_seen = match_time.groupby(df['name']).transform('max')

# A later occurrence exists exactly when this row is not the name's latest
# one; this replaces the per-row filter-and-scan loop with a single pass.
df['dupe'] = match_time < last_seen
print(df)