0

这是我存储在 df1 中的每日 ohlc 数据的小样本。

date                open    close   high    low
2019-01-01 00:00:00 3700    3800    3806    3646
2019-01-02 00:00:00 3800    3857    3880    3750
2019-01-03 00:00:00 3858    3766    3863    3729
2019-01-04 00:00:00 3768    3791    3821    3706
2019-01-05 00:00:00 3789    3772    3839    3756
2019-01-06 00:00:00 3776    3988    4023    3747
2019-01-07 00:00:00 3985    3972    4018    3928

我想创建一个数据框 (df2),用来表示当年的年线蜡烛在形成过程中每一天的样子:收盘价取当日收盘价,最高价取1月1日至当日的最高价,最低价取1月1日至当日的最低价,开盘价取当年的开盘价。结果应该是这样的:

date                open    close   high    low
2019-01-01 00:00:00 3700    3800    3806    3646
2019-01-02 00:00:00 3700    3857    3880    3646
2019-01-03 00:00:00 3700    3766    3880    3646
2019-01-04 00:00:00 3700    3791    3880    3646
2019-01-05 00:00:00 3700    3772    3880    3646
2019-01-06 00:00:00 3700    3988    4023    3646
2019-01-07 00:00:00 3700    3972    4023    3646

我很想贴一些代码,但实在毫无头绪。我原以为重新采样 (resample) 能帮到我,但它只会把整年汇总成一行数据。我也想过逐日迭代并重新采样来解决这个问题,但我知道这会拖慢计算速度,所以想看看能否用矢量化的方式实现。这是我第一次发帖,如果有任何需要改进的地方,请告诉我。

- - - - - - - -编辑 - - - - - - - - -

下面是我的完整代码:按年的版本可以正常工作,但其他时间周期(周、月、季度)的版本不行。数据取自公开来源 yfinance,希望这样能更容易复现错误的结果。

import pandas as pd
import yfinance as yf

def resample_active_week(df):
    """Return the in-progress ("active") weekly candle for every row of ``df``.

    Each output row describes the weekly candle as it stood at that row:
    ``open`` is the first open of the week, ``high``/``low`` are the running
    maximum/minimum since the start of the week, and ``close`` is the row's
    own close.

    Args:
        df: DataFrame with a ``DatetimeIndex`` and ``open``, ``high``,
            ``low`` and ``close`` columns. Rows must be in chronological
            order because the running extremes follow row order.

    Returns:
        A new DataFrame on the same index with the columns ``high``,
        ``low``, ``close`` and ``open`` (in that order).
    """
    iso = df.index.isocalendar()
    # Key on ISO year *and* ISO week. The week number alone repeats every
    # year, which would merge week N of all years into a single group. The
    # ISO year (rather than the calendar year) keeps a week that straddles
    # New Year in one piece.
    week_key = [iso['year'], iso['week']]

    df2 = pd.DataFrame({
        # high is the max from the start of the week to the current day
        'high': df.groupby(week_key)['high'].cummax(),
        # low is the min from the start of the week to the current day
        'low': df.groupby(week_key)['low'].cummin(),
        'close': df['close'],
        # open is the first open of the current week, repeated on every row
        'open': df.groupby(week_key)['open'].transform('first'),
    })

    # Carry the previous row forward over missing values.
    return df2.ffill()
def resample_active_month(df):
    """Return the in-progress ("active") monthly candle for every row of ``df``.

    Each output row describes the monthly candle as it stood at that row:
    ``open`` is the first open of the month, ``high``/``low`` are the running
    maximum/minimum since the start of the month, and ``close`` is the row's
    own close.

    Args:
        df: DataFrame with a ``DatetimeIndex`` and ``open``, ``high``,
            ``low`` and ``close`` columns. Rows must be in chronological
            order because the running extremes follow row order.

    Returns:
        A new DataFrame on the same index with the columns ``high``,
        ``low``, ``close`` and ``open`` (in that order).
    """
    # Key on (year, month). The month number alone repeats every year, which
    # would merge e.g. every January in the data into a single group.
    month_key = [df.index.year, df.index.month]

    df2 = pd.DataFrame({
        # high is the max from the start of the month to the current day
        'high': df.groupby(month_key)['high'].cummax(),
        # low is the min from the start of the month to the current day
        'low': df.groupby(month_key)['low'].cummin(),
        'close': df['close'],
        # open is the first open of the current month, repeated on every row
        'open': df.groupby(month_key)['open'].transform('first'),
    })

    # Carry the previous row forward over missing values.
    return df2.ffill()

def resample_active_quarter(df):
    """Return the in-progress ("active") quarterly candle for every row of ``df``.

    Each output row describes the quarterly candle as it stood at that row:
    ``open`` is the first open of the quarter, ``high``/``low`` are the
    running maximum/minimum since the start of the quarter, and ``close`` is
    the row's own close.

    Args:
        df: DataFrame with a ``DatetimeIndex`` and ``open``, ``high``,
            ``low`` and ``close`` columns. Rows must be in chronological
            order because the running extremes follow row order.

    Returns:
        A new DataFrame on the same index with the columns ``high``,
        ``low``, ``close`` and ``open`` (in that order).
    """
    # Key on (year, quarter). The quarter number alone repeats every year,
    # which would merge e.g. every Q1 in the data into a single group.
    quarter_key = [df.index.year, df.index.quarter]

    df2 = pd.DataFrame({
        # high is the max from the start of the quarter to the current day
        'high': df.groupby(quarter_key)['high'].cummax(),
        # low is the min from the start of the quarter to the current day
        'low': df.groupby(quarter_key)['low'].cummin(),
        'close': df['close'],
        # open is the first open of the current quarter, repeated on every row
        'open': df.groupby(quarter_key)['open'].transform('first'),
    })

    # Carry the previous row forward over missing values.
    return df2.ffill()
def resample_active_year(df):
    """Return the in-progress ("active") yearly candle for every row of ``df``.

    Each output row describes the yearly candle as it stood at that row:
    ``open`` is the first open of the year, ``high``/``low`` are the running
    maximum/minimum since the start of the year, and ``close`` is the row's
    own close.

    Args:
        df: DataFrame with a ``DatetimeIndex`` and ``open``, ``high``,
            ``low`` and ``close`` columns. Rows must be in chronological
            order because the running extremes follow row order.

    Returns:
        A new DataFrame on the same index with the columns ``high``,
        ``low``, ``close`` and ``open`` (in that order).
    """
    year_key = df.index.year

    df2 = pd.DataFrame({
        # high is the max from the start of the year to the current day
        'high': df.groupby(year_key)['high'].cummax(),
        # low is the min from the start of the year to the current day
        'low': df.groupby(year_key)['low'].cummin(),
        'close': df['close'],
        # open is the first open of the current year, repeated on every row;
        # this replaces the head(1) + forward-fill construction
        'open': df.groupby(year_key)['open'].transform('first'),
    })

    # Carry the previous row forward over missing values. ``.ffill()`` is the
    # supported spelling of the deprecated ``fillna(method='ffill')``.
    return df2.ffill()

# Fetch the daily BTC-USD history from yfinance, normalise the OHLC column
# names to lower case and discard the volume column.
df = yf.download(tickers='BTC-USD', period='max', interval='1d', auto_adjust=True)
df = df.rename(columns={'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close'})
df = df.drop(columns=['Volume'])

# Build the in-progress candle for each timeframe.
df2 = resample_active_week(df)
df3 = resample_active_month(df)
df4 = resample_active_quarter(df)
df5 = resample_active_year(df)

# Write the original data and every resampled frame to its own sheet,
# in this order.
sheets = {
    'df_original': df,
    'df2_week': df2,
    'df3_month': df3,
    'df4_quarter': df4,
    'df5_year': df5,
}
with pd.ExcelWriter('ResampleOut.xlsx', engine="openpyxl", mode="w") as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name)
4

1 回答 1

0
# set date as the index (the grouping below needs a DatetimeIndex for `.year`)
df = df.set_index('date')

# Compute all three series before assigning so each reads the original data.
years = df.index.year
# high is the max from the first row of the year to the current day
running_high = df.groupby(years)['high'].cummax()
# low is the min from the first row of the year to the current day
running_low = df.groupby(years)['low'].cummin()
# open is the first open of the year, broadcast to every row of that year.
# This is vectorised and does not require a Jan 1 row to exist; assigning
# into the rows yielded by iterrows() would only modify copies.
year_open = df.groupby(years)['open'].transform('first')

df['high'] = running_high
df['low'] = running_low
df['open'] = year_open

# OPTIONAL: reset index
df = df.reset_index()
于 2021-04-26T06:02:48.047 回答