0

我有 1 分钟的 OHLCV 烛台数据,我需要将其聚合以创建 15m 烛台。数据库来自MongoDB;这是一个干净的 Python 版本:

def get_candela(self,tf):
    c = dict()
    candel = dict()
    candele_finale = list()
    prov_c = list()
    db = database("price_data", "1min_OHLC_XBTUSD")
    col = database.get_collection(db,"1min_OHLC_XBTUSD")
    db_candela = col.find({}, sort = [('timestamp', pymongo.ASCENDING)]).limit(20)
    candele = list(db_candela)
    timestamp_calc = list()
    open_calc = list()
    max_calc = list()
    min_calc = list()
    close_calc = list()
    vol_calc = list()
    #for _ in range(len(candele)):
    for a in range(tf):
        if len(candele) ==  0:
            break
        prov_c.append(candele[a])
        c.append(prov_c)
        candele[:tf]=[]
    for b in range(len(c)):
        cndl = c[b]
    for d in range(tf):
        print(cndl)
        cnd = cndl[d]
        #print(len(cnd))
        timestamp_calc.append(cnd["timestamp"])
        open_calc.append(cnd["open"])
        max_calc.append(cnd["high"])
        min_calc.append(cnd["low"])
        close_calc.append(cnd["close"])
        vol_calc.append(cnd["volume"])
        index_close=len(close_calc)
        candel["timestamp"] = timestamp_calc[d]
        candel["open"] = open_calc[0]
        candel["high"] = max(max_calc)
        candel["low"] = min(min_calc)
        candel["close"] = close_calc[index_close-1]
        candel["volume"] = sum(vol_calc)
        #print(candel)
        candele_finale.append(candel)
        max_calc.clear()
        min_calc.clear()
        vol_calc.clear()
    return candele_finale

这将返回一个仅包含最后一个烛台创建的数组。这是熊猫的另一个版本:

db = database("price_data", "1min_OHLC_XBTUSD")
 col = database.get_collection(db,"1min_OHLC_XBTUSD")
 db_candela = col.find({}, sort = [('timestamp', pymongo.ASCENDING)]).limit(20)
 prov_c = list()


    for item in db_candela:
            cc={"timestamp":item["timestamp"],"open":item["open"],"high":item["high"],"low":item["low"],"close":item["close"],"volume":item["volume"]}
            prov_c.append(cc)
            print(prov_c)
        data = pandas.DataFrame([prov_c], index=[pandas.to_datetime(cc["timestamp"])])
            #print(data)
        df = data.resample('5T').agg({'timestamp':'first','open':'first','high':'max', 'low':'min','close' : 'last','volume': 'sum'})
        #print(data.mean())
        #with pandas.option_context('display.max_rows', None, 'display.max_columns',None):  # more options can be specified also
        pprint(df)

这将返回一个带有奇怪/随机值的数据框。

4

1 回答 1

0

我今天也有同样的问题,我回答了。基本上有一个名为 resample 的 pandas 函数可以为您完成所有工作。

这是我的代码:

import json
import pandas as pd

#load the raw data and clean it up

data_json = open('./testapiresults.json') #load json object
data_dict = json.load(data_json)          #convert to a dict object
df = pd.DataFrame.from_dict(data_dict)    #convert to panel data's dataframe object

df['datetime'] = pd.to_datetime(df['datetime'],unit='ms') #convert from unix time (ms from epoch) to human time
df = df.set_index(pd.DatetimeIndex(df['datetime'])) #set time series as index format for resample function

#resample to aggregate to a different frequency of data

candle_summary = pd.DataFrame()
candle_summary['open'] = df.open.resample('15Min').first()
candle_summary['high'] = df.high.resample('15Min').max()
candle_summary['low'] = df.low.resample('15Min').min()
candle_summary['close'] = df.close.resample('15Min').last()

candle_summary.head()

我必须导出到 csv 并在 excel 中重新计算它以仔细检查它是否正确计算,但这有效。我还没有想出 pandas.DataFrame.resample('30min').ohlc() 函数,但如果我能弄清楚如何让它在没有大量错误的情况下工作,它看起来可以编写一些非常优雅的代码。

于 2020-07-18T09:31:20.373 回答