0

我有一个脚本,它读取一组时间序列,从第二个时间序列中删除时间列和其他一些信息,然后把它拼接到 csv 文件每一行的末尾。我遇到的问题是:文件末尾总是留有 3 行带空白 ,,,,,,, 的数据,不过随着脚本继续运行,这些行之后又会被更新。代码如下:

import pandas as pd
import time


def compiler():
    """Merge the market snapshot with the OHLC data every 15 minutes.

    Each of the 1000 passes re-reads ``xz.csv`` and ``ohlc/ohlc.csv``, drops
    the unwanted columns, joins the two frames side by side and overwrites
    ``ohlcomp.csv`` with the result, indexed by the ``datetime`` column.

    Raises:
        FileNotFoundError: if either input file does not exist.
        KeyError: if one of the dropped columns is missing from its file.
    """
    for _ in range(1000):
        # Read File
        df = pd.read_csv(r'C:/Users/J/Desktop/dropmarketdata/xz.csv')
        # Remove useless info
        df = df.drop(
            columns=['cached', 'id', 'name', 'last_updated', 'max_supply']
        )
        # Read 2nd file
        ohlc = pd.read_csv(r'C:/Users/J/Desktop/dropmarketdata/ohlc/ohlc.csv')
        # Drop datetime because df already carries it; join() would otherwise
        # see the same column name in both frames.
        ohlc = ohlc.drop(columns='datetime')
        # join() aligns the frames on their row index (the default positional
        # index assigned by read_csv), so any row of df that has no row at the
        # same position in ohlc is written with empty OHLC cells.
        main_df = df.join(ohlc, how='outer').set_index('datetime')
        # The path must be a single-line literal; splitting it across two
        # lines is a syntax error.
        main_df.to_csv(
            r'C:/Users/J/Desktop/dropmarketdata/ohlcomp.csv',
            float_format='%.8f',
        )
        print('saving....')
        time.sleep(900)
        print('15m has surpassed....')

# Script entry point: start the merge loop as soon as the file is run.
compiler()

问题是我的文件总是这样:

2018-04-16 01:57:09.021924,85409.30000000,18473609990.00000000,77146350.00000000,-0.11000000,-1.92000000,-7.11000000,0.00000052,0.00417603,147,DROP,30000000000.00000000,,,,,

2018-04-16 02:12:10.098678,85061.30000000,18473609990.00000000,74266498.00000000,-4.09000000,-5.59000000,-10.38000000,0.00000050,0.00402014,148,DROP,30000000000.00000000,,,,,

2018-04-16 02:27:10.916329,87757.50000000,18473609990.00000000,76921156.00000000,1.22000000,-2.24000000,-6.99000000,0.00000052,0.00416384,147,DROP,30000000000.00000000,,,,,

每一行都按日期索引,行末出现 ,,,,, 的位置实际上应该是 H、L、O、C 数据。我刚接触 Python,如果这个问题听起来很傻,还请见谅。谢谢您的帮助。

编辑:

如果有人需要自己获取实时数据,下面这段代码应该可以运行:

import pandas as pd
import time
from datetime import datetime
import coinmarketcap
from coinmarketcap import Market
import ccxt


def compiler():
    """Join ``other.csv`` and ``ohlc.csv`` side by side into ``file.csv``.

    Reads both files from the current working directory, drops the unwanted
    columns, joins the frames row by row and writes the result indexed by the
    ``datetime`` column of ``other.csv``.

    Raises:
        FileNotFoundError: if either input file does not exist.
        KeyError: if one of the dropped columns is missing from its file.
    """
    # Read files
    df = pd.read_csv('other.csv')
    ohlc = pd.read_csv('ohlc.csv')
    # Remove useless info
    df = df.drop(columns=['cached', 'id', 'name', 'last_updated', 'max_supply'])
    # Drop datetime because df already carries it; join() would otherwise
    # see the same column name in both frames.
    ohlc = ohlc.drop(columns='datetime')
    # join() aligns the frames on their row index (the default positional
    # index assigned by read_csv), so any row of df that has no row at the
    # same position in ohlc is written with empty cells for ohlc's columns.
    main_df = df.join(ohlc, how='outer').set_index('datetime')
    main_df.to_csv('file.csv', float_format='%.8f')
    print('saving compiled list....')


def collect1():
    """Fetch the DROP/BTC ticker from Tidex and write it to ``ohlc.csv``.

    The file is overwritten with a single row holding ``ask``, ``bid``,
    ``close``, ``high`` and ``low`` as eight-decimal strings, indexed by the
    local time taken just after the ticker is fetched.
    """
    # pulling from tidex
    tidex = ccxt.tidex()
    tidex.load_markets(True)
    ticker = tidex.fetch_ticker('DROP/BTC')
    price_columns = ['ask', 'bid', 'close', 'high', 'low']
    ticker_df = pd.DataFrame(
        ticker, index=['f'], columns=price_columns + ['datetime']
    )
    for column in price_columns:
        # Format each value on its own: applying '%' to the whole column
        # relies on float(Series), which pandas has deprecated.
        ticker_df[column] = ticker_df[column].map(
            lambda value: '%.8f' % value
        )
    # Overwrite the ticker's own 'datetime' value with the local time.
    ticker_df['datetime'] = str(datetime.now())
    ticker_df = ticker_df.set_index(pd.DatetimeIndex(ticker_df['datetime']))
    # set_index() was given an index object rather than a column label, so
    # the 'datetime' column is still present and has to be dropped explicitly.
    ticker_df = ticker_df.drop(columns='datetime')
    ticker_df.to_csv('ohlc.csv', float_format='%.8f')


def collect2():
    """Fetch the "dropil" ticker from CoinMarketCap and write ``other.csv``.

    The file is overwritten with whatever ``Market().ticker("dropil")``
    returns, indexed by the local time taken just after the ticker is fetched.
    """
    # pulling information from coinmarketcap
    market = Market()
    ticker2 = market.ticker("dropil")
    snapshot = pd.DataFrame(ticker2)
    snapshot['datetime'] = str(datetime.now())
    # The former bare reset_index() call discarded its result and so had no
    # effect; it has been removed.
    snapshot = snapshot.set_index(pd.DatetimeIndex(snapshot['datetime']))
    # set_index() was given an index object rather than a column label, so
    # the 'datetime' column is still present and has to be dropped explicitly.
    snapshot = snapshot.drop(columns='datetime')
    snapshot.to_csv('other.csv', float_format='%.8f')


# Refresh both source files, rebuild the merged file, then wait 15 minutes;
# repeat 1000 times.
for _ in range(1000):
    for step in (collect1, collect2, compiler):
        step()
    time.sleep(900)
4

0 回答 0