所以基本上我有一个脚本,它读取两组时间序列,从第二个时间序列中删除时间戳和一些其他信息,然后把它拼接到 csv 文件每一行的末尾。我遇到的问题是它总是在我文件每行的末尾写入空白的 ,,,,,,, 列,而且随着脚本继续运行还会不断更新这些行。代码是这样的:
import pandas as pd
import time
def compiler():
    """Periodically merge the market-data CSV with the OHLC CSV.

    Every 15 minutes: read both input files, drop columns that are not
    needed, join the two tables row-by-row, and write the combined table
    to ohlcomp.csv. Runs for 1000 iterations (~10 days).
    """
    for _ in range(1000):
        # Read the main market-data file.
        df = pd.read_csv(r'C:/Users/J/Desktop/dropmarketdata/xz.csv')
        # Remove columns we do not want in the compiled output
        # (one drop() call instead of five pop() calls).
        df.drop(columns=['cached', 'id', 'name', 'last_updated', 'max_supply'],
                inplace=True)
        # Read the OHLC file.
        ohlc = pd.read_csv(r'C:/Users/J/Desktop/dropmarketdata/ohlc/ohlc.csv')
        # Drop datetime here because the other file already supplies the index.
        del ohlc['datetime']
        # Align the two frames on their shared integer index. NOTE(review):
        # how='outer' keeps rows present in only ONE file and fills the other
        # side with NaN — that is exactly what produces the trailing ,,,,,
        # columns when ohlc.csv has fewer rows than xz.csv.
        main_df = df.join(ohlc, how='outer')
        main_df.set_index('datetime', inplace=True)
        # BUG FIX: the output path must be a single string literal; the
        # original split it across two source lines, which is a syntax error.
        main_df.to_csv(r'C:/Users/J/Desktop/dropmarketdata/ohlcomp.csv',
                       float_format='%.8f')
        print('saving....')
        time.sleep(900)
        print('15m has surpassed....')
# Entry point: run the compile-and-save loop (blocks ~15 minutes per pass).
compiler()
问题是我的文件总是这样:
2018-04-16 01:57:09.021924,85409.30000000,18473609990.00000000,77146350.00000000,-0.11000000,-1.92000000,-7.11000000,0.00000052,0.00417603,147,DROP,30000000000.00000000,,,,,
2018-04-16 02:12:10.098678,85061.30000000,18473609990.00000000,74266498.00000000,-4.09000000,-5.59000000,-10.38000000,0.00000050,0.00402014,148,DROP,30000000000.00000000,,,,,
2018-04-16 02:27:10.916329,87757.50000000,18473609990.00000000,76921156.00000000,1.22000000,-2.24000000,-6.99000000,0.00000052,0.00416384,147,DROP,30000000000.00000000,,,,,
每一行都按日期索引,而行末那些 ,,,,, 的位置本来应该是 H、L、O、C 数据。如果这听起来像一个愚蠢的问题,很抱歉,我对 python 还很陌生。谢谢您的帮助。
编辑:
对于需要自己流式传输数据的任何人,此代码应该可以工作
import pandas as pd
import time
from datetime import datetime
import coinmarketcap
from coinmarketcap import Market
import ccxt
def compiler():
    """Merge other.csv with ohlc.csv and write the combined table to file.csv.

    Reads both CSVs from the working directory, drops columns not needed in
    the compiled output, joins the tables row-by-row, and re-indexes the
    result by the 'datetime' column before saving.
    """
    # Read both input files.
    df = pd.read_csv('other.csv')
    ohlc = pd.read_csv('ohlc.csv')
    # Remove columns we do not want in the compiled output
    # (one drop() call instead of five pop() calls).
    df.drop(columns=['cached', 'id', 'name', 'last_updated', 'max_supply'],
            inplace=True)
    # Drop datetime here because the other frame already carries the timestamps.
    del ohlc['datetime']
    # Align the two frames on their shared integer index; 'outer' keeps every
    # row from both sides. (Removed the dead `main_df = pd.DataFrame()`
    # assignment that was immediately overwritten.)
    main_df = df.join(ohlc, how='outer')
    main_df.set_index('datetime', inplace=True)
    main_df.to_csv('file.csv', float_format='%.8f')
    print('saving compiled list....')
def collect1():
    """Fetch the current DROP/BTC ticker from Tidex and write it to ohlc.csv."""
    # Pull the live ticker from the Tidex exchange.
    tidex = ccxt.tidex()
    tidex.load_markets(True)
    ticker = tidex.fetch_ticker('DROP/BTC')
    ticker_df = pd.DataFrame(ticker, index=['f'],
                             columns=['ask', 'bid', 'close', 'high', 'low',
                                      'datetime'])
    # Format every price column to 8 decimal places in one loop instead of
    # five duplicated statements. (%-formatting the one-element Series works
    # because this frame always has exactly one row.)
    for col in ('ask', 'bid', 'close', 'high', 'low'):
        ticker_df[col] = '%.8f' % ticker_df[col]
    # Stamp the row with the local collection time and use it as the index.
    # NOTE(review): datetime.now() is naive local time — confirm whether UTC
    # was intended for cross-source alignment.
    ticker_df.loc[:, 'datetime'] = pd.Series("{:}".format(datetime.now()),
                                             index=ticker_df.index)
    ticker_df.set_index(pd.DatetimeIndex(ticker_df.loc[:, 'datetime']),
                        inplace=True)
    ticker_df.pop('datetime')
    ticker_df.to_csv('ohlc.csv', float_format='%.8f')
def collect2():
    """Fetch the Dropil ticker from CoinMarketCap and write it to other.csv."""
    # Pull the ticker from the CoinMarketCap API.
    market = Market()
    ticker2 = market.ticker("dropil")
    dropArray = pd.DataFrame(ticker2)
    # Removed the original `dropArray.reset_index()` call: its return value
    # was discarded, so without inplace=True/reassignment it was a no-op.
    # Stamp the rows with the local collection time and index by it.
    dropArray.loc[:, 'datetime'] = pd.Series("{:}".format(datetime.now()),
                                             index=dropArray.index)
    dropArray.set_index(pd.DatetimeIndex(dropArray.loc[:, 'datetime']),
                        inplace=True)
    dropArray.pop('datetime')
    dropArray.to_csv('other.csv', float_format='%.8f')
# Main loop: collect fresh data from both sources, compile the combined CSV,
# then wait 15 minutes. The loop index is unused, so name it `_`.
for _ in range(1000):
    collect1()
    collect2()
    compiler()
    time.sleep(900)