我正在下载 5 只股票('A'、'AAP'、'AAPL'、'ABBV'、'ABC')的 15 年数据(每日收盘)。问题是我有一些重复。第一个“A”没有问题,我得到了适量的数据。对于第二个“AAP”,我的行数是正确的两倍,似乎数据被下载了两次。最后 3 只股票的问题相同,我的行数是正确的三倍。我附上了一个显示 csv 文件大小的屏幕截图,如果一切正常,这些文件应该具有相同的大小。
我怀疑问题来自调用 reqHistoricalData 后的 10 秒暂停;可能太长了。我怎样才能避免重复的行以及如何暂停适当的时间(不要太长也不要太短)?
import pandas as pd
import datetime as dt
import time
import collections
import threading
import os
from ibapi.client import EClient
from ibapi.wrapper import EWrapper
from ibapi.contract import Contract
from ibapi.common import BarData
path = r"D:\trading\data\debug\\"
class IBapi(EWrapper, EClient):
def __init__(self):
EClient.__init__(self, self)
self.data=collections.defaultdict(list)
def nextValidId(self, orderId: int):
super().nextValidId(orderId)
self.nextorderId = orderId
print('The next valid order id is: ', self.nextorderId)
def error(self, reqId, errorCode, errorString):
super().error(reqId, errorCode, errorString)
print("Error. Id:", reqId, "Code:", errorCode, "Msg:", errorString)
def historicalData(self, reqId:int, bar:BarData):
self.data["date"].append(bar.date)
self.data["close"].append(bar.close)
self.df = pd.DataFrame.from_dict(self.data)
tickers = ["A","AAP","AAPL","ABBV","ABC"]
def run_loop():
app.run()
app = IBapi()
app.connect("127.0.0.1", 7496, 5)
app.nextorderId = None
# Start the socket in a thread
api_thread = threading.Thread(target=run_loop, daemon=True)
api_thread.start()
# Check if the API is connected via orderid
while True:
if isinstance(app.nextorderId, int):
print('connected')
break
else:
print('waiting for connection')
time.sleep(1)
n_id = app.nextorderId
for ticker in tickers:
contract = Contract()
contract.symbol = ticker
contract.secType = "STK"
contract.exchange = "SMART"
contract.currency = "USD"
app.reqHistoricalData(n_id, contract, "","15 Y", "1 day", "TRADES", 1, 1, False, [])
time.sleep(10)
app.df.to_csv(path + ticker + ".csv")
n_id = n_id + 1
app.disconnect()