I'm trying to write a simple crawler, but reading the response from the server only works the first time: after the first answer the reader stops producing bytes and just returns b''. I tried setting a timeout after the write and calling drain(), but that made no difference. Wireshark shows the responses arriving from the server, yet my program never sees them.
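The timeout/drain attempt was roughly along these lines (a minimal sketch, not my exact code; the work_with_timeout name, the asyncio.wait_for wrapper and the 5-second value are only illustrative, and smart_read is the helper from the full listing below):

async def work_with_timeout(host, port, request):
    reader, writer = await asyncio.open_connection(host, port)
    while True:
        writer.write(request)
        await writer.drain()  # make sure the request bytes are actually flushed
        try:
            # give up on this read if nothing arrives within a few seconds
            resp = await asyncio.wait_for(smart_read(reader), timeout=5)
        except asyncio.TimeoutError:
            resp = b''
        print(resp)

That variant behaved the same way. The stripped-down code that still reproduces the problem: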
import asyncio
HOST = '93.184.216.34'
PORT = 80
CONCURRENT_CONNECTIONS = 3

request = 'GET / HTTP/1.1\r\n' \
          'Host: example.com\r\n' \
          'Content-Type: application/x-www-form-urlencoded\r\n' \
          '\r\n'.encode()

async def smart_read(reader):
    # keep reading until read() returns b'', i.e. until the peer closes the connection
    buffer = b''
    while True:
        response = await reader.read(1024)
        if not response:
            break
        buffer += response
    return buffer

async def work(host, port, request):
    reader, writer = await asyncio.open_connection(host, port)
    while True:
        # send the request and read the reply on the same connection, over and over
        writer.write(request)
        resp = await smart_read(reader)
        print(resp)

tasks = []
for _ in range(CONCURRENT_CONNECTIONS):
    tasks.append(asyncio.ensure_future(work(HOST, PORT, request)))

loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))
loop.close()