我正在尝试批量提取 20,000 个域名的 WHOIS 信息,python 代码适用于我的 csv 文件中的 2 个项目,但会导致 20000 个域名的整个数据集出现错误
尝试了2个域名,OK。使用 20k 域名的完整列表会带来错误
import whois
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import socket
import os
import csv
import datetime
import time
import requests
from ipwhois import IPWhois
from urllib import request
from ipwhois.utils import get_countries
import tldextract
from ipwhois.utils import get_countries
countries = get_countries(is_legacy_xml=True)
from ipwhois.experimental import bulk_lookup_rdap
from ipwhois.hr import (HR_ASN, HR_ASN_ORIGIN, HR_RDAP_COMMON, HR_RDAP, HR_WHOIS, HR_WHOIS_NIR)
countries = get_countries(is_legacy_xml=True)
import ipaddress
df = pd.read_csv('labelled_dataset.csv')
#TimeOut Setting
s = socket.socket()
s.settimeout(10)
#Date Processing Function
def check_date_type(d):
if type(d) is datetime.datetime:
return d
if type(d) is list:
return d[0]
for index,row in df.iterrows():
DN = df.iloc[index]['Domains']
df['IPaddr'] = socket.gethostbyname(DN)
df['IPcity'] = IPWhois(socket.gethostbyname(DN), allow_permutations=True).lookup_whois()['nets'][0]['city']
df['ASNumber'] = IPWhois(socket.gethostbyname(DN), allow_permutations=True).lookup_whois()['asn']
df['NetAddr'] = IPWhois(socket.gethostbyname(DN), allow_permutations=True).lookup_whois()['nets'][0]['address']
df['NetCity'] = IPWhois(socket.gethostbyname(DN), allow_permutations=True).lookup_whois()['nets'][0]['city']
df['NetPostCode'] = IPWhois(socket.gethostbyname(DN), allow_permutations=True).lookup_whois()['nets'][0]['postal_code']
W = whois.whois(DN)
df['WebsiteName'] = W.name
df['ASRegistrar'] = W.registrar
df['CtryCode'] = W.country
df['Dstatus'] = W.status[1]
df['RegDate'] = check_date_type(W.creation_date)
df['ExDate'] = check_date_type(W.expiration_date)
df.to_csv('extracted_dataset_1_1.csv', index=False)
期望 ASN 详细信息的输出,每个域名的 WHOIS 信息导出到 csv 文件中