如果您不坚持使用 pandas 中的 read_html,则此代码可以完成工作:
import pandas as pd
from lxml.html import parse
from urllib2 import urlopen
from pandas.io.parsers import TextParser
def _unpack(row, kind='td'):
elts = row.findall('.//%s' % kind)
return [val.text_content() for val in elts]
def parse_options_data(table):
rows = table.findall('.//tr')
header = _unpack(rows[0], kind='th')
data = [_unpack(r) for r in rows[1:]]
return TextParser(data, names=header).get_chunk()
parsed = parse(urlopen('http://www.bmfbovespa.com.br/en-us/intros/Limits-and-Haircuts-for-accepting-stocks-as-collateral.aspx?idioma=en-us'))
doc = parsed.getroot()
tables = doc.findall('.//table')
table = parse_options_data(tables[0])
这摘自 Wes McKinney 的“Python for Data analysis”一书。