就我自己而言,我对现有的解决方案并不满意。所以我用 Python3 和 BeautifulSoup 创建了一个解决方案。
该函数接收字符串形式的 HTML 源代码,并查找其中的标题标签(例如 h1)。在接下来的步骤中,为每个标题创建一个 id= 属性,并生成相应的目录条目。
def generate_toc(html_out: str, toc_title: str = 'Inhalt') -> str:
    """Create a table of contents (TOC) based on the header tags.

    Every ``h1``-``h6`` tag gets an ``id`` attribute (an existing ``id`` is
    kept, otherwise a random UUID is assigned) and a matching link is added
    to the TOC. The TOC is placed at the top of the HTML output: before the
    first element of ``<body>``, or before the first element of the document
    when the input is a fragment without ``<body>``.

    Args:
        html_out: A string containing the HTML source.
        toc_title: Headline shown above the TOC entries.

    Returns:
        The prettified HTML source including the TOC.
    """
    # Local imports keep the function self-contained.
    import uuid

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html_out, 'html.parser')

    # Create and place the div element containing the TOC.
    toc_container = soup.new_tag('div', id='toc_container')
    # Fragments (e.g. a bare table) have no <body>; fall back to the
    # document root instead of failing on ``soup.body`` being None.
    root = soup.body if soup.body is not None else soup
    first_child = root.find(True, recursive=False)
    if first_child is not None:
        first_child.insert_before(toc_container)
    else:
        # No element children at all (empty body/document).
        root.insert(0, toc_container)

    # TOC headline.
    title = soup.new_tag('p', attrs={'class': 'toc_title'})
    title.string = toc_title
    toc_container.append(title)

    def _sub_create_anchor(h_tag):
        """Create a TOC entry (li-tag containing an a-tag) for a header."""
        # Keep an existing id so links already pointing at the header
        # stay valid; store the anchor as a plain string.
        anchor = h_tag.get('id') or str(uuid.uuid4())
        h_tag['id'] = anchor
        link = soup.new_tag('a', href=f'#{anchor}')
        # ``h_tag.string`` is None for headers with nested markup or no
        # content; ``get_text()`` always returns a string.
        link.string = h_tag.get_text()
        entry = soup.new_tag('li')
        entry.append(link)
        return entry

    # Main ul-tag for the first level of the TOC.
    root_ul = soup.new_tag('ul', attrs={'class': 'toc_list'})
    toc_container.append(root_ul)

    # Stack of (header level, ul-tag). The root list counts as level 1
    # and is never popped, so the stack cannot become empty.
    ul_stack = [(1, root_ul)]

    h_tags_to_find = [f'h{i}' for i in range(1, 7)]  # 'h1' - 'h6'
    for header in soup.find_all(h_tags_to_find):
        level = int(header.name[1:])

        # Going upstairs: close every list that is deeper than this
        # header, not just one, so jumps like h3 -> h1 nest correctly.
        while len(ul_stack) > 1 and ul_stack[-1][0] > level:
            ul_stack.pop()

        # Going downstairs: open one sub list for the new level.
        if ul_stack[-1][0] < level:
            sub_ul = soup.new_tag('ul', attrs={'class': 'toc_list'})
            ul_stack[-1][1].append(sub_ul)
            ul_stack.append((level, sub_ul))

        ul_stack[-1][1].append(_sub_create_anchor(header))

    return soup.prettify(formatter='html5')
这个方案未必在您的所有用例中都足够优雅。我自己用它把目录(TOC)放在由数据科学例程(例如 pandas)生成的 HTML 报告的顶部。