我下面的代码获取每个健身房的街道地址,但是健身房开放时间的输出间距有错误。关于我哪里出错的任何想法?
import urlparse
from bs4 import BeautifulSoup
from bs4 import Tag
import requests
import time
import csv
sitemap = 'https://www.planetfitness.com/sitemap'
sitemap_content = requests.get(sitemap).content
soup = BeautifulSoup(sitemap_content, 'html.parser')
atags = soup.select('td[class~=club-title] > a[href^="/gyms"]')
links = [atag.get('href') for atag in atags]
with open('gyms.csv', 'w') as gf:
gymwriter = csv.writer(gf)
for link in links:
gymurl = urlparse.urljoin(sitemap, link)
sitemap_content = requests.get(gymurl).content
soup = BeautifulSoup(sitemap_content, 'html.parser')
gymrow = [ gymurl ]
address_line1 = soup.select('p[class~=address] > span[class~=address-line1]')
gymrow.append(address_line1[0].text)
locality = soup.select('p[class~=address] > span[class~=locality]')
gymrow.append(locality[0].text)
administrative_area = soup.select('p[class~=address] > span[class~=administrative-area]')
gymrow.append(administrative_area[0].text)
postal_code = soup.select('p[class~=address] > span[class~=postal-code]')
gymrow.append(postal_code[0].text)
country = soup.select('p[class~=address] > span[class~=country]')
gymrow.append(country[0].text)
strongs = soup.select('div > strong')
for strong in strongs:
if strong.text == 'Club Hours':
for sibling in strong.next_siblings:
if isinstance(sibling, Tag):
hours = sibling.text
gymrow.append(hours)
break
print(gymrow)
gymwriter.writerow(gymrow)
time.sleep(3)
感谢您的帮助!