下面提到的代码 1 只给了我 1 行,而代码 2 给了我所有的行。code-1 给我的 1 行是递归的最后一个元素(该行是 code-2 输出的最后一个元素)。
请仔细检查代码 1 和代码 2 之间的区别,并帮助我查明导致此问题的问题。
代码 1:
# -*- coding: cp1252 -*-
import csv
import urllib2
import sys
import urllib
import time
import mechanize
import cookielib
from bs4 import BeautifulSoup
from itertools import islice
cy_q = int(time.strftime("%m"))
if cy_q <= 3:
q = 1
elif cy_q <=6:
q = 2
elif cy_q <=9:
q = 3
else:
q = 4
month = int(time.strftime("%m"))
if month <= 6:
fy = time.strftime("%Y")
else:
fy = int(time.strftime("%Y")) +1
if month <=3:
fy_q = 3
elif month <=6:
fy_q = 4
elif month <= 9:
fy_q = 1
else:
fy_q = 2
urls = ['http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=0',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=1',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=2',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=3',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=4',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=5',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=6',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=7',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=8',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=9',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=10'
]
for url in urls:
page= urllib2.urlopen(url).read()
soup = BeautifulSoup(page)
items = soup.findAll('h3', {"class": "title"})
prices_int = soup.findAll('span', {"class": "price"})
prices_dec = [None]*100
j = 0
i = 0
for tag in soup.findAll('span', {'class': 'priceDecimalPart'}):
try:
check = soup.findAll('span', {"class": "priceDecimalPart"})[j].parent['class']
except KeyError:
prices_dec[i] = soup.findAll('span', {"class": "priceDecimalPart"})[j]
i = i + 1
j = j + 1
with open('sfr_oemtest.csv', 'wb') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=',')
spamwriter.writerow(["Date","Month","FY","CY","FY Quarter","CY Quarter","Day of Week","Geography","MO","OEM","Device Name","GDN",
"Refurbished (Y/N)","Color","Storage (GB)","Additional","Plan Name","Currency","Device Price","Plan Price",
"Plan Data","Plan Minutes"])
for item, price_int, price_dec in zip(items,prices_int,prices_dec):
textcontent = u' '.join(item.stripped_strings)
name_1 = unicode(textcontent).encode('utf8').replace("é","").replace("RECONDITIONNE","Refurbished").replace("reconditionn","Refurbished").replace("Tablette","Tablet").replace("Noir et Blanc","Black and White").replace("Remis à neuf","Refurbished").replace("Remis à Neuf","Refurbished").replace("Reconditionn","Refurbished").replace("Go","GB").replace("Bleu Nuit","Midnight Blue").replace("Noir","Black").replace("Blanc","White").replace("Bleu","Blue").replace("Rose","Pink").replace("Rouge","Red").replace("Gris","Grey").strip()
oem = list(name_1)
pos = oem.index(" ")
if name_1.find('Refurbished') == -1:
name = name_1
refur = "N"
else:
name = name_1.replace("Refurbished","")
refur = "Y"
if name_1:
spamwriter.writerow([time.strftime("%Y-%m-%d"),time.strftime("%B"),fy,time.strftime("%Y"),fy_q,q,
time.strftime("%A") , "France", "SFR",name[0:pos],name,"",refur,"","","","24 Months",
"€" ,unicode(price_int.string).encode('utf8').strip().replace("€","").replace(",",".")+
unicode(price_dec.string).encode('utf8').strip().replace("€","").replace(",","."),"","",""])
代码 2:
urls = ['http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=0',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=1',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=2',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=3',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=4',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=5',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=6',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=7',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=8',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=9',
'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=10'
]
for url in urls:
page= urllib2.urlopen(url).read()
soup = BeautifulSoup(page)
items = soup.findAll('h3', {"class": "title"})
prices_int = soup.findAll('span', {"class": "price"})
prices_dec = [None]*100
j = 0
i = 0
for tag in soup.findAll('span', {'class': 'priceDecimalPart'}):
try:
check = soup.findAll('span', {"class": "priceDecimalPart"})[j].parent['class']
except KeyError:
prices_dec[i] = soup.findAll('span', {"class": "priceDecimalPart"})[j]
i = i + 1
j = j + 1
with open('Pricing_Updated.csv', 'ab') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=',')
# spamwriter.writerow(["Date","Month","Day of Week","Geography","Mobile Operator","Device Name","Price","Monthly Price","Plan"])
# spamwriter.writerow(["Date","Month","FY","CY","FY Quarter","CY Quarter","Day of Week","Geography","MO","OEM","Device Name","GDN",
# "Refurbished (Y/N)","Color","Storage (GB)","Additional","Plan Name","Currency","Device Price","Plan Price",
# "Plan Data","Plan Minutes"])
for item, price_int, price_dec in zip(items,prices_int,prices_dec):
textcontent = u' '.join(item.stripped_strings)
if textcontent:
spamwriter.writerow([time.strftime("%Y-%m-%d"),
time.strftime("%B"),fy,time.strftime("%Y"),fy_q,q,
time.strftime("%A") , "France", "SFR","",
unicode(textcontent).encode('utf8')
.replace("é","")
.replace("RECONDITIONNE","Refurbished")
.replace("reconditionn","Refurbished")
.replace("Tablette","Tablet")
.replace("Noir et Blanc","Black and White")
.replace("Remis à neuf","Refurbished")
.replace("Remis à Neuf","Refurbished")
.replace("Reconditionn","Refurbished")
.replace("Go","GB")
.replace("Bleu Nuit","Midnight Blue")
.replace("Noir","Black")
.replace("Blanc","White")
.replace("Bleu","Blue")
.replace("Rose","Pink")
.replace("Rouge","Red")
.replace("Gris","Grey"),"","","","","","24 Months",
"€" ,unicode(price_int.string).encode('utf8').strip().replace("€","").replace(",",".")+
unicode(price_dec.string).encode('utf8').strip().replace("€","").replace(",","."),"","",""])