1

我正在尝试使用 Python 爬虫(scraper)脚本从百度地图中获取地址信息。以下是代码:

# -*- coding: utf-8 -*-
import requests
import re
import csv
import time
def BusinessFromBaiduDitu(citycode = '287',key_word='筛网',pageno=0):
    """Build the query parameters for a Baidu Maps search request.

    In this listing the function body is only the ``parameter`` dict
    assignment: the dict is bound to a local name and never returned, so
    calling the function returns ``None``.

    Args:
        citycode: City code, sent as the ``"c"`` parameter.
        key_word: Search keyword, sent as the ``"wd"`` parameter.
        pageno: Page number, sent as ``"pn"``; ``"nn"`` is ``pageno * 10``.
    """
    parameter = {
    "newmap": "1",
    "reqflag": "pcmap",
    "biz": "1",
    "from": "webmap",
    "da_par": "direct",
    "pcevaname": "pc4.1",
    "qt": "con",
    "c": citycode,        # city code
    "wd": key_word,       # search keyword
    "wd2": "",
    "pn": pageno,         # page number
    "nn": pageno * 10,    # presumably the result offset (10 per page) - TODO confirm
    "db": "0",
    "sug": "0",
    "addr": "0",
    "da_src": "pcmappg.poi.page",
    "on_gel": "1",
    "src": "7",
    "gr": "3",
    "l": "12",
    "tn": "B_NORMAL_MAP",
        # "u_loc": "12621219.536556,2630747.285024",
    "ie": "utf-8",
    # "b": "(11845157.18,3047692.2;11922085.18,3073932.2)",  # this is probably the geographic coordinates; can be ignored
    "t": "1468896652886"}

# NOTE(review): in this listing every statement from here on starts at
# column 0, i.e. at module level, outside the body of
# `def BusinessFromBaiduDitu`. The names `parameter`, `citycode`, `key_word`
# and `pageno` used below are only defined inside that function, which is
# consistent with the reported NameError on the `requests.get` line.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36(KHTML, like Gecko) Chrome/56.0.2924.87Safari/537.36'}
url = 'http://map.baidu.com/'
htm = requests.get(url, params=parameter, headers=headers)
htm = htm.text.encode('latin-1').decode('unicode_escape')  # transcode: decode escape sequences in the response text
# Cut the response into chunks, each running from just after
# `address_norm":"[` up to (not including) the next `"ty":`.
pattern = r'(?<=\baddress_norm":"\[).+?(?="ty":)'
htm = re.findall(pattern, htm)  # match section by section

for r in htm:
    pattern = r'(?<=\b"\},"name":").+?(?=")'
    name = re.findall(pattern, r)
    # NOTE(review): with the guard below commented out, the second findall
    # always overwrites the result of the first one.
    #if not name:
    pattern = r'(?<=\b,"name":").+?(?=")'
    name = re.findall(pattern, r)
    print(name[0])  # name

    # adr[0] is the text from the start of the chunk up to the first `"`.
    pattern = r'.+?(?=")'
    adr = re.findall(pattern, r)
    # Replace each "(...[" span, then each "(...]" span, with a single space.
    pattern = r'\(.+?\['
    address = re.sub(pattern, ' ', adr[0])
    pattern = r'\(.+?\]'
    address = re.sub(pattern, ' ', address)
    print(address)  # address

    pattern = r'(?<="phone":").+?(?=")'
    phone = re.findall(pattern, r)
    # NOTE(review): as pasted, the `except:` below sits at column 0 and does
    # not line up with this `try:`, and its `continue` ends up outside the
    # loop, so this listing is not valid Python - presumably indentation was
    # lost when the code was pasted.
    try:
        if phone[0] and '",' != phone[0]:
            phone_list = phone[0].split(sep=',')
        # NOTE(review): `phone_list` is only assigned when the `if` above is
        # true, but this loop runs either way.
        for number in phone_list:
            # Keep only numbers starting with "1" (presumably mobile numbers - confirm).
            if re.match('1', number):
                print(citycode+name[0]+','+address+','+number)              
                # NOTE(review): `writer` is not defined anywhere in this
                # listing; presumably a csv.writer created elsewhere.
                writer.writerow((name[0], address, number))
except:
    continue
print(citycode + '  ' + key_word + '  ' + str(pageno))

运行此脚本时,我收到以下错误:

Traceback (most recent call last): File "E:\map.py", line 41, in <module> htm = requests.get(url, params=parameter, headers=headers) NameError: name 'parameter' is not defined 

在我看来,变量 `parameter` 已经在 def 代码块中定义了。为什么还会出现这个错误?我是 Python 新手,如能得到帮助,我将不胜感激。

此致。

孙业平

4

1 回答 1

0

`parameter` 未定义有几个原因:您定义了函数 BusinessFromBaiduDitu,但既没有调用它,也没有从函数中返回数据。在函数内部创建的变量是局部变量,函数外部的代码无法直接访问它。

这是一个从函数返回数据的示例:

def test():
     """Create a dict inside the function and return it to the caller."""
     # `a` is local to test(); it is not visible outside the function.
     a = {"Test1":"1"}
     return a

# The dict becomes available at module level only because test() is called
# and its return value is bound to a new name here.
outside_a = test()
print(outside_a)
于 2017-04-01T16:08:27.850 回答