我正在尝试编写一个 Python 脚本,用它向类似下面这样的 Google 表单提交回复: https://docs.google.com/forms/d/152CTd4VY9pRvLfeACOf6SmmtFAp1CL750Sx72Rh6HJ8/viewform
但是我如何实际发送 POST 以及如何找出这个 POST 实际应该包含的内容?
我正在尝试编写一个 Python 脚本,用它向类似下面这样的 Google 表单提交回复: https://docs.google.com/forms/d/152CTd4VY9pRvLfeACOf6SmmtFAp1CL750Sx72Rh6HJ8/viewform
但是我如何实际发送 POST 以及如何找出这个 POST 实际应该包含的内容?
首先,运行 pip install requests 安装 requests 库。
您必须将特定的表单数据以 POST 方式发送到特定的 URL,这可以使用 requests 库来完成。form_data 字典中的参数对应于表单中的各个选项;如果您不需要某些选项,只需将其从 form_data 中删除。
import requests
# Submission endpoint: the form URL with `viewform` replaced by `formResponse`.
url = 'https://docs.google.com/forms/d/152CTd4VY9pRvLfeACOf6SmmtFAp1CL750Sx72Rh6HJ8/formResponse'

# A dict literal keeps only the LAST value of a repeated key, so writing the
# same entry id once per option silently drops every option except the last.
# All options of the question therefore live in one list under a single key;
# requests form-encodes a list value as one `key=value` pair per element.
# Keep only the option(s) you actually want to submit and delete the rest.
form_data = {
    'entry.2020959411': [
        '18+ sollte absolute Pflicht sein',
        'Alter sollte garkeine Rolle spielen',
        '17+ wäre für mich vertretbar',
        '16+ wäre für mich vertretbar',
        '15+ wäre für mich vertretbar',
        'Ausnahmen von der Regel - Dafür?',
        'Ausnahmen von der Regel - Dagegen?',
        '__other_option__',
    ],
    # Free-text answer sent together with the '__other_option__' choice.
    'entry.2020959411.other_option_response': 'test',
    'draftResponse': [],
    'pageHistory': 0,
}

# Request headers: the Referer points back at the form page and the
# User-Agent is a desktop browser string.
user_agent = {
    'Referer': 'https://docs.google.com/forms/d/152CTd4VY9pRvLfeACOf6SmmtFAp1CL750Sx72Rh6HJ8/viewform',
    'User-Agent': "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.52 Safari/537.36",
}

# POST the form-encoded data; `r` is the requests response object.
r = requests.post(url, data=form_data, headers=user_agent)
根据@pigletfly 的回答,我编写了一个用于获取字段名称的小脚本(仅适用于文本字段表单)
import urllib.request
from bs4 import BeautifulSoup
import requests, warnings
def get_questions(in_url):
    """Return a mapping of question label -> field name for the form at *in_url*.

    The page is downloaded and parsed, and every element inside the first
    ``<form>`` whose ``name`` attribute starts with ``'entry.'`` is collected.
    The label of a field is the value of its first attribute whose name
    contains ``'label'``; fields without such an attribute are keyed as
    ``'unknown'``.

    Note that fields sharing the same label (including ``'unknown'``)
    overwrite one another in the returned dict, so only the last one is kept.

    Args:
        in_url: URL of the form page to inspect.

    Returns:
        dict mapping each label to the field's ``name`` attribute.
    """
    # The context manager closes the HTTP response as soon as the page is read.
    with urllib.request.urlopen(in_url) as res:
        soup = BeautifulSoup(res.read(), 'html.parser')

    def get_name(field):
        # Values of all attributes whose attribute name contains 'label'.
        labels = [v for k, v in field.attrs.items() if 'label' in k]
        return labels[0] if labels else 'unknown'

    # `x and ...` guards against elements that have no name attribute (x is None).
    all_questions = soup.form.find_all(
        attrs={'name': lambda x: x and x.startswith('entry.')})
    return {get_name(q): q['name'] for q in all_questions}
def submit_response(form_url, cur_questions, verbose=False, **answers):
    """POST a set of answers to a form and return the response.

    Args:
        form_url: URL of the form page; its ``/viewform`` part is swapped for
            ``/formResponse`` to obtain the submission URL.
        cur_questions: mapping of question label -> form field name.
        verbose: when true, print the payload before sending it.
        **answers: answers keyed by question label. Labels missing from
            *cur_questions* trigger a ``RuntimeWarning`` and are not sent.

    Returns:
        Whatever ``requests.post`` returns for the submission.
    """
    target = form_url.replace('/viewform', '/formResponse')

    # Fixed fields first, then every known question defaulting to an empty answer.
    payload = {'draftResponse': [], 'pageHistory': 0}
    payload.update(dict.fromkeys(cur_questions.values(), ''))

    for label, answer in answers.items():
        if label not in cur_questions:
            warnings.warn('Unknown Question: {}'.format(label), RuntimeWarning)
            continue
        payload[cur_questions[label]] = answer

    if verbose:
        print(payload)

    headers = {
        'Referer': form_url,
        'User-Agent': "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.52 Safari/537.36",
    }
    return requests.post(target, data=payload, headers=headers)
然后,您可以使用 get_questions 函数获取可以填写的字段
# URL of the example form whose fields are looked up below.
TEST_FORM_URL = "https://docs.google.com/forms/d/e/1FAIpQLSfBmvqCVeDA7IZP2_mw_HZ0OTgDk2a0JN4VlY5KScECWC-_yw/viewform"
# Download the form page and build the label -> field-name mapping.
anno_questions = get_questions(TEST_FORM_URL)
将问题(字段)作为字典获取
{'annotator': 'entry.756364489',
'task': 'entry.1368373366',
'item_id': 'entry.84713541',
'label': 'entry.2072511216',
'session': 'entry.2021127767',
'time': 'entry.1122475936'}
然后使用带有关键字参数的 submit_response 提交
# Answer two questions by label; every other known field is sent as an empty string.
submit_response(TEST_FORM_URL, anno_questions, annotator="TestUser", item_id = 0)
做这样的事情:
import urllib2, urllib
import cookielib
# NOTE: this snippet targets Python 2 (urllib2 / cookielib / urllib.urlencode).
# Cookie jar shared by every request made through the opener below.
cookieJar = cookielib.LWPCookieJar()
opener = urllib2.build_opener(
    # Use the module-level jar directly: there is no `self` in a plain script,
    # so `self.cookieJar` would raise NameError.
    urllib2.HTTPCookieProcessor(cookieJar),
    urllib2.HTTPRedirectHandler(),
    urllib2.HTTPHandler(debuglevel=0))
# Add headers sent with every request made through this opener.
opener.addheaders = [('User-agent', "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36")]
# `value` / `value2` are placeholders: replace them (and the field names)
# with the real form field names and the values you want to submit.
forms = {
    "formname": value,
    "formname2": value2,
}
data = urllib.urlencode(forms)  # Form-encode the fields
# Build the request; supplying `data` makes it a POST. Nothing is sent yet.
req = urllib2.Request('http://www.example.com', data)
res = opener.open(req)  # Actually send the request
try:
    html = res.read()  # Read the response body
finally:
    # Release the connection even if reading fails.
    res.close()
你应该像这样构造它。
要获取表单名称,您需要查看站点的源代码并找到您要输入并提交的表单的名称。
希望这对你有所帮助
祝你好运:)
这是我的有效脚本:
import urllib
import urllib2
# NOTE: this snippet targets Python 2 (urllib.urlencode / urllib2).
user_agent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'
header = {'User-Agent': user_agent}
# Placeholder: replace with the submission URL of your form.
url = "http://....Your google form"
# Values from your form. Include any hidden fields of the form as well if
# you want them to be submitted.
values = {
    'entry.asdfsdfsdasd': 'asdfasdfsd',
    'draftResponse': '[,,"-asdfasdasdf"]',
    'pageHistory': '0',
    'fbzx': '-asdfasdfsd',
}
data = urllib.urlencode(values)
# Constructing a Request only describes the POST; it must be passed to
# urlopen to be sent. The original built the Request and discarded it, so
# nothing ever reached the server.
req = urllib2.Request(url, data, header)
response = urllib2.urlopen(req)
try:
    result = response.read()
finally:
    # Release the connection even if reading fails.
    response.close()