我正在尝试使用 Python 以编程方式使用 Wikimedia Commons 查询服务 [1],但无法通过 OAuth 1 进行身份验证。
下面是一个自包含的 Python 示例,但它不能按预期工作。预期的行为是返回结果集,但实际返回的却是登录页面的 HTML 响应。您可以使用 pip install --user sparqlwrapper oauthlib certifi 安装依赖项。
然后应该为脚本提供一个文本文件的路径,该文件包含申请仅所有者(owner-only)令牌后得到的输出的粘贴内容 [2]。例如:
Consumer token
deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
Consumer secret
deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
Access token
deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
Access secret
deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
[1] https://wcqs-beta.wmflabs.org/;https://diff.wikimedia.org/2020/10/29/sparql-in-the-shadow-of-structured-data-on-commons/
[2] https://www.mediawiki.org/wiki/OAuth/Owner-only_consumers
import sys
from SPARQLWrapper import JSON, SPARQLWrapper
import certifi
from SPARQLWrapper import Wrapper
from functools import partial
from oauthlib.oauth1 import Client
# URL of the SPARQL endpoint this script queries (the Wikimedia Commons
# Query Service, per the accompanying text).
ENDPOINT = "https://wcqs-beta.wmflabs.org/sparql"
# SPARQL query sent to ENDPOINT: selects every ?file that has a wdt:P180
# statement whose value is wd:Q42.
# NOTE(review): no PREFIX declarations are included, so the endpoint
# presumably predefines the wdt:/wd: prefixes -- confirm.
QUERY = """
SELECT ?file WHERE {
?file wdt:P180 wd:Q42 .
}
"""
def monkeypatch_sparqlwrapper():
    """Make SPARQLWrapper validate TLS certificates against certifi's CA bundle.

    Works around outdated system certificate stores by replacing
    ``Wrapper.urlopener`` with a ``functools.partial`` that always passes an
    SSL context built from ``certifi.where()``.  The replacement is tagged
    with a ``monkeypatched`` attribute so that calling this function more
    than once does not stack partials.
    """
    # Local import so the file's top-level import block stays untouched.
    import ssl

    if hasattr(Wrapper.urlopener, "monkeypatched"):
        return

    # The original code passed ``cafile=`` straight through to the opener.
    # Assuming ``Wrapper.urlopener`` is ``urllib.request.urlopen`` (which the
    # ``cafile`` keyword suggests), that keyword was deprecated in Python 3.6
    # and removed in 3.13; an explicit SSL context is the supported way to
    # select the CA bundle.
    context = ssl.create_default_context(cafile=certifi.where())
    patched = partial(Wrapper.urlopener, context=context)
    patched.monkeypatched = True
    Wrapper.urlopener = patched
def oauth_client(auth_file):
    """Build an oauthlib ``Client`` from an owner-only credentials file.

    ``auth_file`` is an iterable of text lines that alternates a label line
    with a value line (label first).  Only the value lines -- those at odd
    zero-based positions -- are kept; each is stripped of surrounding
    whitespace and passed positionally, in file order, to ``Client``.
    """
    secrets_in_order = [
        text.strip()
        for position, text in enumerate(auth_file)
        if position % 2 == 1
    ]
    return Client(*secrets_in_order)
class OAuth1SPARQLWrapper(SPARQLWrapper):
    """SPARQLWrapper subclass that OAuth-signs every request it builds.

    Takes one extra, required keyword argument ``client``: an object with a
    ``sign(uri, method, body, headers)`` method returning the signed
    ``(uri, headers, body)`` triple.
    """

    def __init__(self, *args, **kwargs):
        """Store the required ``client`` keyword, then defer to the base class.

        Raises ``KeyError`` when ``client`` is not supplied.
        """
        signer = kwargs.pop("client")
        self.client = signer
        super().__init__(*args, **kwargs)

    def _createRequest(self):
        """Build the base request, sign it, print it, and return it."""
        req = super()._createRequest()

        # Hand the request's current URL, method, body and headers to the
        # signer and write the signed values back in the same order as
        # before (URL, then headers, then body).
        signed_url, signed_headers, signed_body = self.client.sign(
            req.get_full_url(),
            req.get_method(),
            req.data,
            req.headers,
        )
        req.full_url = signed_url
        req.headers = signed_headers
        req.data = signed_body

        # Debug output of exactly what is about to be sent.
        print("Sending request")
        print("Url", req.full_url)
        print("Headers", req.headers)
        print("Data", req.data)
        return req
# Script entry: patch certificate handling, load the OAuth credentials from
# the file named by the first command-line argument, then run the signed
# query and print the decoded result.
monkeypatch_sparqlwrapper()
# Use a context manager so the credentials file is closed as soon as it has
# been parsed (the handle was previously left open).
with open(sys.argv[1]) as auth_file:
    client = oauth_client(auth_file)
sparql = OAuth1SPARQLWrapper(ENDPOINT, client=client)
sparql.setQuery(QUERY)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
print("Results")
print(results)
我也尝试过不使用 SPARQLWrapper,而只使用 requests + requests_oauthlib。但是,我遇到了同样的问题 —— 返回的仍然是登录页面的 HTML —— 所以看起来这实际上可能是 Wikimedia Commons 查询服务的问题。
import sys
import requests
from requests_oauthlib import OAuth1
def oauth_client(auth_file):
    """Build a ``requests_oauthlib.OAuth1`` auth object from a credentials file.

    ``auth_file`` is an iterable of text lines in which label lines and
    value lines alternate, label first.  Every second line (the values) is
    stripped of surrounding whitespace and passed positionally, in file
    order, to ``OAuth1``.
    """
    # Materialise the lines, then keep positions 1, 3, 5, ... (the values).
    value_lines = list(auth_file)[1::2]
    return OAuth1(*[raw.strip() for raw in value_lines])
# URL of the SPARQL endpoint that the GET request below is sent to.
ENDPOINT = "https://wcqs-beta.wmflabs.org/sparql"
# SPARQL query passed as the "query" URL parameter: selects every ?file that
# has a wdt:P180 statement whose value is wd:Q42.
# NOTE(review): relies on the endpoint predefining the wdt:/wd: prefixes,
# since none are declared here -- confirm.
QUERY = """
SELECT ?file WHERE {
?file wdt:P180 wd:Q42 .
}
"""
# Script entry: load the OAuth credentials from the file named by the first
# command-line argument, issue the signed GET request and print the raw
# response body.
# Use a context manager so the credentials file is closed once parsed (the
# handle was previously left open).
with open(sys.argv[1]) as auth_file:
    auth = oauth_client(auth_file)
r = requests.get(
    ENDPOINT,
    params={"query": QUERY},
    auth=auth,
    # Ask the endpoint for SPARQL results serialised as JSON.
    headers={"Accept": "application/sparql-results+json"},
)
print(r.text)