我正在尝试从 LinkedIn 获取公司信息,但返回的 HTML 正文(body)中没有任何内容。你能告诉我问题出在哪里吗?
我需要获取以下信息:
company
website
industry
employees
etc.
但我做不到。我得到的全部 HTML 只有下面这些:
代码:
import requests
import webbrowser,html5lib
from bs4 import BeautifulSoup
# Fetch the public company page. The request carries no login session or
# cookies, so LinkedIn may answer with a script-only page that redirects the
# browser to its "/authwall" login wall instead of the company profile.
# The timeout keeps the script from blocking forever if the server never
# responds (requests applies no timeout by default).
linkdine_company_about = requests.get(
    'https://www.linkedin.com/company/exxonmobil',
    timeout=10,
)
# Parse whatever markup came back and print it unchanged, so the actual
# server response can be inspected.
html = BeautifulSoup(linkdine_company_about.text, 'html.parser')
print(html)
运行结果:
<pre>
exxonmobil
https://www.linkedin.com/company/exxonmobil
<html><head>
<script type="text/javascript">
window.onload = function () {
// Parse the tracking code from cookies.
var trk = "bf";
var trkInfo = "bf";
var cookies = document.cookie.split("; ");
for (var i = 0; i < cookies.length; ++i) {
if ((cookies[i].indexOf("trkCode=") == 0) && (cookies[i].length > 8)) {
trk = cookies[i].substring(8);
} else if ((cookies[i].indexOf("trkInfo=") == 0) && (cookies[i].length > 8)) {
trkInfo = cookies[i].substring(8);
}
}
if (window.location.protocol == "http:") {
// If "sl" cookie is set, redirect to https.
for (var i = 0; i < cookies.length; ++i) {
if ((cookies[i].indexOf("sl=") == 0) && (cookies[i].length > 3)) {
window.location.href = "https:" +
window.location.href.substring(window.location.protocol.length);
return;
}
}
}
// Get the new domain. For international domains such as
// fr.linkedin.com, we convert it to www.linkedin.com
var domain = "www.linkedin.com";
if (domain != location.host) {
var subdomainIndex = location.host.indexOf(".linkedin");
if (subdomainIndex != -1) {
domain = "www" + location.host.substring(subdomainIndex);
}
}
window.location.href = "https://" + domain + "/authwall?trk=" + trk + "&trkInfo=" + trkInfo +
"&originalReferer=" + document.referrer.substr(0, 200) +
"&sessionRedirect=" + encodeURIComponent(window.location.href);
}
</script>
</head></html>
***
Process finished with exit code 0
</pre>