0

tiktokuserhtml = Jsoup.connect("https://www.tiktok.com/@" + userSelected).get().html();

我尝试用上面的代码获取目标网页的 HTML,以便从中抓取用户 userSelected 的关注者(粉丝)数量。但由于某种原因,返回的并不是我需要的 HTML,而是另外一些脚本,其中既不包含关注者数量,也没有任何其他有用的内容。

以下是返回的内容,我不确定它来自哪里,也不清楚它是什么。

     <head> 
      <meta charset="utf-8"> 
      <title>TikTok</title> 
      <link rel="shortcut icon" type="image/x-icon" id="favicon"> 
      <meta name="screen-orientation" content="portrait"> 
      <meta name="x5-orientation" content="portrait"> 
      <meta name="format-detection" content="telephone=no"> 
      <meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no, minimum-scale=1, maximum-scale=1, minimal-ui, viewport-fit=cover"> 
      <meta name="apple-mobile-web-app-capable" content="yes"> 
      <meta name="applicable-device" content="pc,mobile"> 
      <link rel="dns-prefetch" href="https://sf16-scmcdn-va.ibytedtos.com"> 
      <script async src="https://sf16-scmcdn-va.ibytedtos.com/goofy/log-sdk/collect/collect-tcpy.js"></script> 
      <script>
                const option = {"title":"tiktok-verify-page","iid":"0","did":"0","app_name":"tiktok","aid":1284,"favicon":"https://s16.tiktokcdn.com/musical/resource/mtact/static/images/tiktok-logo/logo.png","mobileIcons":["https://s16.tiktokcdn.com/musical/resource/mtact/static/images/tiktok-logo/tiktok_m.png","https://s16.tiktokcdn.com/musical/resource/mtact/static/images/tiktok-logo/tiktok_m2x.png","https://s16.tiktokcdn.com/musical/resource/mtact/static/images/tiktok-logo/tiktok_m3x.png"],"icons":["https://s16.tiktokcdn.com/musical/resource/mtact/static/images/tiktok-logo/tiktok_w.png","https://s16.tiktokcdn.com/musical/resource/mtact/static/images/tiktok-logo/tiktok_w2x.png","https://s16.tiktokcdn.com/musical/resource/mtact/static/images/tiktok-logo/tiktok_w3x.png"],"region":"va","type":"slide","verifyConfig":{"code":10000,"type":"verify","subtype":"slide","fp":"verify_f247a6258fc9b72f5c184592017504c0","region":"va","detail":"vyEHWaYR3h3l0QvBa1TJfcWTEm1iLi*qZ9Ker9Xuz9cIMlWHAzsnxqZJFd7pIXwiYqZdAfUUR3ZIJcKq5C7kq8Y4BQCa3Q73*L1sGIxdh9ZDAur6gP17kjAnvsk-bqt1oXayAdy6oIXc4t9LOU8SZ-StiK0C*t1U9iE3QeMY2KZDPP0f0eHSbaXzujk2Pr4Weg8Gi-A*8Vi2L9s3eoGRDfv6WA*6qutY0zF3EBmWAFFuzwSYCCearmgEhRj6oLzGcK1jF9*gsU5dKjNyTmjHOpHmxjpPaak3aAIQHlVcw6urSdnSrvkxmHkF72HHh*5GUofIaKLGt-kCPwDiuZzaFf8Pizy1*IZ7ePZWeEbCFDdKUfU9sXjTeNY."},"lang":"en"};
    
                if(!option.region) {
                    option.region = 'va';
                }
                var verifyTime = new Date().getTime();
                (function(win, export_obj) {
                    win['TeaAnalyticsObject'] = export_obj;
                    if (!win[export_obj]) {
                        function _collect() {
                            _collect.q.push(arguments);
                        }
                        _collect.q = _collect.q || [];
                        win[export_obj] = _collect;            
                    }
                    win[export_obj].l = +new Date();
                })(window, 'collectEvent');
    
                window.collectEvent('page.init', {
                    app_id: option.region === 'cn' ? 2018 : 2740,
                    channel: option.region === 'boe' ? 'cn' : option.region,
                    log: true,
                });
    
                window.collectEvent('page.start');
                window.collectEvent('page.verify_page_load', {
                    aid: option.aid,
                    product_host: location.host,
                    product_path: location.pathname,
                    time: new Date().getTime(),
                    is_success: 0,
                    duration: new Date().getTime() - verifyTime
                })
                window.onbeforeunload = function () {
                    window.collectEvent(document.readyState === 'complete' ? 'page.verify_page_close' : 'page.verify_page_load_close', {
                        product_host: location.host,
                        product_path: location.pathname,
                        aid: option.aid,
                        time: new Date().getTime(),
                        fp: (document.cookie.match(/s_v_web_id=(\w+)/) || [])[1],
                        is_success: Number(!!window.verify_is_success)
                    })
                }
            </script> 
      <script src="https://sf16-scmcdn-va.ibytedtos.com/goofy/sec_sdk_build/3.1.3/captcha/index.js"></script> 
      <script async src="https://sf16-muse-va.ibytedtos.com/obj/eden-va```
4

1 回答 1

0
<script src="https://sf16-scmcdn-va.ibytedtos.com/goofy/sec_sdk_build/3.1.3/captcha/index.js"></script>

看起来他们对您的请求返回的是验证码(captcha)页面,而不是用户主页——返回内容中的 "tiktok-verify-page" 标题和上面这个 captcha 脚本都说明了这一点。您也许能找到通过设置请求标头或 cookie 来绕过它的方法,但该站点似乎并不希望您用自己构造的新用户代理(user agent)来访问它。

一个想法:也许您可以在 WebView 中把该页面展示给您的用户,让用户通过验证码验证,然后在后续的 jsoup 请求中使用由此得到的标头/cookie(具体取决于该站点采用何种方式进行指纹识别)。

于 2021-07-22T08:57:20.750 回答