0

The main purpose is to make multiple threads basing on count of pages. Is there any way to make it easier than just making ton of ifs? Cannot get an idea of how to solve this properly.

import threading

from bs4 import BeautifulSoup
from os import system, name
import requests
import sqlite3
import time
from dhooks import Webhook, Embed
import pprint
from threading import Thread


def page_check(URL):
    page = requests.get(URL)
    soup = BeautifulSoup(page.text, 'html.parser')
    products = soup.find('div', attrs={'class': 'category-products'}).findAll('ul', attrs={'class': 'products-grid'})
    print(products)

def page_start(count):
    count = str(count)
    URL = 'html/page/' + count + '/ke'
    threading.Thread(target=page_check(URL)).start()

def number_of_pages():
    URL = 'html/page/'
    sites = requests.get(URL)
    soup = BeautifulSoup(sites.text, 'html.parser')
    number = soup.find('p', attrs={'class': 'amount mobilehidden'}).text
    number = number.split()
    number = int(number[5])
    pages1 = number / 48
    pages2 = int(number / 48)
    if (pages1 / pages2) > 1:
        pages = pages2 + 1
    elif (pages1 / pages2) == 1:
        pages = pages2
    return pages

def main():
    pages = number_of_pages()

    if pages == 1:
        threading.Thread(target=page_check('htmlpage/p/1/limit/48.html')).start()
    if pages == 2:
        threading.Thread(target=page_check('htmlpage/p/1/limit/48.html')).start()
        threading.Thread(target=page_check('htmlpage/p/2/limit/48.html')).start()
    if pages == 3:
        threading.Thread(target=page_check('htmlpage/p/1/limit/48.html')).start()
        threading.Thread(target=page_check('htmlpage/p/2/limit/48.html')).start()
        threading.Thread(target=page_check('htmlpage/p/3/limit/48.html')).start()

    main()
4

2 回答 2

3

您正在寻找 for循环range方法。

num_pages = 3
for i in range(num_pages):
    page_start(i)
# 0
# 1
# 2

range() 将提供从 0 到(但不包括)目标数的可迭代值。您还可以提供一个起始值,例如 range(1, 4) 将计数 1, 2, 3。有关更多选项,请参阅文档。

编辑:这只是解决了如何避免多个if语句。要启动多个线程,请使用 pythons 并发模块:https ://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor

于 2020-07-26T10:59:44.767 回答
0

将此代码添加到 main 中,我将执行它。谢谢大家

code = ''
while pages > 0:
    count = str(count)
    URL = 'htmlpage' + count + '/limit/48.html'
    code = code + 'threading.Thread(target=page_check(' + "'" +  URL + "'" + ')).start()' + '\n'
    pages = pages - 1
    count = int(count)
    count = count + 1
于 2020-07-26T11:26:08.810 回答