代码如下:
import threading
from urllib.parse import quote

import requests
from lxml import etree
# Request headers passed to every page download; the User-Agent is a
# desktop Edge (Chromium) browser identification string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.69'
    ),
}

# threads: worker threads, kept so they can be joined later.
# url_details: links gathered by url_extract.
threads, url_details = [], []
# Ask the user which Douban book tag to crawl; the answer is interpolated
# into the tag listing URL when the worker threads are created.
tag_name = input('豆瓣读书标签名:')
def url_extract(url, timeout=10):
    """Download one listing page and collect the links found on it.

    The page is fetched with the module-level ``headers``, parsed as HTML,
    and the ``href`` of every ``<a class="nbg">`` located under an
    ``<li class="subject-item">`` is appended to the module-level
    ``url_details`` list, which is shared by all worker threads.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block this worker, and
            any ``join()`` on its thread, indefinitely.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    page = etree.HTML(response.text)
    links = page.xpath('//li[@class="subject-item"]//a[@class="nbg"]/@href')
    url_details.extend(links)
# Percent-encode the tag before placing it in the URL path. Characters such
# as '#', '?', '&' or '/' would otherwise change the URL's structure: with
# the tag "C#" everything after '#' becomes a fragment, the query string is
# never sent, and every thread would download the same page.
encoded_tag = quote(tag_name, safe='')

# Start one worker thread per listing offset (start=0, 20, ..., 180).
for i in range(0, 200, 20):
    thread = threading.Thread(
        target=url_extract,
        args=(f'https://book.douban.com/tag/{encoded_tag}?start={i}&type=T',),
    )
    thread.start()
    threads.append(thread)
# NOTE(review): the original indentation was lost; this message is assumed
# to be printed once, after all threads have been started — confirm.
print('多线程!启动!')

# Wait for every worker to finish before reading the shared result list.
for thread in threads:
    thread.join()

print(url_details)
print(len(url_details))
打完收工