Python 获取指定豆瓣标签前 20 页书籍 URL

Python 获取指定豆瓣标签前 20 页书籍 URL

代码如下:

import threading
from urllib.parse import quote

import requests
from lxml import etree

# Request headers passed to requests.get() by url_extract; carries a desktop
# browser User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.69'
}
# Thread objects started by the script, kept so each one can be joined later.
threads = []
# Shared result list: url_extract extends it with the hrefs found on a page.
url_details = []

# Ask the user (prompt text is Chinese for "Douban Books tag name:") which tag
# to crawl; the value is interpolated into the listing URL.
tag_name = input('豆瓣读书标签名:')


def url_extract(url):
    """Fetch one listing page and collect the book detail URLs it contains.

    Downloads ``url`` using the module-level ``headers``, parses the HTML and
    appends the ``href`` of every ``<a class="nbg">`` found inside a
    ``<li class="subject-item">`` to the module-level ``url_details`` list.

    Network errors, timeouts and non-2xx responses are reported on stdout and
    the page is skipped instead of raising, so a single failing page does not
    end the caller with an unhandled exception.

    Args:
        url: Absolute URL of the listing page to fetch.

    Returns:
        None. Results are accumulated in ``url_details``.
    """
    try:
        # Without a timeout a stalled connection would block forever (and so
        # would any thread.join() waiting on this call).
        response = requests.get(url=url, headers=headers, timeout=10)
        # Treat 4xx/5xx (e.g. an anti-crawler block page) as a failure rather
        # than silently parsing it as "no results".
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f'Failed to fetch {url}: {exc}')
        return

    page_source = response.text
    if not page_source.strip():
        # Nothing to parse; avoid handing an empty document to lxml.
        return

    tree = etree.HTML(page_source)
    if tree is None:
        return
    url_details.extend(
        tree.xpath('//li[@class="subject-item"]//a[@class="nbg"]/@href')
    )


# Percent-encode the tag so characters such as '#', '?', '&', '/' or spaces
# remain part of the path segment instead of being parsed as URL syntax
# (e.g. an unescaped "C#" would be cut off at the fragment marker).
encoded_tag = quote(tag_name.strip(), safe='')

# One thread per listing page. The loop steps the `start` query parameter by
# 20 and requests offsets 0..180, i.e. 10 requests in total.
# NOTE(review): the accompanying title says "first 20 pages" -- confirm
# whether the upper bound should be 400 instead of 200.
for i in range(0, 200, 20):
    thread = threading.Thread(
        target=url_extract,
        args=(f'https://book.douban.com/tag/{encoded_tag}?start={i}&type=T',),
    )
    thread.start()
    threads.append(thread)
print('多线程!启动!')

# Wait for every worker before reading the shared result list.
for thread in threads:
    thread.join()

# Results are in thread completion order, not necessarily page order.
print(url_details)
print(len(url_details))

打完收工