赞
踩
# -*- coding: UTF-8 -*-
"""Download wallpapers from a wallhaven.cc search result.

Flow: read the page count from the search page header, collect the
wallpaper detail-page links from each result page, then fetch every detail
page in its own thread and save the image it references under
``Wallpapers/``.
"""
import threading
import time
from pathlib import Path
from queue import Queue

import requests
from requests_html import HTMLSession

now = time.time()
get_links_list = []
session = HTMLSession()

# url_1 is fetched once to read the page count from its header; urls2 is the
# same search URL with the page number left off so it can be appended.
url_1 = 'https://wallhaven.cc/search?q=id%3A24972&sorting=random&ref=fp&seed=WbEycL&page=2'
urls2 = 'https://wallhaven.cc/search?q=id%3A24972&sorting=random&ref=fp&seed=WbEycL&page='

# Upper bound on the pages to crawl: with 30, pages 1..30 are crawled; when
# the search has fewer pages than this, all of them are crawled.
page_number = 30

# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 30

# Directory the images are written to (relative to the working directory).
WALLPAPER_DIR = Path('Wallpapers')


def save_image(url, title):
    """Download ``url`` and write the response body to ``Wallpapers/<title>.png``.

    The target directory is created on demand.

    Args:
        url: Direct URL of the image.
        title: File name stem for the saved image.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status (so an error page is never saved as an image).
        OSError: If the directory or file cannot be written.
    """
    img_response = requests.get(url, timeout=REQUEST_TIMEOUT)
    img_response.raise_for_status()
    WALLPAPER_DIR.mkdir(parents=True, exist_ok=True)
    # NOTE(review): the extension is always .png regardless of the actual
    # image format behind ``url`` -- confirm that is intended.
    with open(WALLPAPER_DIR / f'{title}.png', 'wb') as file:
        file.write(img_response.content)


def get_page(url_1):
    """Read the page count from the search page header.

    Looks for the first ``h2`` header whose text contains ``'Page'`` and
    returns the last space-separated token of that text.

    Args:
        url_1: URL of a search result page.

    Returns:
        The token as a string, or ``None`` when no such header is present.
    """
    r = session.get(url_1, timeout=REQUEST_TIMEOUT)
    for header in r.html.find('div > section > header > h2'):
        text = header.text
        if 'Page' in text:
            return str(text).split(' ')[-1]
    return None


def get_picturs_url(page, urls2):
    """Collect wallpaper detail-page links from the search result pages.

    Pages ``1 .. min(page, page_number)`` are fetched and every absolute link
    found on a thumbnail anchor is appended to the module-level
    ``get_links_list``.

    Args:
        page: Total number of result pages (int or numeric string). ``None``
            is treated as a single page, which covers ``get_page`` finding no
            "Page" header.
        urls2: Search URL prefix to which the page number is appended.

    Returns:
        The module-level ``get_links_list`` (also mutated in place).
    """
    total_pages = int(page) if page is not None else 1
    if total_pages <= page_number:
        print(f'当前页码数是:{total_pages} ')

    last_page = min(total_pages, page_number)
    # Page numbers are 1-based: start at 1 and include the last page.
    for page_index in range(1, last_page + 1):
        r = session.get(f'{urls2}{page_index}', timeout=REQUEST_TIMEOUT)
        for anchor in r.html.find('div > section > ul > li > figure > a'):
            # absolute_links is a set of URLs; add each one as its own entry.
            get_links_list.extend(anchor.absolute_links)
    return get_links_list


def get_picture(links, output_q):
    """Fetch one wallpaper detail page and save every image found on it.

    Intended to run as a thread target; request failures are reported on
    stdout instead of propagating.

    Args:
        links: URL of the wallpaper detail page.
        output_q: Unused; kept so existing callers keep working.
    """
    try:
        r = session.get(links, timeout=REQUEST_TIMEOUT)
        for img in r.html.find('body > main > section > div > img'):
            url = img.attrs.get('src')
            title = img.attrs.get('data-wallpaper-id')
            if not url or not title:
                # Not a wallpaper image element; nothing to download.
                continue
            print(url + title)
            save_image(url, title)
    except requests.RequestException as exc:
        print(f'Failed to download {links}: {exc}')


def main():
    """Crawl the configured search and download every wallpaper found."""
    page = get_page(url_1)
    get_picturs_url(page, urls2)

    workers = []
    for links in get_links_list:
        # Stagger thread start-up so requests are spread out over time.
        time.sleep(2)
        t = threading.Thread(target=get_picture, args=(links, Queue()))
        t.start()
        workers.append(t)

    # Wait for all downloads so the elapsed time below covers the whole run.
    for t in workers:
        t.join()
    print(time.time() - now)


# Start the multi-threaded download when run as a script.
if __name__ == '__main__':
    main()
模块安装:
pip install requests-html
pip install requests
参考文档:
https://docs.python-requests.org/projects/requests-html/en/latest/
https://docs.python-requests.org/en/latest/
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。