赞
踩
代码仅供学习
- from selenium import webdriver
- from lxml import etree
- import time
- import random
- import requests
- import os
-
def download_pic(urls, name, save_root=r'C:\Users\msi\Desktop\爬取图片',
                 referer='https://www.mhgui.com/comic/25538/', timeout=30):
    """Download every image in *urls* into a folder named *name*.

    Images are saved as ``1.jpg``, ``2.jpg``, ... following the order of
    *urls*, so the file number matches the page position in the chapter.

    Args:
        urls: Iterable of image URLs, in page order.
        name: Name of the sub-folder (one per chapter) created under
            *save_root*.
        save_root: Directory that holds all chapter folders. Change this to
            your own storage location.
        referer: Value sent in the ``Referer`` request header. Change this
            to the comic page you are scraping.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.RequestException: On connection errors or timeouts.
        OSError: If the folder or an image file cannot be written.
    """
    chapter_dir = os.path.join(save_root, name)
    # exist_ok=True lets an interrupted run be restarted without failing on a
    # folder that is already there; makedirs also creates a missing save_root.
    os.makedirs(chapter_dir, exist_ok=True)

    # The headers are identical for every image, so build them once.
    headers = {
        'Referer': referer,
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36',
    }

    for page_no, url in enumerate(urls, start=1):
        response = requests.get(url, headers=headers, timeout=timeout)
        if not response.ok:
            # Do not store an HTTP error body under a .jpg name; keep going so
            # the remaining pages still get their correct numbers.
            print(f'skip page {page_no}: HTTP {response.status_code} for {url}')
            continue
        with open(os.path.join(chapter_dir, f'{page_no}.jpg'), 'wb') as image_file:
            image_file.write(response.content)
-
-
# Chapter-index page of the comic to scrape (change this to your comic's URL).
url = 'https://www.mhgui.com/comic/25538/'
driver_path = r'G:\Python\chromedriver'
option = webdriver.ChromeOptions()
# Remove the 'enable-automation' switch from the Chrome launch options
# (presumably to look less like an automated browser).
option.add_experimental_option('excludeSwitches', ['enable-automation'])
driver = webdriver.Chrome(executable_path=driver_path, chrome_options=option)

try:
    driver.get(url)
    html_main = etree.HTML(driver.page_source)
    # Chapter titles and their clickable links come from the same list, so
    # zipping them pairs each title with its link.
    names = html_main.xpath('//div[@class="chapter-list cf mt10"]/ul/li/a/@title')
    button_chapters = driver.find_elements_by_xpath('//div[@class="chapter-list cf mt10"]/ul/li/a')

    for name, button_chapter in zip(names, button_chapters):
        url_lst = []
        button_chapter.click()
        # Assumes the chapter opens in a second window/tab; switch to it.
        driver.switch_to.window(driver.window_handles[1])
        while True:
            # Read the image URL of the page currently shown in the reader.
            content = driver.page_source
            html = etree.HTML(content)
            url_per_page = html.xpath('//td[@align="center"]/div[2]/img/@src')[0]
            url_lst.append(url_per_page)
            # Random pause between page turns.
            time.sleep(random.uniform(1, 3))
            driver.execute_script("window.scrollBy(800,1200)")
            try:
                next_page_button = driver.find_element_by_id("next")
                next_page_button.click()
            except Exception:
                # A failure to find or click "next" is treated as the end of
                # the chapter. Catching Exception (not a bare except) keeps
                # Ctrl-C / SystemExit able to stop the script.
                break
        print(url_lst)
        download_pic(url_lst, name)
        # Close the chapter window and go back to the chapter index.
        driver.close()
        driver.switch_to.window(driver.window_handles[0])
        time.sleep(3)
finally:
    # Always shut the browser and chromedriver down, even after an error.
    driver.quit()

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。