当前位置:   article > 正文

Python爬虫-漫画柜漫画爬取_漫画柜网页

漫画柜网页

代码仅供学习

  1. from selenium import webdriver
  2. from lxml import etree
  3. import time
  4. import random
  5. import requests
  6. import os
  7. def download_pic(urls,name):
  8. i = 1
  9. os.mkdir(fr'C:\Users\msi\Desktop\爬取图片\{name}') #修改储存地址
  10. for url in urls:
  11. headers = {'Referer': 'https://www.mhgui.com/comic/25538/', #修改一下Referer
  12. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36',
  13. }
  14. request1 = requests.get(url,headers = headers)
  15. content = request1.content
  16. with open(fr'C:\Users\msi\Desktop\爬取图片\{name}\{i}.jpg', 'wb') as a:
  17. a.write(content)
  18. i+=1
  19. url = 'https://www.mhgui.com/comic/25538/' #修改你要爬取的漫画柜漫画网页地址
  20. driver_path = r'G:\Python\chromedriver'
  21. option = webdriver.ChromeOptions()
  22. option.add_experimental_option('excludeSwitches', ['enable-automation'])
  23. driver = webdriver.Chrome(executable_path=driver_path,chrome_options=option)
  24. driver.get(url)
  25. html_main = etree.HTML(driver.page_source)
  26. names = html_main.xpath('//div[@class="chapter-list cf mt10"]/ul/li/a/@title')
  27. button_chapters = driver.find_elements_by_xpath('//div[@class="chapter-list cf mt10"]/ul/li/a')
  28. for name,button_chapter in zip(names,button_chapters):
  29. url_lst = []
  30. button_chapter.click()
  31. driver.switch_to.window(driver.window_handles[1])
  32. while True:
  33. content = driver.page_source
  34. html = etree.HTML(content)
  35. url_per_page = html.xpath('//td[@align="center"]/div[2]/img/@src')[0]
  36. url_lst.append(url_per_page)
  37. time.sleep(random.uniform(1,3))
  38. driver.execute_script("window.scrollBy(800,1200)")
  39. try:
  40. next_page_button = driver.find_element_by_id("next")
  41. next_page_button.click()
  42. except:
  43. break
  44. print(url_lst)
  45. download_pic(url_lst,name)
  46. driver.close()
  47. driver.switch_to.window(driver.window_handles[0])
  48. time.sleep(3)

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/article/detail/55510
推荐阅读
相关标签
  

闽ICP备14008679号