赞
踩
python库安装
idle:
**pip install 库名**
Pycharm:
1. Requests库
使用方法:import requests(注意模块名是小写)
主要使用方法:
下面的代码是自己写的爬取yande.re图片网的代码
首先分析它的图片地址规律,例如https://yande.re/post/show/123123,可以知道它是根据末尾的数字进行下载的。
使用BeautifulSoup分析返回的content,通过find(attrs={"class": "image"})【找到第一个带有此属性的标签】,return xxx['src'],拿到图片的链接。
再请求图片的链接,把返回的二进制content,通过write保存
with open(要保存的图片地址【包含图片名称及后缀】,'wb'【权限,以二进制形式写入文件,若存在则覆盖】) as f:
f.write(content)。
到此写入完成。
# Crawl yande.re post pages in descending id order and save each post's image.
import json
import os
import re
import time
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup
# NOTE: this import shadows the builtin ConnectionError inside this module.
from requests.exceptions import ConnectionError
from requests.exceptions import ConnectTimeout
from requests.exceptions import ReadTimeout
from requests.exceptions import RequestException

POST_URL = "https://yande.re/post/show/"
SAVE_DIR = "E://CatchPic//yande1//"
TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36'}


def openurl(url, raise_errors=False):
    """Fetch *url* with a browser-like User-Agent.

    Args:
        url: Address to request.
        raise_errors: When False (the default, and the historical
            behaviour) any ``RequestException`` is swallowed and ``None``
            is returned. When True the exception propagates so the caller
            can tell timeouts and connection failures apart.

    Returns:
        The ``requests.Response`` when the status code is 200, else None.
    """
    try:
        # timeout is (connect seconds, read seconds)
        response = requests.get(url, timeout=(30, 15), headers=HEADERS)
    except RequestException:
        if raise_errors:
            raise
        return None
    if response.status_code == 200:
        return response
    return None


def searchYandeAndKonSrc(content):
    """Return the ``src`` attribute of the first element with class "image".

    Raises:
        TypeError: no element with that class exists in *content*
            (``find`` returned None, which is not subscriptable).
        KeyError: the element exists but has no ``src`` attribute.
    """
    soup = BeautifulSoup(content, "html.parser")
    image_tag = soup.find(attrs={"class": "image"})
    return image_tag['src']


def _crawl_one(post_id):
    """Download the image of one post and report progress on stdout.

    Every failure is reported with *post_id* itself; the caller moves on to
    the next id only after this function returns, so the messages always
    name the post that was actually attempted.
    """
    label = "第" + str(post_id) + "张"
    try:
        print(label + ":")
        started = time.perf_counter()
        print("开始时间:" + time.strftime(TIME_FORMAT))

        page = openurl(POST_URL + str(post_id), raise_errors=True)
        if page is None:  # non-200 answer for the post page
            print(label + "图片不存在")
            return
        image = openurl(searchYandeAndKonSrc(page.content), raise_errors=True)
        if image is None:  # non-200 answer for the image itself
            print(label + "图片不存在")
            return

        # NOTE(review): the file is always named .jpg regardless of the real
        # image format - confirm whether that is intended.
        with open(SAVE_DIR + str(post_id) + ".jpg", 'wb') as f:
            f.write(image.content)
        print("保存成功")
        print("结束时间:" + time.strftime(TIME_FORMAT))
        print("耗时:" + str(time.perf_counter() - started))
        print("-------------------------------")
    except ReadTimeout:
        print(label + "图片,读取超时")
        time.sleep(5)
    except ConnectTimeout:
        # Must precede ConnectionError: ConnectTimeout is a subclass of it.
        print("连接超时")
        time.sleep(60)
    except ConnectionError:
        print(label + "图片,地址链接失败")
    except RequestException:
        # Any other requests failure (bad URL scheme, too many redirects...).
        print(label + "图片,请求失败")
    except (TypeError, KeyError):
        print("该连接没有符合条件的图片")


def main():
    """Ask for a starting post id and crawl downwards until id 1."""
    answer = input("请输入要从那张开始爬取(倒叙):")
    try:
        post_id = int(answer)
    except ValueError:
        raise SystemExit("请输入一个整数") from None

    # Create the target directory up front so a missing folder does not
    # abort the run after the first download has already been fetched.
    os.makedirs(SAVE_DIR, exist_ok=True)

    while post_id > 0:
        _crawl_one(post_id)
        post_id -= 1


if __name__ == "__main__":
    main()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。