赞
踩
"""Download images from Toutiao search results for the keyword '街拍'.

Each result page is fetched as JSON, every image listed under an item is
downloaded, and the image is stored in a directory named after the item's
title. Files are named by the MD5 hex digest of their content, so the same
image is never written twice into the same directory.
"""
import os
import re
from hashlib import md5
from multiprocessing.pool import Pool
from urllib.parse import urlencode

import requests

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block a pool worker forever.
REQUEST_TIMEOUT = 10

# Whitespace plus characters that cannot appear in a directory name on common
# filesystems (path separators and the Windows-reserved set).
_UNSAFE_DIR_CHARS = re.compile(r'[\\/:*?"<>|\s]')


def get_page(offset):
    """Fetch one page of search results.

    Args:
        offset: Value sent as the ``offset`` query parameter.

    Returns:
        The decoded JSON body when the server answers with status 200,
        otherwise ``None`` (request failure, non-200 status or a body that
        is not valid JSON).
    """
    params = {
        'aid': '24',
        'app_name': 'web_search',
        'offset': offset,
        'format': 'json',
        'keyword': '街拍',
        'autoload': 'true',
        'count': '20',
        'cur_tab': '3',
    }
    url = 'https://www.toutiao.com/api/search/content/?' + urlencode(params)
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # RequestException is the base class of ConnectionError and of the
        # timeout errors that the timeout argument can now raise.
        print("连接失败")
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError:
        print('Invalid JSON response')
        return None


def get_images(json):
    """Yield one ``{'image': url, 'title': title}`` dict per listed image.

    Args:
        json: Decoded response as returned by ``get_page``; may be ``None``.

    Yields:
        A dict holding the image's ``url`` value and the title of the item it
        belongs to. Nothing is yielded when ``json`` is empty or has no
        ``data`` entry.
    """
    if not json:
        # get_page returns None on failure; treat that as "no results".
        return
    for item in json.get('data') or ():
        title = item.get('title')
        for image in item.get('image_list') or ():
            yield {
                'image': image.get('url'),
                'title': title,
            }


def _title_to_dirname(title):
    """Return *title* with whitespace and unsafe path characters removed."""
    return _UNSAFE_DIR_CHARS.sub('', title)


def save_images(item):
    """Download one image and store it under a directory named by its title.

    Args:
        item: Dict with the keys ``'image'`` (URL) and ``'title'``, as
            produced by ``get_images``. Entries lacking either value are
            skipped.
    """
    title = item.get('title')
    image_url = item.get('image')
    if not title or not image_url:
        return

    # The same cleaned name is used for creating the directory and for
    # building the file path, so both always refer to the same location.
    directory = _title_to_dirname(title)
    if not directory:
        return
    # exist_ok avoids a failure when another worker creates it concurrently.
    os.makedirs(directory, exist_ok=True)

    try:
        response = requests.get(image_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        print('Failed to Save Image')
        return
    if response.status_code != 200:
        return

    # The MD5 digest of the image bytes is the file name, which makes an
    # already-downloaded image detectable by path alone.
    file_path = '{0}/{1}.{2}'.format(
        directory, md5(response.content).hexdigest(), 'jpg')
    if os.path.exists(file_path):
        print('Already Download', file_path)
        return
    with open(file_path, 'wb') as f:
        f.write(response.content)


def main(offset):
    """Fetch the result page at *offset* and save every image it lists."""
    page = get_page(offset)
    for item in get_images(page):
        save_images(item)


# Inclusive range of page groups to fetch; each group maps to offset x * 20.
GROUP_START = 1
GROUP_END = 1

if __name__ == '__main__':
    # All downloads go below ./jiepai; create it on first run.
    os.makedirs('jiepai', exist_ok=True)
    os.chdir('jiepai')
    groups = [x * 20 for x in range(GROUP_START, GROUP_END + 1)]
    # One page offset per task, spread over a pool of worker processes.
    # map() blocks until every task has finished, so leaving the with-block
    # afterwards only releases the workers.
    with Pool() as pool:
        pool.map(main, groups)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。