赞
踩
在前文(链接见下)爬取图片的代码基础上,只需修改其中的 JSON 筛选条件,就可以获取视频链接。
https://blog.csdn.net/weixin_43596589/article/details/122215981
由于用线程下载时个别文件仍会出错,所以我选择只生成下载链接,再交给迅雷下载。
get_pics_url函数如下
def get_pics_url(self):
    """Collect video URLs page by page and dump them to a text file.

    Requests ``self.start_url`` with an increasing ``&page=N`` suffix and
    parses each response as JSON. While the response's ``ok`` field equals 1,
    every entry of ``data.cards`` is inspected and the URL found under
    ``mblog.page_info.urls`` is collected, preferring the ``mp4_720p_mp4``
    key and falling back to ``mp4_hd_mp4``. Entries offering neither key are
    printed for inspection and skipped.

    When ``ok`` is anything other than 1 the loop stops: a termination
    message is appended to ``t1`` (presumably a Tk text widget -- confirm
    against the rest of the file) and, if ``r1_var.get() == 1``, the
    ``<disk>:/WeiBo_Pics`` folder is opened with ``os.startfile``.

    The collected URLs are written one per line, unmodified, to
    ``<user_name_selected>mp4.txt`` so they can be fed to an external
    download tool.

    Returns:
        An empty string; the result is the file written to disk.
    """
    page = 1
    url_list = []
    while True:
        url = self.start_url + '&page={}'.format(page)
        headers = {'User-Agent': get_ua()}
        r = requests.get(url, headers=headers)
        payload = json.loads(r.text)

        # Check the status flag before touching "data": a terminating
        # response is not guaranteed to carry a "data" object.
        if payload.get('ok') != 1:
            # Original note (v1.06): the page number shown here was reported
            # as having a display problem.
            t1.insert(END, f'***在第{page}页终止***\n')
            t1.see(END)
            t1.update()
            if r1_var.get() == 1:
                big_dir = disk + ':/WeiBo_Pics'
                os.startfile(big_dir)
            break

        for card in payload["data"]["cards"]:
            mblog = card.get('mblog')
            if mblog is None:
                continue
            page_info = mblog.get('page_info')
            if page_info is None:
                continue
            urls = page_info.get('urls')
            if urls is None:
                continue

            video_url = urls.get('mp4_720p_mp4')
            if video_url is None:
                video_url = urls.get('mp4_hd_mp4')
            if video_url is None:
                # Neither expected key is present: show what was offered
                # and skip it, rather than writing a "None" line to the file.
                print(urls)
                continue
            url_list.append(video_url)

        page += 1

    print("url共有个数")
    print(len(url_list))

    # Write each URL verbatim. The previous ':' -> ',' substitution turned
    # "https://..." into "https,//...", which made every link unusable.
    with open(user_name_selected + 'mp4.txt', 'w', encoding='utf-8') as file:
        file.writelines(f'{video_url}\n' for video_url in url_list)
    return ""
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。