赞
踩
程序文件:xajh_cp.py 结果保存:./tencent_comment/xajh_cp.txt
深度长评的URL地址格式为:https://video.coral.qq.com/filmreviewr/c/upcomment/[视频id]?&reqnum=3&commentid=[评论id]
Fiddler
requests-re
用户代理
《新笑傲江湖》DVD版评论
抓包经过简化得到的url: https://video.coral.qq.com/filmreviewr/c/upcomment/4baf2nzoljqyobl?&reqnum=3&commentid=0
xajh_cp.py
"""Scrape long-form reviews of a Tencent Video title and save them to a text file.

The review endpoint is paged with a cursor: each response carries a ``"last"``
field whose value is passed as ``commentid`` to fetch the next page.
"""
import json
import os
import random
import re

# Pool of browser User-Agent strings; one is picked at random per request.
USER_AGENTS = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14',
)

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# The value patterns accept backslash-escaped characters, so an escaped quote
# (\") inside a title or abstract no longer cuts the match short.
_TITLE_RE = re.compile(r'"title":"((?:[^"\\]|\\.)*)"', re.S)
_ABSTRACT_RE = re.compile(r'"abstract":"((?:[^"\\]|\\.)*)"', re.S)
_LAST_RE = re.compile(r'"last":"(.*?)"')


def _decode_json_text(raw):
    """Decode JSON string escapes (``\\uXXXX``, ``\\"``, ...) in *raw*.

    Replaces the former ``eval`` on scraped text: ``json.loads`` cannot execute
    code, tolerates apostrophes, and joins surrogate pairs into one character.
    Falls back to the raw text when it is not a valid JSON string body.
    """
    try:
        # strict=False allows literal control characters such as newlines.
        return json.loads('"' + raw + '"', strict=False)
    except ValueError:
        return raw


def get_html(url, params):
    """Fetch *url* with query *params* and return the response body as text.

    Raises the ``requests`` exception for network failures, timeouts and
    non-success HTTP status codes.
    """
    # Imported here so the parsing/saving helpers stay importable without the
    # third-party dependency installed.
    import requests

    headers = {"User-Agent": random.choice(USER_AGENTS)}
    r = requests.get(url, headers=headers, params=params, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    return r.text


def parse_page(infolist, data):
    """Extract one page of reviews from *data* and append it to *infolist*.

    Appends ``[titles, abstracts]`` (still JSON-escaped, equal length) and
    returns the cursor for the next page, or ``None`` when the response has no
    ``"last"`` field.
    """
    titles = _TITLE_RE.findall(data)
    comments = _ABSTRACT_RE.findall(data)
    # Keep titles aligned with comments: trim extras, pad missing ones.
    titles = (titles + [''] * len(comments))[:len(comments)]
    infolist.append([titles, comments])
    match = _LAST_RE.search(data)
    return match.group(1) if match else None


def print_comment_list(infolist):
    """Print every collected page of reviews to standard output."""
    for page_no, (titles, comments) in enumerate(infolist, start=1):
        print('第' + str(page_no) + '页\n')
        for title, comment in zip(titles, comments):
            print('=' * 30)
            print('评论标题:' + _decode_json_text(title) + '\n')
            print('评论内容:' + _decode_json_text(comment) + '\n')


def save_to_txt(infolist, path):
    """Write every collected page of reviews to *path* as UTF-8 text.

    The parent directory is created if it does not exist yet.
    """
    folder = os.path.dirname(path)
    if folder:
        os.makedirs(folder, exist_ok=True)
    # The context manager closes the file even if a write fails.
    with open(path, 'w', encoding='utf-8') as fw:
        for page_no, (titles, comments) in enumerate(infolist, start=1):
            fw.write('第' + str(page_no) + '页\n')
            for title, comment in zip(titles, comments):
                fw.write('=' * 30 + '\n')
                fw.write('评论标题:' + _decode_json_text(title) + '\n')
                fw.write('评论内容是:' + _decode_json_text(comment) + '\n')


def main():
    """Download up to ``page_num`` pages of reviews, then print and save them."""
    infolist = []  # one [titles, comments] pair per page
    vid = '4baf2nzoljqyobl'
    next_cid = '0'
    page_num = 2
    url = 'https://video.coral.qq.com/filmreviewr/c/upcomment/' + vid
    for _ in range(page_num):
        params = {'commentid': next_cid, 'reqnum': '3'}
        html = get_html(url, params)
        next_cid = parse_page(infolist, html)
        if not next_cid:
            # No cursor returned: there is nothing further to request.
            break
    print_comment_list(infolist)
    save_to_txt(infolist, './tencent_comment/xajh_cp.txt')


if __name__ == '__main__':
    main()
程序文件:xajh_dp.py 结果保存:./tencent_comment/xajh_dp.txt
全部短评评论的URL地址格式为:https://video.coral.qq.com/varticle/[视频编号]/comment/v2?&orinum=[返回评论个数]&cursor=[评论标号]
Fiddler
requests-re
用户代理
《新笑傲江湖》DVD版评论
抓包经过简化得到的url: https://video.coral.qq.com/varticle/1001103527/comment/v2?&orinum=12&cursor=0
xajh_dp.py
"""Scrape short comments of a Tencent Video title and save them to a text file.

The comment endpoint is paged with a cursor: each response carries a ``"last"``
field whose value is passed as ``cursor`` to fetch the next page.
"""
import json
import os
import random
import re

# Pool of browser User-Agent strings; one is picked at random per request.
USER_AGENTS = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14',
)

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# The value pattern accepts backslash-escaped characters, so an escaped quote
# (\") inside a comment no longer cuts the match short.
_CONTENT_RE = re.compile(r'"content":"((?:[^"\\]|\\.)*)"', re.S)
_LAST_RE = re.compile(r'"last":"(.*?)"')


def _decode_json_text(raw):
    """Decode JSON string escapes (``\\"``, ``\\n``, ``\\uXXXX``, ...) in *raw*.

    Falls back to the raw text when it is not a valid JSON string body.
    """
    try:
        # strict=False allows literal control characters such as newlines.
        return json.loads('"' + raw + '"', strict=False)
    except ValueError:
        return raw


def get_html(url, params):
    """Fetch *url* with query *params* and return the response body as text.

    Raises the ``requests`` exception for network failures, timeouts and
    non-success HTTP status codes.
    """
    # Imported here so the parsing/saving helpers stay importable without the
    # third-party dependency installed.
    import requests

    headers = {"User-Agent": random.choice(USER_AGENTS)}
    r = requests.get(url, headers=headers, params=params, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    # Force UTF-8: without it the text comes out garbled. The earlier
    # apparent_encoding assignment was overwritten right away, so it is gone.
    r.encoding = 'utf-8'
    return r.text


def parse_page(infolist, data):
    """Extract one page of comments from *data* and append it to *infolist*.

    Appends the list of comment strings (still JSON-escaped) and returns the
    cursor for the next page, or ``None`` when the response has no ``"last"``
    field.
    """
    infolist.append(_CONTENT_RE.findall(data))
    match = _LAST_RE.search(data)
    return match.group(1) if match else None


def print_comment_list(infolist):
    """Print every collected page of comments to standard output."""
    for page_no, comments in enumerate(infolist, start=1):
        print('第' + str(page_no) + '页\n')
        for comment in comments:
            print('评论内容:' + _decode_json_text(comment) + '\n')


def save_to_txt(infolist, path):
    """Write every collected page of comments to *path* as UTF-8 text.

    The parent directory is created if it does not exist yet.
    """
    folder = os.path.dirname(path)
    if folder:
        os.makedirs(folder, exist_ok=True)
    # The context manager closes the file even if a write fails.
    with open(path, 'w', encoding='utf-8') as fw:
        for page_no, comments in enumerate(infolist, start=1):
            fw.write('第' + str(page_no) + '页\n')
            for comment in comments:
                fw.write('评论内容:' + _decode_json_text(comment) + '\n')


def main():
    """Download up to ``page_num`` pages of comments, then print and save them."""
    infolist = []  # one list of comment strings per page
    vid = '1001103527'
    cid = "0"
    page_num = 2
    url = 'https://video.coral.qq.com/varticle/' + vid + '/comment/v2'
    for _ in range(page_num):
        params = {'orinum': '10', 'cursor': cid}
        html = get_html(url, params)
        cid = parse_page(infolist, html)
        if not cid:
            # No cursor returned: there is nothing further to request.
            break
    print_comment_list(infolist)
    save_to_txt(infolist, './tencent_comment/xajh_dp.txt')


if __name__ == '__main__':
    main()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。