赞
踩
对任何一个网站,第一件事是观察你要的数据在不在页面源代码里:如果在,直接请求该 url 即可;如果不在,就用抓包工具观察数据究竟是从哪个 url 加载进来的。
方案一:把参数直接拼在 url 里(参数太长了,看起来费劲)
import requests

# Scheme 1: the complete query string is embedded in the URL. It works, but
# the long URL is hard to read and hard to edit.
url = "https://movie.douban.com/j/chart/top_list?type=13&interval_id=100%3A90&action=&start=0&limit=20"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
}

# Without a timeout, requests waits indefinitely on an unresponsive server.
resp = requests.get(url, headers=headers, timeout=10)

# Print the raw body before decoding it. If resp.json() below fails with
#   requests.exceptions.JSONDecodeError: Expecting value
# the server did not return JSON, and the raw text shows what it sent instead.
print(resp.text)

dic = resp.json()
print(dic)
方案二:把查询参数放进字典,通过 params 传给 requests.get
import requests

# Scheme 2: keep the endpoint URL short and pass the query parameters as a
# dict; requests encodes them into the query string.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
}
url = "https://movie.douban.com/j/chart/top_list"
params = {
    "type": "13",
    "interval_id": "100:90",
    "action": "",
    "start": "0",  # offset of the first record: 0 -> page 1, 20 -> page 2, 40 -> page 3
    "limit": "20",
}

# Send the GET request with the parameters attached. The timeout prevents an
# indefinite hang on an unresponsive server.
resp = requests.get(url, params=params, headers=headers, timeout=10)
# Fail with a clear HTTP error instead of a confusing JSON decode error when
# the server answers with a 4xx/5xx status.
resp.raise_for_status()
print(resp.json())
实现
import json

# Download the first five pages of the chart endpoint and store one
# "genre|title|url" line per record in douban.txt.

URL = "https://movie.douban.com/j/chart/top_list"
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
}
PAGE_SIZE = 20
PAGE_COUNT = 5


def format_record(item):
    """Return the output line (without the trailing newline) for one record.

    ``item`` is one element of the JSON list returned by the endpoint. The
    keys read are ``'types'``, ``'title'`` and ``'url'``; ``'types'`` is
    assumed to be a list of strings, since the original code indexed it.

    The second genre is written when the record has one, matching the
    original output. The original indexed ``types[1]`` unconditionally, which
    raises IndexError for a record with fewer than two genres; such records
    now fall back to their only genre, or to an empty field.
    """
    genres = item["types"]
    if len(genres) > 1:
        genre = genres[1]
    elif genres:
        genre = genres[0]
    else:
        genre = ""
    return f"{genre}|{item['title']}|{item['url']}"


def main():
    """Fetch every page and write the formatted records to douban.txt."""
    # Imported here so format_record stays importable on machines that do not
    # have the third-party HTTP client installed.
    import requests

    with open("douban.txt", mode="w", encoding="utf-8") as f:
        for page in range(PAGE_COUNT):
            # Each iteration requests the next batch: start = 0, 20, 40, 60, 80.
            params = {
                "type": "13",
                "interval_id": "100:90",
                "action": "",
                "start": page * PAGE_SIZE,
                "limit": str(PAGE_SIZE),
            }
            # The timeout prevents an indefinite hang; raise_for_status turns
            # an HTTP error into an explicit exception before JSON decoding.
            resp = requests.get(URL, params=params, headers=HEADERS, timeout=10)
            resp.raise_for_status()

            for item in resp.json():
                f.write(format_record(item) + "\n")


if __name__ == "__main__":
    main()

或者开头可以换成这个
import requests

# Alternative opening: build each page's URL with an f-string instead of a
# params dict. The headers are constant, so they are defined once.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
}
for i in range(5):
    # start advances by 20 per iteration: 0, 20, 40, 60, 80.
    url = f"https://movie.douban.com/j/chart/top_list?type=13&interval_id=100%3A90&action=&start={i*20}&limit=20"
    # The timeout prevents an indefinite hang; raise_for_status surfaces HTTP
    # errors before the body is decoded as JSON.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    # lst holds the decoded JSON for this page; the per-record processing
    # goes here.
    lst = resp.json()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。