赞
踩
目录
import requests

# Target page for the demo request.
url = 'http://www.baidu.com'

# Issue a plain GET; the call returns a requests Response object.
response = requests.get(url)

# One type and six attributes worth knowing on a Response:
# print(type(response))      # the Response type itself

# Encoding used when the body is decoded through response.text.
response.encoding = 'utf-8'

# print(response.text)       # page source as a str
# print(response.url)        # URL of the response
# print(response.content)    # body as raw bytes

# HTTP status code of the response.
print(response.status_code)

# Response headers.
print(response.headers)
import requests

# Search endpoint; the query string is supplied separately via `params`.
url = 'http://www.baidu.com/s?'

# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
}

# Query parameters; requests URL-encodes them when building the final URL.
data = {
    'wd': '北京',
}

# requests.get(url, params=..., **kwargs):
#   url    -- address of the requested resource
#   params -- query-string parameters
#   kwargs -- further options such as headers
response = requests.get(url, params=data, headers=headers)

# Page source as text.
content = response.text

print(content)
注:GET 请求的参数通过 params 传递;requests 会自动完成 URL 编码,无需手动 urlencode,也不需要定制请求对象。
import requests

# Suggestion endpoint that the POST is sent to.
post_url = 'https://fanyi.baidu.com/sug'

# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
}

# Form fields for the request body; requests form-encodes the dict itself.
data = {
    'kw': 'eye',
}

# requests.post(url, data=..., **kwargs):
#   url    -- address of the requested resource
#   data   -- form parameters sent in the body
#   kwargs -- further options such as headers
response = requests.post(post_url, data=data, headers=headers)

# Response body as text (decoded further by the JSON step that follows).
content = response.text
解决乱码
import json

# `content` is already a str, so json.loads can parse it directly; no
# encode()/decode() round trip is needed. (json.loads no longer accepts an
# `encoding=` keyword on current Python versions, and it is not required:
# str input is parsed as-is and escaped characters come back as real
# Unicode text.)
obj = json.loads(content)
print(obj)
注:requests 的 POST 请求参数通过 data 传递,无需手动编码/解码,也不需要定制请求对象。
# Proxy mapping keyed by URL scheme; only the 'http' scheme is configured
# here, so requests to other schemes are not routed through this proxy.
proxy = {
    'http': '101.251.204.174:8080',
}

# Same GET as before, with the proxy mapping passed via `proxies`.
# `url`, `data` and `headers` are the values defined in the earlier example.
response = requests.get(url, params=data, headers=headers, proxies=proxy)
e.g.自动登录古诗文网站
import requests
from bs4 import BeautifulSoup

# URL of the login page.
url = 'https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}

# A single Session carries cookies across requests. The login page, the
# captcha image and the login POST all go through it so the server sees one
# consistent visitor; otherwise the submitted captcha would not match the
# image that was downloaded.
session = requests.session()

# Fetch the login page source.
response = session.get(url=url, headers=headers)
content = response.text

# Parse the page to extract the hidden form fields and the captcha image.
soup = BeautifulSoup(content, 'lxml')

# Hidden <input> fields that must be echoed back with the login form.
# __VIEWSTATE
viewstate = soup.select('#__VIEWSTATE')[0].attrs.get('value')

# __VIEWSTATEGENERATOR
viewstategenerator = soup.select('#__VIEWSTATEGENERATOR')[0].attrs.get('value')

# Captcha image: the page holds a site-relative src, so prefix the host.
code = soup.select('#imgCode')[0].attrs.get('src')
code_url = 'https://so.gushiwen.cn' + code

# Download the captcha through the same session.
response_code = session.get(code_url)
# Use the raw bytes (.content), since this is an image being saved to disk.
content_code = response_code.content
# 'wb' writes the binary data to the file unchanged.
with open('code.png', 'wb') as fp:
    fp.write(content_code)

# Open code.png, read the captcha, then type it into the console.
real_code = input('请输入验证码:')

# Submit the login form.
url_post = 'https://so.gushiwen.cn/user/login.aspx?from=http%3a%2f%2fso.gushiwen.cn%2fuser%2fcollect.aspx'

# The scraped hidden fields and the typed captcha are sent with the form;
# posting them as empty strings would discard everything gathered above.
# 'email' / 'pwd' are placeholder credentials -- replace with a real account.
data_post = {
    '__VIEWSTATE': viewstate,
    '__VIEWSTATEGENERATOR': viewstategenerator,
    'from': 'http://so.gushiwen.cn/user/collect.aspx',
    'email': '1234567@qq.com',
    'pwd': '123456',
    'code': real_code,
    'denglu': '登录'
}

response_post = session.post(url=url_post, headers=headers, data=data_post)

content_post = response_post.text

# Save the returned page so the login result can be inspected in a browser.
with open('gushiwen.html', 'w', encoding='utf-8') as fp:
    fp.write(content_post)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。