赞
踩
win + R 输入cmd 输入安装命令 pip install 模块名 (如果你觉得安装速度比较慢, 你可以切换国内镜像源)
# 数据请求模块 第三方模块 需要安装 pip install requests
import requests
# 数据解析模块 第三方模块 需要安装 pip install parsel
import parsel
# 导入csv模块 内置模块 不需要安装
import csv # 固定模板
# 导入pandas模块
import pandas as pd
# 模拟浏览器
headers = {
# 用户代理 表示浏览器基本身份信息
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'
}
# 请求链接
url = 'https://cs.lianjia.com/ershoufang/'
# 发送请求
response = requests.get(url=url, headers=headers)
# 输出内容 <Response [200]> 响应对象 表示请求成功
print(response)
我们这次选用css选择器: 根据标签属性提取数据内容
selector = parsel.Selector(response.text) # 选择器对象
# 获取所有房源所在li标签
lis = selector.css('.sellListContent li .info')
for li in lis: title = li.css('.title a::text').get() # 标题 area_info = li.css('.positionInfo a::text').getall() # 区域信息 area_1 = area_info[0] # 小区 area_2 = area_info[1] # 区域 totalPrice = li.css('.totalPrice span::text').get() # 总价 unitPrice = li.css('.unitPrice span::text').get().replace('元/平', '') # 单价 houseInfo = li.css('.houseInfo::text').get().split(' | ') # 房源信息 HouseType = houseInfo[0] # 户型 HouseArea = houseInfo[1].replace('平米', '') # 面积 HouseFace = houseInfo[2] # 朝向 HouseInfo_1 = houseInfo[3] # 装修 fool = houseInfo[4] # 楼层 HouseInfo_2 = houseInfo[-1] # 建筑结构 href = li.css('.title a::attr(href)').get() # 详情页 dit = { '标题': title, '小区': area_1, '区域': area_2, '总价': totalPrice, '单价': unitPrice, '户型': HouseType, '面积': HouseArea, '朝向': HouseFace, '装修': HouseInfo_1, '楼层': fool, '年份': date, '建筑结构': HouseInfo_2, '详情页': href, } print(dit)
f = open('二手房.csv', mode='w', encoding='utf-8', newline='') csv_writer = csv.DictWriter(f, fieldnames=[ '标题', '小区', '区域', '总价', '单价', '户型', '面积', '朝向', '装修', '楼层', '年份', '建筑结构', '详情页', ]) csv_writer.writeheader()
from pyecharts import options as opts from pyecharts.charts import Pie from pyecharts.faker import Faker c = ( Pie() .add( "", [ list(z) for z in zip(house_type, house_num) ], center=["40%", "50%"], ) .set_global_opts( title_opts=opts.TitleOpts(title="二手房源户型分布"), legend_opts=opts.LegendOpts(type_="scroll", pos_left="80%", orient="vertical"), ) .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}")) # .render("pie_scroll_legend.html") ) c.load_javascript()
face_type = df['朝向'].value_counts().index.to_list() face_num = df['朝向'].value_counts().to_list() c = ( Pie() .add( "", [ list(z) for z in zip(face_type, face_num) ], center=["40%", "50%"], ) .set_global_opts( title_opts=opts.TitleOpts(title="二手房源朝向分布"), legend_opts=opts.LegendOpts(type_="scroll", pos_left="80%", orient="vertical"), ) .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}")) # .render("pie_scroll_legend.html") ) c.render_notebook()
face_type = df['装修'].value_counts().index.to_list() face_num = df['装修'].value_counts().to_list() c = ( Pie() .add( "", [ list(z) for z in zip(face_type, face_num) ], center=["40%", "50%"], ) .set_global_opts( title_opts=opts.TitleOpts(title="二手房源装修分布"), legend_opts=opts.LegendOpts(type_="scroll", pos_left="80%", orient="vertical"), ) .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}")) # .render("pie_scroll_legend.html") ) c.render_notebook()
face_type = df['年份'].value_counts().index.to_list() face_num = df['年份'].value_counts().to_list() c = ( Pie() .add( "", [ list(z) for z in zip(face_type, face_num) ], center=["40%", "50%"], ) .set_global_opts( title_opts=opts.TitleOpts(title="二手房源年份分布"), legend_opts=opts.LegendOpts(type_="scroll", pos_left="80%", orient="vertical"), ) .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}")) # .render("pie_scroll_legend.html") ) c.render_notebook()
face_type = df['建筑结构'].value_counts().index.to_list() face_num = df['建筑结构'].value_counts().to_list() c = ( Pie() .add( "", [ list(z) for z in zip(face_type, face_num) ], center=["40%", "50%"], ) .set_global_opts( title_opts=opts.TitleOpts(title="二手房源建筑结构分布"), legend_opts=opts.LegendOpts(type_="scroll", pos_left="80%", orient="vertical"), ) .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}")) # .render("pie_scroll_legend.html") ) c.render_notebook()
avg_salary = df.groupby('区域')['总价'].mean() CityType = avg_salary.index.tolist() CityNum = [int(a) for a in avg_salary.values.tolist()] from pyecharts.charts import Bar # 创建柱状图实例 c = ( Bar() .add_xaxis(CityType) .add_yaxis("", CityNum) .set_global_opts( title_opts=opts.TitleOpts(title="各大区域房价平均价"), visualmap_opts=opts.VisualMapOpts( dimension=1, pos_right="5%", max_=30, is_inverse=True, ), xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=45)) # 设置X轴标签旋转角度为45度 ) .set_series_opts( label_opts=opts.LabelOpts(is_show=False), markline_opts=opts.MarkLineOpts( data=[ opts.MarkLineItem(type_="min", name="最小值"), opts.MarkLineItem(type_="max", name="最大值"), opts.MarkLineItem(type_="average", name="平均值"), ] ), ) ) c.render_notebook()
import pandas as pd from pyecharts.charts import Bar import pyecharts.options as opts # 清理数据并将'单价'列转换为整数类型 df['单价'] = df['单价'].str.replace(',', '').astype(int) # 计算平均价 avg_salary = df.groupby('区域')['单价'].mean() # 获取城市类型和城市平均价格 CityType = avg_salary.index.tolist() CityNum = [int(a) for a in avg_salary.values.tolist()] # 创建柱状图实例 c = ( Bar() .add_xaxis(CityType) .add_yaxis("", CityNum) .set_global_opts( title_opts=opts.TitleOpts(title="各大区域房价单价平均价格"), visualmap_opts=opts.VisualMapOpts( dimension=1, pos_right="5%", max_=30, is_inverse=True, ), xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=45)) # 设置X轴标签旋转角度为45度 ) .set_series_opts( label_opts=opts.LabelOpts(is_show=False), markline_opts=opts.MarkLineOpts( data=[ opts.MarkLineItem(type_="min", name="最小值"), opts.MarkLineItem(type_="max", name="最大值"), opts.MarkLineItem(type_="average", name="平均值"), ] ), ) ) # 在Notebook中显示柱状图 c.render_notebook()
适合练手的25个Python案例源码分享,总有一个你想要的
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。