赞
踩
最近在猎聘上爬了一些物流岗位相关的数据,看着这些爬下来的数据,心里就开始痒痒了,想着怎么把数据利用起来,于是开始了可视化的道路。
使用到的工具包为:
数据字段,一共有21个字段
数据量大概17W条。
关注文章下的公众号,回复「猎聘数据」获取相关数据资源
从数据库中加载数据「公众号中的数据为 excel 文件」
from sqlalchemy import create_engine
import pandas as pd
# Connection URL for the MySQL database through the pymysql driver; the user,
# password, host and database parts are placeholders to be filled in.
engine = create_engine("mysql+pymysql://用户名:passwd@ip:3306/库")

# Read every row of the `liepin` table into a DataFrame.
result = pd.read_sql(sql='select * from liepin', con=engine)

# Bare expression: a notebook shows the DataFrame when this ends the cell.
result
# Number of postings per city, as a plain {city: count} dict.
job_city = result['job_city']
word_counts = job_city.value_counts().to_dict()

# (city, count) pairs ordered from the most to the least frequent city.
data = list(word_counts.items())
data.sort(key=lambda pair: pair[1], reverse=True)
代码如下:
import pandas as pd
from pyecharts.charts import Bar
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
from pyecharts.globals import ThemeType

# JavaScript callback handed to ECharts: it returns a bar colour chosen from the
# bar's value (>= 8000, strictly between 7000 and 8000, or anything else).
color_function = """ function(params){ if(params.value>=8000){ return '#FF1493'; }else if (7000<params.value && params.value<8000){ return '#732DC1'; }else{ return '#2CC0C2'; } } """


def _named_axis(axis_name, gap):
    """Return axis options with the axis name centred and `gap` away from the axis."""
    caption_style = opts.TextStyleOpts(
        font_family='Times New Roman',
        font_size=14,
        color='black',
    )
    return opts.AxisOpts(
        name=axis_name,
        name_location='middle',
        name_gap=gap,
        name_textstyle_opts=caption_style,
    )


# NOTE(review): the slice starts at index 1, so the first-ranked entry of `data`
# is not plotted although the title says TOP10 -- confirm this is intentional.
ranked_cities = data[1:11]
city_labels = [city for city, _ in ranked_cities]
posting_totals = [total for _, total in ranked_cities]

# Chart size and theme.
bar = Bar(opts.InitOpts(width='900px', height='500px', theme=ThemeType.VINTAGE))
bar.add_xaxis(city_labels)
bar.add_yaxis("", posting_totals, itemstyle_opts=opts.ItemStyleOpts(JsCode(color_function)))
# Value labels sit on top of each bar.
bar.set_series_opts(label_opts=opts.LabelOpts(position="top"))
bar.set_global_opts(
    title_opts=opts.TitleOpts(title="招聘岗位数量城市排名TOP10"),
    xaxis_opts=_named_axis('城市', 35),
    yaxis_opts=_named_axis('岗位数', 50),
)

# Alternative outputs:
#   bar.render("招聘岗位数量城市排名TOP10.html")  -> write an HTML file
#   make_snapshot(snapshot, bar.render(), "./猎聘数据可视化/招聘岗位数量城市排名TOP10.png", pixel_ratio=10)  -> write a PNG
# Show the chart inline in the notebook.
bar.render_notebook()
注意:如果在notebook中不能展示出柱状图,请参考JupyterNotebook展示Pyecharts图像
经过数据分析,发现 salary 字段共有五种格式:薪资面议「不考虑」、6-8k·13薪、3000元/月、100元/日、10-20k
我们统一单位为:K
代码如下:
from bisect import bisect_left

# Display labels of the seven salary buckets, lowest first.
part_interval = ["5K以下", "5K-10K", "10K-15K", "15K-20K", "20K-30K", "30K-50K", "50K以上"]

# Inclusive upper bound (in K) of every bucket except the open-ended last one.
_BUCKET_UPPER_BOUNDS = [5, 10, 15, 20, 30, 50]
# Days used to turn a daily wage into a monthly one (30 calendar days, not the
# roughly 22 working days of a real month).
_DAYS_PER_MONTH = 30


def _range_midpoint_k(span):
    """Return the midpoint of a 'low-highk' range such as '10-20k', in K."""
    parts = span.split("-")
    # The upper bound carries the trailing unit letter, which is cut off.
    return (float(parts[0]) + float(parts[1][:-1])) / 2


def _monthly_salary_in_k(raw):
    """Convert one raw salary cell to a monthly salary in K.

    Handles the formats '6-8k·13薪', '3000元/月', '100元/日' and '10-20k'.
    Returns None for cells that must not be counted: non-string values
    (missing data), the string 'nan', and negotiable salaries ('面议').
    """
    if not isinstance(raw, str) or raw == "nan" or "面议" in raw:
        return None
    if raw.endswith("薪"):
        # '6-8k·13薪': spread the yearly number of salaries over 12 months.
        parts = raw.split("·")
        months_paid = float(parts[1][:-1])
        return _range_midpoint_k(parts[0]) * months_paid / 12
    if raw.endswith("元/月"):
        # '3000元/月': amount in yuan per month.
        return float(raw[:-3].split("-")[0]) / 1000
    if raw.endswith("元/日"):
        # '100元/日': amount in yuan per day.
        return float(raw[:-3].split("-")[0]) * _DAYS_PER_MONTH / 1000
    # Plain range such as '10-20k'.
    return _range_midpoint_k(raw)


# Tally each usable salary into its bucket.  Rows without a usable salary are
# skipped entirely; previously they fell through to the bucket test with a
# salary of 0 and were counted as "5K以下".
_bucket_counts = [0] * len(part_interval)
for _cell in result['salary']:
    _monthly_k = _monthly_salary_in_k(_cell)
    if _monthly_k is None:
        continue
    # bisect_left counts the bounds strictly below the salary, which is the
    # index of the bucket whose inclusive upper bound first covers it.
    _bucket_counts[bisect_left(_BUCKET_UPPER_BOUNDS, _monthly_k)] += 1

level1, level2, level3, level4, level5, level6, level7 = _bucket_counts
from pyecharts.charts import Pie
from pyecharts import options as opts
from pyecharts.globals import ThemeType

# Bucket labels paired with the per-bucket counts computed earlier.
x_data = ["5K以下", "5K-10K", "10K-15K", "15K-20K", "20K-30K", "30K-50K", "50K以上"]
y_data = (level1, level2, level3, level4, level5, level6, level7)

# Rich-text styles referenced by name from the label formatter below.
salary_rich_styles = {
    "a": {"color": "#999", "lineHeight": 12, "align": "center"},
    "abg": {
        "backgroundColor": "#e3e3e3",
        "width": "100%",
        "align": "right",
        "height": 12,
        "borderRadius": [4, 4, 0, 0],
    },
    "hr": {
        "borderColor": "#aaa",
        "width": "100%",
        "borderWidth": 0.5,
        "height": 0,
    },
    "b": {"fontSize": 12, "lineHeight": 15},
    "per": {
        "color": "#eee",
        "backgroundColor": "#334455",
        "padding": [2, 4],
        "borderRadius": 2,
    },
}

# Boxed labels outside the ring: name, count and percentage.
salary_label_opts = opts.LabelOpts(
    position="outside",
    formatter=" {b|{b}: }{c} {per|{d}%} ",
    background_color="#eee",
    border_color="#aaa",
    border_width=1,
    border_radius=4,
    rich=salary_rich_styles,
)

# Chart size and theme.
pie = Pie(init_opts=opts.InitOpts(width='800px', height='500px', theme=ThemeType.VINTAGE))
pie.add(
    series_name="猎聘数据",
    data_pair=[[label, count] for label, count in zip(x_data, y_data)],
    center=["50%", "55%"],  # position of the circle centre
    radius=["40%", "60%"],  # inner and outer radius of the ring
    # rosetype=True would turn this into a Nightingale rose chart
    label_opts=salary_label_opts,
)
pie.set_global_opts(
    title_opts=opts.TitleOpts(title="招聘岗位的薪酬分布", pos_left='left'),  # title position
    legend_opts=opts.LegendOpts(pos_top="10%", orient="horizontal"),  # where the bucket legend sits
)
pie.set_series_opts(
    tooltip_opts=opts.TooltipOpts(
        trigger="item",
        formatter="{b}: {c} ({d}%)",  # text shown when hovering a slice
    )
)

# Alternative outputs:
#   pie.render("招聘岗位的薪酬分布.html")
#   make_snapshot(snapshot, pie.render(), "./猎聘数据可视化/招聘岗位的薪酬分布.png", pixel_ratio=10)
pie.render_notebook()
# Count how many rows carry each distinct value of the work_exp column.
result['work_exp'].value_counts()
根据上面的统计结果,可将工作经验划分为:经验不限、应届生、1年以下、1-3年、3-5年、5-10年、10年以上
可手动计算出每个经验区间对应的岗位数目
from pyecharts.charts import Bar
from pyecharts import options as opts
from pyecharts.globals import ThemeType

# Experience categories and their hand-computed posting counts.
experience_labels = ["经验不限", "应届生", "1年以下", "1-3年", "3-5年", "5-10年", "10年以上"]
experience_totals = [22733, 281, 1773, 24942, 31251, 24070, 7264]

# Axis carrying the posting counts; tick marks are drawn inside the axis line.
count_axis = opts.AxisOpts(
    name='岗位数量',
    name_location='middle',
    name_gap=35,
    name_textstyle_opts=opts.TextStyleOpts(
        font_family='Times New Roman',
        font_size=14,
    ),
    axistick_opts=opts.AxisTickOpts(is_inside=True),
)

# Axis carrying the experience categories.
category_axis = opts.AxisOpts(
    name='经验',
    name_location='middle',
    name_gap=70,
    name_textstyle_opts=opts.TextStyleOpts(
        font_family='Times New Roman',
        font_size=14,
        color='black',
    ),
    axistick_opts=opts.AxisTickOpts(is_inside=True),
)

# The theme is chosen through InitOpts; series colours could be overridden with
# .set_colors([...]).
bar = Bar(opts.InitOpts(width='900px', height='500px', theme=ThemeType.VINTAGE))
bar.add_xaxis(experience_labels)
bar.add_yaxis("岗位数量", experience_totals)
bar.reversal_axis()  # swap the x and y data so the bars run horizontally
bar.set_series_opts(label_opts=opts.LabelOpts(position="right"))  # value label position
bar.set_global_opts(
    title_opts=opts.TitleOpts(title="招聘岗位对经验的要求"),
    xaxis_opts=count_axis,
    yaxis_opts=category_axis,
    # toolbox_opts=opts.ToolboxOpts(is_show=True) would show the toolbox
)

# Alternative outputs:
#   bar.render("招聘岗位对经验的要求_bar.html")
#   make_snapshot(snapshot, bar.render(), "./猎聘数据可视化/招聘岗位对经验的要求_bar.png", pixel_ratio=10)
bar.render_notebook()
# Count how many rows carry each distinct value of the eduLevel column.
result['eduLevel'].value_counts()
from pyecharts.charts import Bar
from pyecharts import options as opts
from pyecharts.globals import ThemeType

# Education categories and their hand-computed posting counts.
education_labels = ["学历不限", "中专/中技", "大专", "本科", "硕士", "MBA/EMBA", "博士"]
education_totals = [8145, 1462, 46693, 54673, 1198, 6, 137]

# Axis carrying the posting counts; tick marks are drawn inside the axis line.
count_axis = opts.AxisOpts(
    name='岗位数量',
    name_location='middle',
    name_gap=35,
    name_textstyle_opts=opts.TextStyleOpts(
        font_family='Times New Roman',
        font_size=14,
    ),
    axistick_opts=opts.AxisTickOpts(is_inside=True),
)

# Axis carrying the education categories.
category_axis = opts.AxisOpts(
    name='学历',
    name_location='middle',
    name_gap=70,
    name_textstyle_opts=opts.TextStyleOpts(
        font_family='Times New Roman',
        font_size=14,
        color='black',
    ),
    axistick_opts=opts.AxisTickOpts(is_inside=True),
)

bar = Bar(opts.InitOpts(width='900px', height='500px', theme=ThemeType.VINTAGE))
bar.add_xaxis(education_labels)
bar.add_yaxis("岗位数量", education_totals)
bar.reversal_axis()  # swap the x and y data so the bars run horizontally
bar.set_series_opts(label_opts=opts.LabelOpts(position="right"))  # value label position
bar.set_global_opts(
    title_opts=opts.TitleOpts(title="招聘岗位对学历的要求"),
    xaxis_opts=count_axis,
    yaxis_opts=category_axis,
    toolbox_opts=opts.ToolboxOpts(is_show=True),  # show the toolbox
)

# Alternative outputs:
#   bar.render("招聘岗位对学历的要求_bar_reversal.html")
#   make_snapshot(snapshot, bar.render(), "./猎聘数据可视化/招聘岗位对学历的要求_bar.png", pixel_ratio=10)
bar.render_notebook()
利用上面对学历和经验分析获取的数据,绘制饼状图
import pyecharts.options as opts
from pyecharts.charts import Pie
from pyecharts.globals import ThemeType

# Experience categories with their posting counts.
x_data = ["经验不限", "应届生", "1年以下", "1-3年", "3-5年", "5-10年", "10年以上"]
y_data = [22733, 281, 1773, 24942, 31251, 24070, 7264]
# Education figures that can be swapped in instead:
# x_data = ["学历不限", "中专/中技", "大专", "本科", "硕士", "MBA/EMBA", "博士"]
# y_data = [8145, 1462, 46693, 54673, 1198, 6, 137]

# Rich-text styles referenced by name from the label formatter below.
experience_rich_styles = {
    "a": {"color": "#999", "lineHeight": 12, "align": "center"},
    "abg": {
        "backgroundColor": "#e3e3e3",
        "width": "100%",
        "align": "right",
        "height": 12,
        "borderRadius": [4, 4, 0, 0],
    },
    "hr": {
        "borderColor": "#aaa",
        "width": "100%",
        "borderWidth": 0.5,
        "height": 0,
    },
    "b": {"fontSize": 12, "lineHeight": 15},
    "per": {
        "color": "#eee",
        "backgroundColor": "#334455",
        "padding": [2, 4],
        "borderRadius": 2,
    },
}

# Boxed labels outside the ring: name, count and percentage.
experience_label_opts = opts.LabelOpts(
    position="outside",
    formatter=" {b|{b}: }{c} {per|{d}%} ",
    background_color="#eee",
    border_color="#aaa",
    border_width=1,
    border_radius=4,
    rich=experience_rich_styles,
)

# Chart size and theme.
pie = Pie(init_opts=opts.InitOpts(width='850px', height='500px', theme=ThemeType.VINTAGE))
pie.add(
    series_name="猎聘数据",
    center=["45%", "55%"],
    data_pair=[[label, count] for label, count in zip(x_data, y_data)],
    radius=["40%", "60%"],
    # rosetype=True would turn this into a Nightingale rose chart
    label_opts=experience_label_opts,
)
pie.set_global_opts(
    title_opts=opts.TitleOpts(title="招聘岗位对经验的要求", pos_left='0%'),
    legend_opts=opts.LegendOpts(pos_right="5%", orient="vertical", pos_top="5%"),
)
pie.set_series_opts(
    tooltip_opts=opts.TooltipOpts(trigger="item", formatter="{b}: {c} ({d}%)")
)

pie.render_notebook()
# PNG export alternative:
#   make_snapshot(snapshot, pie.render(), "./猎聘数据可视化/招聘岗位对经验的要求_pie.png", pixel_ratio=10)
# Collapse the rows to one per distinct (comp_url, 人数规模) pair, then count how
# many of those pairs fall under each 人数规模 value.
result.groupby(['comp_url', "人数规模"], as_index=False).size()['人数规模'].value_counts()
import json
from pyecharts import options as opts
from pyecharts.charts import PictorialBar
from pyecharts.globals import ThemeType

# Company-size categories and the number of companies in each.
location = ["1-49人", "50-99人", "100-499人", "500-999人", "1000-2000人", "2000-5000人", "5000-10000人", "10000人以上"]
values = [4636, 4178, 7339, 2560, 1797, 1284, 914, 1100]

# PNG icon, embedded as a data URI, that is repeated to draw each bar.
bar_icon = (
    'image://data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAYAAACqaXHeAAAFVElEQVR4Xu1aS3LbRhDtBlxKKkpVmBMEolaJpQp1glAnkHIC8QamdyazCLWhvIt8A+kEsU8g+gSmS5KzksScIEpVkkpUxnR68B38AREESBPYsTiYmX7T/fp1DxDW/ME1tx8aABoPWHMEmhBYcwdYDhK8M0atf+DBeDobT6s+kFpD4Nr46YA0OuRj6EnD0YS9qkGoHIBrY9ghDZ4RwCEitNQTJ1Ps785eTqr0gkoA+M14YZgaPmPD+LTRSDLwkwJAxvVf2sMRI9xjF+/kOdWVB0Aa/bf+34Hl3oCHeYz+JELAJbO4uC4CgusBV9uDCyCc6UIcfzt7OSsyR9Gxj+YAm8zoiAB7YTKL3QTRe0K49/4j6CDiV3EecLU9ZCydh+BskUAUBsAhtIs0MosAwMbv3J4EeMACUId3mQAsGIjCAFwaL7qoaxdFXI0Ijndvx6PwO4GT5j/9EFA8QHmJ57nXAE6/EBuvtmYj35uKbCY0thIA2LTJzs3Jvrq25Um6dpfbAxYEREUAyOOFKS/2WtpBQK047sjygPBBux7x9HZ8/FgnqA6AHDssCoBPlDRjrTFiLzvPsUxgSEkA0DmZdObOrOvYMpm9XZYnoj911vumSV7coo49Vv9HuUOA6DkJ8ool1LDDRPxLwBoiTp20XyR1lgKAboqt8KKX7eGI0+PPcoNI8IrdtP9YEuSQebt7c9KNvN8eTBmE7wMgJhBukmesOADDd2GZnZRxFgoAsxqLFT8EBGgtoRGHgF3tWWQlsKeB8EKAiyNZI3AY+E8aB/Ac/SdKCHzkEOD5T2OIMTblLhaAosyTMP7RJKiCWEcIOKntrbsPjvlWODbZS34nZLZ2n7xS2B+f+b7jbdV7gAb043c3J1aOdx9WefL3gfU7RgrHKcpED2DwNsVGR1V/duX58MdShMDOzThCpmoWmFcKJ2WBy+3BhMvuH2rPAnGNDC5pz/w8P6cU5vzOxdSWaqjTcLmLtNXq4ADJ8gjM+oAWywukTrQhQhMknDh8IbNDpD2WSoKKlHbm4CyCxlKEQElJILMazLNOTTqA3musxV2pK2UqdzRYCfoND7kxEMLyACmVBTdSPJJ0LEvzAKkmhRAe0XJJLpWlTbLKUwsAcVL4uj085Q6Q7ATLNHDOhYo02HviWDw5C0QbKnKiq/ZQFkHf1A7AprnxdbhB'
    'UU0tsCRSmN1zpNbk1ulqD9w2c9rhMVXaVXvQD1dzSR5gS2nYV2+NkjpTtYRAHnLKM2alpXAeA7PGrCwA4Xrf6Ry/9uoBlrLc2u6qPQOVI1xgUqvB0L3hUoVAfSS4Ig2R+dMgTHdux3vhEGIivQurwZpIkCa6CSN3g6YOBhGeqg0RROrzmJk75qMO/bBczhBCI03QxH1faNhlneGt6YVRHbVAFrnl/X9lSVDW+9zn92Qq3960fBVom29JWfDvBrl4ksVQsKHpEF34xsgB8A27t98VtjXGckjhuE9bglIY3nDPIHBdXoYUvmwP7iMXrHWEwNo3RDgEnnPDItChte74AbuW+7IU3hSf7an1woftwSFXhL8GCpmEEJBS+IkQe6qOiLtdtpeKv4hN4qFS7gVsIzk+0b754QwgW9bBD6Bk0wSdmx1CbppGP5vJJkE/C3jghiyrD4C8VJ8yLhuA7EUaAGoLgezDyRzReEC6DsgEsF4SzLW99EFL6QEhQVOCmdVNkXQtr+4gMw3G3b5UZ8J8KyXdKDUAKAhkekDgknO+A6nh7Wg7PryJTACsOzj4N9fHzjVYmLrkl/D5NOt7wkwAls2osvfTAFA2oqs2X+MBq3ZiZe+38YCyEV21+f4HGU+nfTIBesIAAAAASUVORK5CYII='
)

# Axis for the company-size categories: tick marks hidden, axis line fully transparent.
size_axis = opts.AxisOpts(
    name='公司人数',
    name_location='middle',
    axistick_opts=opts.AxisTickOpts(is_show=False),
    axisline_opts=opts.AxisLineOpts(
        linestyle_opts=opts.LineStyleOpts(opacity=0)
    ),
    name_gap=90,
    name_textstyle_opts=opts.TextStyleOpts(
        font_family='Times New Roman',
        font_size=14,
        color='black',
    ),
    axislabel_opts=opts.LabelOpts(
        font_size=12,
        font_family='Times New Roman',
    ),
)

# Axis for the company counts: inside tick marks and a thin black axis line.
# (is_inverse=True would reverse the axis direction.)
company_axis = opts.AxisOpts(
    name='公司数目',
    axislabel_opts=opts.LabelOpts(font_size=14, font_family='Times New Roman'),
    name_location='middle',
    name_gap=35,
    name_textstyle_opts=opts.TextStyleOpts(
        font_family='Times New Roman',
        font_size=14,
    ),
    axistick_opts=opts.AxisTickOpts(is_inside=True),
    axisline_opts=opts.AxisLineOpts(
        linestyle_opts=opts.LineStyleOpts(
            width=1,
            color='black',
        )
    ),
)

c = PictorialBar(opts.InitOpts(width='900px', height='500px', theme=ThemeType.VINTAGE))
c.add_xaxis(location)
c.add_yaxis(
    "",
    values,
    # Position and format of the value labels.
    label_opts=opts.LabelOpts(is_show=True, position='right', formatter=" {c}家"),
    symbol_size=30,  # size of the icon
    # True and "fixed" both repeat the icon along the bar; False draws a single icon.
    symbol_repeat="fixed",
    # Clip the icon: if one icon stands for 10, a value of 15 is drawn as one and a half icons.
    is_symbol_clip=True,
    symbol=bar_icon,
)
c.reversal_axis()
c.set_global_opts(
    title_opts=opts.TitleOpts(title="公司人数统计"),
    yaxis_opts=size_axis,
    xaxis_opts=company_axis,
)

c.render_notebook()
# Alternative outputs:
#   c.render("公司人数统计.html")
#   make_snapshot(snapshot, c.render(), "./猎聘数据可视化/公司人数统计.png", pixel_ratio=10)
象形柱状图参数的具体设置及图标相关网站,请参考:https://blog.csdn.net/qq_42571592/article/details/122818791
行业中的数据格式为:政务/公共服务,所以先使用「/」分割,然后再进行统计
import collections
# Flatten the '/'-separated industry cells into single industry names.
word_list = []
for cell in result['企业行业']:
    # Skip missing or empty cells.  NaN is a truthy float, so a plain truthiness
    # test would let it through and then fail on .split().
    if not isinstance(cell, str) or not cell:
        continue
    word_list.extend(cell.split('/'))

# Occurrences of each industry name across all rows.
word_counts = collections.Counter(word_list)
from pyecharts import options as opts
from pyecharts.charts import WordCloud
from pyecharts.globals import SymbolType
from pyecharts.globals import ThemeType

# (industry, count) pairs taken from the counter built earlier.
industry_pairs = list(word_counts.items())

c = WordCloud(opts.InitOpts(width='900px', height='500px', theme=ThemeType.VINTAGE))
c.add(
    "",
    industry_pairs,
    word_size_range=[20, 90],  # smallest and largest font size of a word
    shape=SymbolType.ROUND_RECT,  # outline of the word cloud
)
c.set_global_opts(title_opts=opts.TitleOpts(title="公司所属行业分布"))

# Alternative outputs:
#   c.render("公司所属行业分布.html")
#   make_snapshot(snapshot, c.render(), "./猎聘数据可视化/公司所属行业分布.png", pixel_ratio=10)
c.render_notebook()
import collections
# Flatten the '|'-separated welfare cells into single benefit names.
word_list = []
for cell in result['welfare']:
    # Skip missing or empty cells.  NaN is a truthy float, so a plain truthiness
    # test would let it through and then fail on .split().
    if not isinstance(cell, str) or not cell:
        continue
    word_list.extend(cell.split('|'))

# Occurrences of each benefit name across all rows.
word_counts = collections.Counter(word_list)
# Bare expression: a notebook shows the counter when this ends the cell.
word_counts
from pyecharts import options as opts
from pyecharts.charts import WordCloud
from pyecharts.globals import SymbolType
from pyecharts.globals import ThemeType

# (benefit, count) pairs taken from the counter built earlier.
welfare_pairs = list(word_counts.items())

c = WordCloud(opts.InitOpts(width='900px', height='500px', theme=ThemeType.VINTAGE))
c.add(
    series_name="职位福利分析",
    data_pair=welfare_pairs,
    word_size_range=[20, 100],  # smallest and largest font size of a word
    shape="cursive",
)
c.set_global_opts(title_opts=opts.TitleOpts(title="职位福利"))

# Alternative outputs:
#   c.render("职位福利.html")
#   make_snapshot(snapshot, c.render(), "./猎聘数据可视化/职位福利.png", pixel_ratio=10)
c.render_notebook()
Pyecharts直角坐标系图:象形柱状图 PictorialBar
本人公众号为 【信橙则灵】,大家需要数据的话,微信搜索公众号名称,关注后发送【猎聘数据】,即可获得相关数据。
欢迎大家在评论区留言,知无不言,言无不尽。感觉不错的话,别忘了点赞收藏哦!
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。