赞
踩
导入相关库
-
- from pyecharts import Bar,Pie
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- import time
数据清洗与简单统计
-
- df=pd.read_excel("all_data_meituan.xlsx")
-
- df.drop('comment',axis=1).head(2)
-
- df['avgPrice'].value_counts()
- # 同一家店的均价应该为同一个数值,所以这列数据没多大的意义
- 73 17400
- Name: avgPrice, dtype: int64
-
- df['anonymous'].value_counts()
- # 匿名评价与实名评价的比例大致在5:1左右
- False 14402
- True 2998
- Name: anonymous, dtype: int64
时间格式的转化
-
def convertTime(x):
    """Convert a millisecond epoch timestamp to a local-time string.

    Args:
        x: Timestamp in milliseconds since the Unix epoch.

    Returns:
        The timestamp formatted as "%Y-%m-%d %H:%M:%S" in the local
        time zone of the machine running the code.
    """
    # time.localtime expects seconds, so scale the millisecond value down.
    local_struct = time.localtime(x / 1000)
    return time.strftime("%Y-%m-%d %H:%M:%S", local_struct)
- df["commentTime"]=df["commentTime"].apply(convertTime)
- df["commentTime"].head()
- 0 2018-05-09 22:21:48
- 1 2018-06-01 19:41:31
- 2 2018-04-04 11:52:23
- 3 2018-05-01 17:12:22
- 4 2018-05-17 16:48:04
- Name: commentTime, dtype: object
-
- # 在excel可以用筛选器直接看到这列中的数据含有缺失值,或者在拿到数据的时候,使用df.info() 查看每列的数据信息情况
- df['dealEndtime'].isna().value_counts()
- # 这列数据中含有177个缺失值,其余完整
- False 17223
- True 177
- Name: dealEndtime, dtype: int64
-
- df['commentTime']=pd.to_datetime(df['commentTime'])
- df1 = df.set_index('commentTime')
- df1.resample('D').size().sort_values(ascending=False).head(100)
- df2=df1.resample('M').size().to_period()
- df2=df2.reset_index()
- # df2.columns
- # from pyecharts import Bar
- bar =Bar("按月统计",width=1000,height=800)
- bar.add("按月统计",df2['commentTime'],df2[0],is_label_show=True, is_datazoom_show=True,is_toolbox_show=True,is_more_utils=True)
- bar
-
from pyecharts import Bar

# Count reviews per day of the week. pandas numbers weekdays 0 (Monday)
# through 6 (Sunday).
df['commentTime'] = pd.to_datetime(df['commentTime'])
df['weekday'] = df['commentTime'].dt.weekday
# Reindex onto all seven weekdays so that a day with no reviews still gets a
# zero bar and the counts stay aligned with the seven labels below.
df2 = df.groupby(['weekday']).size().reindex(range(7), fill_value=0)
# Observation from the original analysis: weekends see somewhat more orders
# than weekdays.
weekday = ["一", "二", "三", "四", "五", "六", "日"]
bar = Bar("按周统计", width=750, height=400)
bar.add(
    "按周统计",
    ['周{}'.format(i) for i in weekday],
    df2.values,
    is_label_show=True,
    is_datazoom_show=False,
    is_toolbox_show=True,
    is_more_utils=True,
    is_random=True,
)
bar
-
- df['commentTime']=pd.to_datetime(df['commentTime'])
- df['day'] = df['commentTime'].dt.day
- df2= df.groupby(['day']).size()
- df2
- from pyecharts import Bar
- bar =Bar("按天统计",width=1000,height=400)
- bar.add("按天统计",['{} 日'.format(i) for i in df2.index],df2.values,is_label_show=True, is_datazoom_show=True,is_toolbox_show=True,is_more_utils=True,is_random=True)
- bar
-
- df['commentTime']=pd.to_datetime(df['commentTime'])
- df['hour'] = df['commentTime'].dt.hour
- df2= df.groupby(['hour']).size()
- df2
- from pyecharts import Bar
- bar =Bar("按时统计",width=1000,height=600)
- bar.add("按时统计",['{} h'.format(i) for i in df2.index],df2.values,is_label_show=True, is_datazoom_show=True,is_toolbox_show=True,is_more_utils=True,is_random=True)
- bar
-
- # 处理数据前需要先处理缺失值
- # 订单结束时间清洗
# Preview the order end time as a readable local-time string.
# Missing values are forward-filled first because time.localtime cannot
# format NaN. Series.ffill() replaces the deprecated fillna(method='ffill').
# NOTE(review): a NaN in the very first row has nothing to fill from and
# would still fail in time.localtime; confirm the first row is populated.
df['dealEndtime'].ffill().apply(
    lambda x: time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(x))
).head()
- 0 2018-06-30 14:00:00
- 1 2018-06-30 14:00:00
- 2 2018-06-30 14:00:00
- 3 2018-06-30 14:00:00
- 4 2018-06-30 14:00:00
- Name: dealEndtime, dtype: object
套餐的统计
-
- df['menu'].dropna().astype('category').value_counts()
- 2人午晚餐 7640
- 单人午晚餐 3920
- 学生专享午晚自助 2638
- 4人午/晚自助 1581
- 单人下午自助烤肉 639
- 6人午/晚自助 507
- 周一至周五自助烤肉/周六日及节假日自助烤肉2选1 209
- 单人午/晚自助 67
- 周一至周五自助烤肉,免费WiFi 22
- Name: menu, dtype: int64
-
- df['readCnt'].corr(df['star'])
- # 评论阅读数与客户评价分数高低的相关性
0.05909293203205019
-
- df_most=df[(df["menu"]=="2人午晚餐")]['star'].value_counts().reindex(range(10,60,10))
- 10 329
- 20 533
- 30 2002
- 40 2704
- 50 2072
- Name: star, dtype: int64
-
- df[(df["menu"]=="单人午晚餐")]['star'].value_counts()
- 30 1215
- 40 1208
- 50 1093
- 20 298
- 10 106
- Name: star, dtype: int64
-
- # 学生专享午晚自助
- df[(df["menu"]=="学生专享午晚自助")]['star'].value_counts()
- 40 954
- 50 863
- 30 529
- 20 191
- 10 101
- Name: star, dtype: int64
-
- df[(df["menu"]=="4人午/晚自助")]['star'].value_counts()
- 50 536
- 30 432
- 40 414
- 10 131
- 20 68
- Name: star, dtype: int64
-
- df[(df["menu"]=="单人下午自助烤肉")]['star'].value_counts()
- 30 208
- 50 169
- 40 144
- 10 98
- 20 20
- Name: star, dtype: int64
-
- df[(df["menu"]=="6人午/晚自助")]['star'].value_counts()
- 50 245
- 40 142
- 30 112
- 10 8
- Name: star, dtype: int64
-
- #周一至周五自助烤肉/周六日及节假日自助烤肉2选1
- df[(df["menu"]=="周一至周五自助烤肉/周六日及节假日自助烤肉2选1")]['star'].value_counts()
- 50 87
- 40 66
- 30 46
- 20 10
- Name: star, dtype: int64
-
- df[(df["menu"]=="单人午/晚自助")]['star'].value_counts()
- 50 30
- 40 27
- 30 10
- Name: star, dtype: int64
-
- df[(df["menu"]=="周一至周五自助烤肉,免费WiFi")]['star'].value_counts().reindex(range(10,51,10)).fillna(0)
- 10 0.0
- 20 0.0
- 30 0.0
- 40 0.0
- 50 22.0
- Name: star, dtype: float64
套餐与评价汇总
-
- # df.groupby(['menu','star']).size().to_excel("all_menu_star.xls") 可以直接导出到excel
- df.groupby(['menu','star']).size()
- menu star
- 2人午晚餐 10 329
- 20 533
- 30 2002
- 40 2704
- 50 2072
- 4人午/晚自助 10 131
- 20 68
- 30 432
- 40 414
- 50 536
- 6人午/晚自助 10 8
- 30 112
- 40 142
- 50 245
- 单人下午自助烤肉 10 98
- 20 20
- 30 208
- 40 144
- 50 169
- 单人午/晚自助 30 10
- 40 27
- 50 30
- 单人午晚餐 10 106
- 20 298
- 30 1215
- 40 1208
- 50 1093
- 周一至周五自助烤肉/周六日及节假日自助烤肉2选1 20 10
- 30 46
- 40 66
- 50 87
- 周一至周五自助烤肉,免费WiFi 50 22
- 学生专享午晚自助 10 101
- 20 191
- 30 529
- 40 954
- 50 863
- dtype: int64

-
- df.groupby(['star','menu',]).size()
- star menu
- 10 2人午晚餐 329
- 4人午/晚自助 131
- 6人午/晚自助 8
- 单人下午自助烤肉 98
- 单人午晚餐 106
- 学生专享午晚自助 101
- 20 2人午晚餐 533
- 4人午/晚自助 68
- 单人下午自助烤肉 20
- 单人午晚餐 298
- 周一至周五自助烤肉/周六日及节假日自助烤肉2选1 10
- 学生专享午晚自助 191
- 30 2人午晚餐 2002
- 4人午/晚自助 432
- 6人午/晚自助 112
- 单人下午自助烤肉 208
- 单人午/晚自助 10
- 单人午晚餐 1215
- 周一至周五自助烤肉/周六日及节假日自助烤肉2选1 46
- 学生专享午晚自助 529
- 40 2人午晚餐 2704
- 4人午/晚自助 414
- 6人午/晚自助 142
- 单人下午自助烤肉 144
- 单人午/晚自助 27
- 单人午晚餐 1208
- 周一至周五自助烤肉/周六日及节假日自助烤肉2选1 66
- 学生专享午晚自助 954
- 50 2人午晚餐 2072
- 4人午/晚自助 536
- 6人午/晚自助 245
- 单人下午自助烤肉 169
- 单人午/晚自助 30
- 单人午晚餐 1093
- 周一至周五自助烤肉/周六日及节假日自助烤肉2选1 87
- 周一至周五自助烤肉,免费WiFi 22
- 学生专享午晚自助 863
- dtype: int64

-
- df.groupby(['star','menu',]).size()[50]
- menu
- 2人午晚餐 2072
- 4人午/晚自助 536
- 6人午/晚自助 245
- 单人下午自助烤肉 169
- 单人午/晚自助 30
- 单人午晚餐 1093
- 周一至周五自助烤肉/周六日及节假日自助烤肉2选1 87
- 周一至周五自助烤肉,免费WiFi 22
- 学生专享午晚自助 863
- dtype: int64
-
- # userId
- # 这家店铺有好多回头客,万万没想到
- df[df['userId']!=0]['userId'].value_counts().head(40)
- 266045270 64
- 152775497 60
- 80372612 60
- 129840082 60
- 336387962 60
- 34216474 60
- 617772217 60
- 82682689 54
- 287219504 49
- 884729389 45
- ...
- 232697160 40
- 141718492 40
- 879430090 40
- 696143486 40
- 13257519 40
- 983797146 40
- 911947863 40
- 993057629 40
- 494215297 40
- Name: userId, dtype: int64

-
- df[df['userName']!="匿名用户"]['userName'].value_counts().head(40)
- xuruiss1026 64
- 黑发飘呀飘 60
- 么么哒我是你聪叔 60
- jIx325233926 60
- siisgood 60
- vTF610712604 60
- 始于初见的你 60
- yumengkou 54
- Daaaav 49
- 梁子7543 45
- oev575457132 40
- oUI806055883 40
- joF498901567 40
- liE32679330 40
- ...
- 清晨cxh98 40
- cBj31240225 40
- 天蛟Wing 40
- 榴莲馅月饼 40
- leeman666888 40
- 迅行天下 40
- 滨海之恋33 40
- pHO437742850 40
- SzX539077433 40
- Name: userName, dtype: int64

评分与用户等级汇总
-
- df.groupby(['star','userLevel',]).size()
- star userLevel
- 10 0 187
- 1 139
- 2 164
- 3 193
- 4 80
- 5 10
- 20 0 223
- 1 88
- 2 304
- 3 294
- 4 207
- 5 21
- 30 0 1147
- 1 405
- 2 1057
- 3 1230
- 4 570
- 5 165
- 6 20
- 40 0 870
- 1 432
- 2 1360
- 3 1751
- 4 1026
- 5 261
- 6 25
- 50 0 698
- 1 386
- 2 1167
- 3 1670
- 4 802
- 5 318
- 6 130
- dtype: int64

-
from pyecharts import Bar

# Number of reviews for every (userLevel, star) pair.
df_level_star = df.groupby(['userLevel', 'star']).size()
# Star ratings are stored as 10, 20, ..., 50; this is the shared x axis.
attr = np.arange(10, 60, 10)

# Align every level onto the full star axis. A level with no reviews for some
# rating gets 0 there instead of a shorter array that would shift its bars.
level_counts = [
    df_level_star[level].reindex(attr).fillna(0).values
    for level in range(7)
]
# Keep the per-level names available for later cells.
df_0, df_1, df_2, df_3, df_4, df_5, df_6 = level_counts

common_opts = dict(
    is_label_show=True,
    legend_pos='right',
    legend_orient='vertical',
    label_text_size=12,
)
bar = Bar("用户等级与评分", title_pos="center")
for level, counts in enumerate(level_counts):
    # Only level 3 is annotated with its average line and max/min markers.
    extra_opts = (
        {"mark_line": ["average"], "mark_point": ['max', 'min']}
        if level == 3
        else {}
    )
    bar.add("level {}".format(level), attr, counts, **extra_opts, **common_opts)
bar

-
# Same user-level / star breakdown as a partially stacked chart: levels 0-2
# are drawn side by side, levels 3-6 are stacked on top of each other.
bar = Bar("用户等级与评分", title_pos="center", title_color="red")
# Star ratings are stored as 10, 20, ..., 50; this is the shared x axis.
attr = np.arange(10, 60, 10)

# Align every level onto the full star axis so that a missing rating becomes
# a 0 instead of shifting the remaining bars.
level_counts = [
    df_level_star[level].reindex(attr).fillna(0).values
    for level in range(7)
]
# Keep the per-level names available for later cells.
df_0, df_1, df_2, df_3, df_4, df_5, df_6 = level_counts

common_opts = dict(
    legend_pos='right',
    legend_orient='vertical',
    label_text_size=12,
)
for level, counts in enumerate(level_counts):
    # Only pass is_stack for the levels that were stacked originally (3-6).
    stack_opts = {"is_stack": True} if level >= 3 else {}
    bar.add("level {}".format(level), attr, counts, **stack_opts, **common_opts)
bar

-
- df['star'].corr(df['userLevel'])
0.14389808871897794
-
- df_zan=df['zanCnt'].value_counts()
- from pyecharts import Bar
- bar=Bar("点赞统计")
- bar.add("点赞分布",df_zan.index[1:],df_zan.values[1:],is_label_show=True)
- bar
-
- df.describe()
-
- df['userLevel'].value_counts().reindex(range(7))
- 0 3125
- 1 1450
- 2 4052
- 3 5138
- 4 2685
- 5 775
- 6 175
- Name: userLevel, dtype: int64
-
from pyecharts import Pie

# Number of reviews per user level, ordered from level 0 to level 6.
df_level = df['userLevel'].value_counts().reindex(range(7))
pie = Pie("用户等级分布", title_pos="center", width=900)
pie.add(
    "levels distribution",
    ["level " + str(i) for i in range(7)],
    df_level.values,
    is_random=True,
    # Fixed keyword: the original spelled it "radidus", so the intended
    # inner/outer radius was never applied.
    radius=[30, 45],
    legend_pos='left',
    rosetype='area',
    legend_orient='vertical',
    is_label_show=True,
    label_text_size=20,
)
pie
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。