赞
踩
说明:
Jupyter 项目是一个交互式的 Python 科学计算和数据分析生态系统,其中最常用的是 Jupyter Notebook。
交互式的编程环境、友好的界面、便于分享的文档格式,以及对排版语法、绘图、数学公式的支持,使它成为最流行的 Python 科学计算工具。
install:
# 1. 进入虚拟环境
workon py3_numpy
#2 安装模块
windows
pip3 install jupyter
pip3 install matplotlib
==============
==============
linux:
pip3 install notebook
pip3 install matplotlib
打开 jupyter-notebook
关闭 Ctrl + c 输入 y 关闭
折线图
# Load the plotting library.
import matplotlib.pyplot as plt

# When plot() receives a single sequence, the x axis may be omitted:
# the values are used as y coordinates.
y_only = [0, 2, 4, 6, 8]
plt.plot(y_only)

# With two sequences the first supplies x and the second supplies y.
x_values = [0, 2, 4, 6, 8]
y_values = [1, 5, 3, 9, 7]
plt.plot(x_values, y_values)

# Display the figure.
plt.show()
# Styled line plot of EUR/CNY closing prices.
# The statements are restored to one per line (they had been collapsed into a
# single comment) and the marker argument now uses ASCII quotes.
import matplotlib.pyplot as plt

# Exchange rate: closing prices.
eurcny = [6.8007, 6.8007, 6.8015, 6.8015, 6.8060, 6.8036, 6.8025,
          6.7877, 6.7835, 6.7758, 6.7700, 6.7463, 6.7519, 6.7595,
          6.7669, 6.7511, 6.7511, 6.7539, 6.7430, 6.7374, 6.7265]
# Day of the month for each closing price.
date = [3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 17,
        18, 19, 20, 21, 24, 25, 26, 27, 28, 31]

plt.plot(
    date,                       # x axis data: dates
    eurcny,                     # y axis data: closing prices
    color='r',                  # line colour
    linestyle='--',             # line style
    linewidth=2,                # line width
    marker='o',                 # marker style
    markerfacecolor='#ffff00',  # marker fill colour
    markersize=5,               # marker size
    alpha=0.5,                  # transparency
)
# Second curve: same prices plotted without explicit x values.
plt.plot(eurcny)
plt.show()
散点图/气泡图
# Sample points for the scatter plot; x and y are paired by position.
x = list(range(1, 18, 2))
y = [2, -5, 19, 3, 5, 8, 12, 6, 1]

# Draw one marker per (x, y) pair and display the figure.
plt.scatter(x, y)
plt.show()
# Vertical and horizontal bar charts.
# The statements are restored to one per line; previously only the first
# assignment ran and the rest was swallowed by a trailing comment.
import matplotlib.pyplot as plt

x = [1, 2, 3, 4, 5]
y = [3, 6, 1, 8, 2]

# Vertical bars: x is the position of each bar, y is its height
# (the `width` argument controls bar thickness).
plt.bar(x, y)
plt.show()

# Horizontal bars: note that the meaning of x and y is swapped.
plt.barh(
    x,           # distance of each bar from the x axis (position on the y axis)
    y,           # bar length
    height=0.5,  # bar thickness
)
# Replace the numeric y-axis ticks with text labels.
plt.yticks(x, ['a', 'b', 'c', 'd', 'e'])
plt.show()
案例
# Grouped bar chart: average scores of boys and girls per subject.
# The statements are restored to one per line (they had been collapsed into a
# single comment). The bar offsets are derived from one width constant so the
# two series and the tick labels stay aligned if the width is changed.
import matplotlib.pyplot as plt

# Canvas size and resolution.
plt.figure(figsize=(6, 6), dpi=100)

# Boys' averages: Chinese / Math / English / Physics / Chemistry.
boy = [85.5, 91, 72, 59, 66]
# Girls' averages.
girl = [94, 82, 89.5, 62, 49]
# Subject positions on the x axis.
course = [1, 2, 3, 4, 5]

bar_width = 0.3

# Boys' bars.
plt.bar(
    course,
    boy,
    color='g',
    width=bar_width,
    alpha=0.3,
)

# Girls' bars, shifted right by exactly one bar width.
course2 = [round(pos + bar_width, 2) for pos in course]
plt.bar(
    course2,  # x
    girl,     # y
    color='r',
    width=bar_width,
    alpha=0.3,
)

# Put the subject names midway between each pair of bars.
course3 = [round(pos + bar_width / 2, 2) for pos in course]
plt.xticks(course3, ['Chi', 'Math', 'Eng', 'Phy', 'Che'])

# Save the image, then display it.
plt.savefig("./avg_data.png")
plt.show()
还有很多其他类型的图表,这里就不一一列举了。
折线图
# Create a 6x6 inch canvas at 100 dpi.
plt.figure(figsize=(6, 6), dpi=100)

# Coordinates must be sequences of equal length: one y value per x value.
y = [12, 32, 2, 34, 23, 12, 56, 34, 23, 44]
x = list(range(10))

# Draw the line.
plt.plot(x, y)

# Write the image to disk, then display it.
plt.savefig("./data1.png")
plt.show()
解决中文显示问题
linux 版:
# Compare Beijing and Shanghai temperatures, rendering Chinese text with an
# explicit font file.
# The statements are restored to one per line (they had been collapsed into a
# single comment), and the font path is now passed as `fname=`: the first
# positional argument of FontProperties is the font family, not a file path.
import matplotlib.pyplot as plt
import random
from matplotlib import font_manager

# Font file location; must be an absolute path.
# NOTE(review): the example path is a Windows path although this snippet is
# labelled for Linux - point it at a font file that exists on your machine.
my_font = font_manager.FontProperties(
    fname=r'D:\Pycharm-project\untitled\py3_nmp\arial unicode ms.ttf'
)

# Canvas size and resolution.
plt.figure(figsize=(20, 10), dpi=100)

# Coordinates: x and y must have the same number of elements.
x = range(60)
# 60 random samples between 15 and 18 degrees (Shanghai).
y = [random.uniform(15, 18) for i in range(60)]
# 60 random samples between 0 and 5 degrees (Beijing).
y_beijing = [random.uniform(0, 5) for i in range(60)]
# Candidate y-axis tick positions.
y_ = [i for i in range(50)]

# Shanghai line.
plt.plot(x, y, label="上海")
# Beijing line.
plt.plot(x, y_beijing, label="北京", color='r', linestyle="--", linewidth=5)

x_name = ["11点{}分".format(i) for i in range(60)]
# x-axis ticks: every fifth minute, labelled in Chinese.
plt.xticks(x[::5], x_name[::5], fontproperties=my_font)
# y-axis ticks.
plt.yticks(y_[::5])

# Axis names.
plt.xlabel("时间", fontproperties=my_font)
plt.ylabel("温度", fontproperties=my_font)
# Title.
plt.title("time in 11 to 12")
# Legend describing the two lines (Shanghai and Beijing).
plt.legend(loc="best", prop=my_font)

plt.savefig("./data_bjhangh.png")
plt.show()
Windows版:
# Compare Beijing and Shanghai temperatures, rendering Chinese text through a
# globally configured font (Windows).
# The statements are restored to one per line; collapsed onto one line the
# snippet could not run.
import matplotlib.pyplot as plt
import random
from matplotlib import font_manager

# Canvas size and resolution.
plt.figure(figsize=(20, 10), dpi=100)

# Windows font configuration.
# Option 1: set the default font family and size through rc.
plt.rc('font', family='SimHei', size=13)
# Option 2: set rcParams directly.
# plt.rcParams['font.sans-serif'] = ['SimHei']  # default font
# plt.rcParams['axes.unicode_minus'] = False    # avoid the minus sign '-' being drawn as a box in saved images

# Coordinates: x and y must have the same number of elements.
x = range(60)
# 60 random samples between 15 and 18 degrees (Shanghai).
y = [random.uniform(15, 18) for i in range(60)]
# 60 random samples between 0 and 5 degrees (Beijing).
y_beijing = [random.uniform(0, 5) for i in range(60)]
# Candidate y-axis tick positions.
y_ = [i for i in range(50)]

# Shanghai line.
plt.plot(x, y, label="上海")
# Beijing line.
plt.plot(x, y_beijing, label="北京", color='r', linestyle="--", linewidth=5)

x_name = ["11点{}分".format(i) for i in range(60)]
# x-axis ticks: every fifth minute.
plt.xticks(x[::5], x_name[::5])
# y-axis ticks.
plt.yticks(y_[::5])

# Axis names.
plt.xlabel("时间")
plt.ylabel("温度")
# Title.
plt.title("time in 11 to 12")
# Legend describing the two lines (Shanghai and Beijing).
plt.legend(loc="best")

plt.savefig("./data_bjhangh.png")
plt.show()
折线网格图
# Line chart with title, axis labels, custom ticks, legend and grid.
# The statements are restored to one per line; collapsed onto one line the
# snippet was a syntax error.
import matplotlib.pyplot as plt

plt.rcParams['font.family'] = ['Arial Unicode MS', 'sans-serif']

# Canvas size and resolution.
plt.figure(figsize=(20, 10), dpi=100)

# Draw the lines and keep the returned handles. plot() returns a list, so the
# trailing comma unpacks its single element.
n1, = plt.plot(
    [0, 2, 4, 6, 8],           # x coordinates
    [1, 5, 3, 9, 7],           # y coordinates
    label='number1图例标题1',  # legend entry
)
n2, = plt.plot([1, 3, 5, 7, 9], [0, 4, 2, 8, 6], label='number2图例标题2')

########################
# Chart title.
plt.title(
    'hello图表标题',
    color='#00ff00',
    fontsize=24,
)

# Axis labels.
plt.xlabel('X axis')
plt.ylabel('Y轴标注')

# Ticks and tick labels.
plt.yticks([1, 3, 6, 9, 12, 15, 18, 20])  # y tick positions
plt.xticks(
    [0, 1, 2, 5, 8, 10],                                   # tick positions
    ['2000', '2001', '2002', '2005', '2008', '2010'],      # tick labels
    fontsize=14,                                           # text size
    rotation=90,                                           # rotation angle
)

# Axis ranges.
# plt.axis([-1, 11, -2, 12])  # x from -1 to 11, y from -2 to 12
plt.xlim([-1, 11])  # x range: -1 to 11
plt.ylim([-2, 12])  # y range: -2 to 12

########################
# Legend for everything plotted (requires label= on each plot call, see above):
# plt.legend()  # loc: position, frameon: draw a frame or not
#
# Legend for selected lines only: assign the result of each plot call to a
# variable (with a trailing comma) and pass those handles.
# Available positions:
#   best (default)
#   right
#   center, center left, center right
#   lower center, lower left, lower right
#   upper center, upper left, upper right
plt.legend(
    handles=[n1, n2],   # draw the legend for these lines only
    loc='upper right',  # legend position
    frameon=False,      # no frame
)

# Grid.
plt.grid()

# Save as an image file (png by default); for another format such as jpg just
# add the extension. A path such as `images/test.jpg` may be given, but the
# directory must already exist.
# plt.savefig('test', dpi=600)  # dpi: resolution, commonly 72 or 300

# Display the figure.
plt.show()
直方图
# Histogram of movie running times.
# The statements are restored to one per line; collapsed onto one line the
# snippet was a syntax error.
# NOTE: the title contains Chinese text; if it renders as empty boxes, set a
# CJK-capable font through plt.rcParams before plotting.
import matplotlib.pyplot as plt
import random

plt.figure(figsize=(15, 6), dpi=100)

# Running times in minutes.
time = [131, 98, 125, 131, 124, 139, 131, 117, 128, 108, 135, 138, 131, 102, 107, 114, 119, 128, 121, 142, 127, 130, 124, 101, 110, 116, 117, 110, 128, 128, 115, 99, 136, 126, 134, 95, 138, 117, 111,
        78, 132, 124, 113, 150, 110, 117, 86, 95, 144, 105, 126, 130,
        126, 130, 126, 116, 123, 106, 112, 138, 123, 86, 101, 99, 136,
        123, 117, 119, 105, 137, 123, 128, 125, 104, 109, 134, 125, 127,
        105, 120, 107, 129, 116, 108, 132, 103, 136, 118, 102, 120, 114,
        105, 115, 132, 145, 119, 121, 112, 139, 125, 138, 109, 132, 134,
        156, 106, 117, 127, 144, 139, 139, 119, 140, 83, 110, 102,
        123,
        107, 143, 115, 136, 118, 139, 123, 112, 118, 125, 109, 119, 133,
        112, 114, 122, 109, 106, 123, 116, 131, 127, 115, 118, 112, 135,
        115, 146, 137, 116, 103, 144, 83, 123, 111, 110, 111, 100, 154,
        136, 100, 118, 119, 133, 134, 106, 129, 126, 110, 111, 109, 141,
        120, 117, 106, 149, 122, 122, 110, 118, 127, 121, 114, 125, 126,
        114, 140, 103, 130, 141, 117, 106, 114, 121, 114, 133, 137, 92,
        121, 112, 146, 97, 137, 105, 98, 117, 112, 81, 97, 139, 113,
        134, 106, 144, 110, 137, 137, 111, 104, 117, 100, 111, 101, 110,
        105, 129, 137, 112, 120, 113, 133, 112, 83, 94, 146, 133, 101,
        131, 116, 111, 84, 137, 115, 122, 106, 144, 109, 123, 116, 111,
        111, 133, 150]

# Bin width: how many minutes each bar covers.
width = 3
# Number of bins that cover the observed range at that width.
num_bins = int((max(time) - min(time)) / width)

# Draw the histogram; density=True normalises the bars to a density.
plt.hist(x=time, bins=num_bins, density=True)

# Place an x tick every 5 minutes across the observed range.
plt.xticks(range(min(time), max(time))[::5])

# Title.
plt.title('250个电影的时长分布图')

plt.grid(True, linestyle='--', alpha=0.5)
plt.show()
饼图api
# Pie chart of today's screening share per movie.
# The statements are restored to one per line; previously only `movie_name`
# was assigned and everything else was swallowed by a trailing comment.
import matplotlib.pyplot as plt

movie_name = ['雷神3:诸神黄昏', '正义联盟', '东方快车谋杀案', '寻梦环游记', '全球风暴',
              '降魔传', '追捕', '七十七天', '密战', '狂兽', '其它']

# Font configuration.
plt.rcParams['font.family'] = ['Arial Unicode MS', 'sans-serif']

# Canvas size and resolution.
plt.figure(figsize=(20, 10), dpi=100)

# Number of screenings per movie, in the same order as movie_name.
place_count = [60605, 54546, 45819, 28243, 13270, 9945, 7679, 6799, 6101, 4621, 20105]

# Draw the pie. `explode` pulls selected wedges out of the pie by the given
# fraction; `autopct` formats the percentage printed on each wedge.
p = plt.pie(
    place_count,
    labels=movie_name,
    explode=(0, 0.1, 0, 0, 0, 0, 0, 0.3, 0, 0, 0.1),
    autopct='%0.2f%%',
    colors=['b', 'r', 'g', 'y', 'c', 'm', 'y', 'k', 'c', 'g', 'r'],
)

# Linux workaround: apply a Chinese-capable FontProperties object (my_font)
# to every label, the legend and the title.
# for t in p[1]:
#     t.set_fontproperties(my_font)
# plt.legend(loc="best", prop=my_font)
# plt.title("今日排片占比", fontproperties=my_font)

plt.legend(loc="best")
plt.title("今日排片占比")
# Equal aspect ratio so the pie is drawn as a circle.
plt.axis('equal')
plt.show()
散点图
数组
# ndarray basics: building arrays and inspecting their element type.
import numpy as np

# A list of integers becomes an integer array.
scores = [99, 60, 80, 5, 50]
class1 = np.array(scores)
print(class1)
print(class1.dtype)
print('++++++++=')

# dtype reports the array's element type. Mixing bool, int, float and str in
# one array makes every element a string.
mixed_record = [True, 80, 177.7, '张小明']
s1 = np.array(mixed_record)
print(s1, s1.dtype)

# Output recorded by the author:
# [99 60 80  5 50]
# int32
# ++++++++=
# ['True' '80' '177.7' '张小明'] <U32
dtype:查看数组元素的数据类型
ndarray数组对象属性
案例
# ndarray attributes (dtype, size, ndim, shape) on 2-D and 3-D arrays.
# The statements are restored to one per line; previously only the import ran
# and the rest was swallowed by a trailing comment.
import numpy as np

# 2-D array: the students of class 1.
class1 = np.array([
    [True, 80, 177.7, '张小明'],
    [False, 99, 164.5, '李丽'],
    [True, 59, 158, '王华'],
])
print(class1.dtype)
print(class1.size)
print(class1.ndim)   # rank: number of dimensions
print(class1.shape)  # length along each axis
print("================")

# 3-D array: the same students at mid-term and at end of term.
class1 = np.array([
    [
        [True, 80, 177.7, '张小明'],
        [False, 99, 164.5, '李丽'],
        [True, 59, 158, '王华'],
    ],
    [
        [True, 95, 178, '张小明'],
        [False, 99, 164.5, '李丽'],
        [True, 39, 178, '王华'],
    ],
])
print(class1.dtype)
print(class1.size)
print(class1.ndim)        # rank: number of dimensions
print(class1.shape)       # length along each axis
print(len(class1.shape))  # same value as ndim
# Overview of numpy array-creation functions.
# The statements are restored to one per line. The snippet had been collapsed
# and split mid-output, so its second line began with bare numbers (a syntax
# error) and the live assignments were swallowed by comments. Examples the
# author had commented out stay commented out.
import numpy as np

# =====
# np.arange()
# Like range(): an increasing integer sequence from 0 to n-1.
list_01 = np.arange(10)
# start, stop, step
list_02 = np.arange(1, 9, 2)
# print(list_01, list_02)
# [0 1 2 3 4 5 6 7 8 9] [1 3 5 7]

# =====
# np.linspace()
# Fill an array with evenly spaced values between start and stop.
num01 = np.linspace(1, 10, 4)
# print(num01)
# [ 1.  4.  7. 10.]
# endpoint controls whether the stop value is included.
num02 = np.linspace(1, 10, 4, endpoint=False)
# print(num02)
# [1.   3.25 5.5  7.75]

# =====
# np.array()
# Build an array from nested sequences.
n1 = np.array([[1, 23, 4], [3, 4, 5]])
# print(n1)
# [[ 1 23  4]
#  [ 3  4  5]]

# =====
# np.ones(shape)         all-ones array of the given shape (shape is a tuple)
# np.ones_like(ndarray)  all-ones array with the shape and dtype of another array
# n2 = np.ones((2,3,4))
# print(n2,)
# print("==============")
# n3 = np.ones_like(n2)
# print(n3)
# [[[1. 1. 1. 1.]
#   [1. 1. 1. 1.]
#   [1. 1. 1. 1.]]
#
#  [[1. 1. 1. 1.]
#   [1. 1. 1. 1.]
#   [1. 1. 1. 1.]]]
# ==============
# [[[1. 1. 1. 1.]
#   [1. 1. 1. 1.]
#   [1. 1. 1. 1.]]
#
#  [[1. 1. 1. 1.]
#   [1. 1. 1. 1.]
#   [1. 1. 1. 1.]]]

# =====
# np.zeros(shape)         all-zeros array of the given shape (shape is a tuple)
# np.zeros_like(ndarray)  all-zeros array with the shape and dtype of another array
# n3 = np.zeros((3, 6), dtype = np.int32)
# n4 = np.zeros_like(n3)
# print(n3)
# print("===========")
# print(n4)
# [[0 0 0 0 0 0]
#  [0 0 0 0 0 0]
#  [0 0 0 0 0 0]]
# ===========
# [[0 0 0 0 0 0]
#  [0 0 0 0 0 0]
#  [0 0 0 0 0 0]]

# =====
# np.empty(shape)         allocate memory only; the contents are arbitrary garbage
# np.empty_like(ndarray)  same, using the shape and dtype of another array
# n1 = np.empty((6, 7))
# n2 = np.empty_like(n1)
# print(n1)
# print("=========")
# print(n2)
# The recorded output was two 6x7 blocks of leftover memory values, e.g.
# [[1.18036057e-311 1.18024123e-311 0.00000000e+000 0.00000000e+000
#   1.18036090e-311 0.00000000e+000 1.18036090e-311]
#  ...
# =========
# [[1.18024210e-311 1.18024123e-311 9.29433783e+242 1.49174223e+195
#   4.95261533e+223 7.19464630e+159 1.99501687e+161]
#  ...
# The remaining rows are omitted: the numbers carry no meaning.

# =====
# np.full(shape, val)    array of the given shape with every element equal to val
# np.full_like(a, val)   array shaped like a with every element equal to val
n1 = np.full(6, 3)
n2 = np.full((2, 3, 4), 25)
n3 = np.full_like(n1, 25)
print(n1)
print('==========')
print(n2)
print('==========')
print(n3)

# =====
# np.eye(n), np.identity(n)  n*n identity matrix: ones on the diagonal, zeros elsewhere
# np.diag(list)              square matrix with the given values on the diagonal
# n1 = np.eye(5)
# n2 = np.identity(5)
# print(n1)
# print("===========")
# print(n2)

# Square matrix whose diagonal holds the given values.
# n1 = np.diag([1,3,5,7,9])
# print(n1)
# [[1 0 0 0 0]
#  [0 3 0 0 0]
#  [0 0 5 0 0]
#  [0 0 0 7 0]
#  [0 0 0 0 9]]
数组运算
import numpy as np
# Array operation examples. Everything below was commented out by the author;
# the only executable statement in this snippet is the import above.

# --- reshape ---
# arr01 = np.ones([30])
# print(arr01)
# print('=================')
#
# arr02 = arr01.reshape((5,6))
# print(arr02)
# print('=================')
#
# arr03 = arr01.reshape((5,2,3))
# print(arr03)
# print('=========')

# --- indexing and slicing ---
# arr1 = np.random.normal(1.5,0.2,(4,5))
#
# print(arr1)
# print('-'*30)
#
# print(arr1[1])
# print('-'*30)
#
# print(arr1[0:2])
# print('-'*30)
#
# print(arr1[0:2, 3])
# print('-'*30)
#
# # rows 0 to 2, columns 1 to 3
# print(arr1[0:2, 1:3])
# print('-'*30)
#
# print(arr1[0][1])
# print('-'*30)

# --- np.where ---
# arr1 = np.arange(24).reshape((4,6))
# print(arr1)
#
# # numpy "ternary" operation
# arr2 = np.where(arr1 < 10, 0, 10)
# print(arr2)
# [[ 0  1  2  3  4  5]
#  [ 6  7  8  9 10 11]
#  [12 13 14 15 16 17]
#  [18 19 20 21 22 23]]
# [[ 0  0  0  0  0  0]
#  [ 0  0  0  0 10 10]
#  [10 10 10 10 10 10]
#  [10 10 10 10 10 10]]

###
### Operations between arrays
#
# a1 = np.array([[1, 2, 3], [4, 5, 6]])
# a2 = np.array([[5, 10, 15], [5, 6, 7]])
#
# print(a1 + a2)

# --- statistics ---
# a = np.array([6,15,25])
# print(a.mean())
#
# print(np.average(a, weights=[1,2,3]))
# a = np.array ([
#     [80, 86], [82, 80], [85, 78], [90, 90], [86, 82], [82, 90], [78, 80],
#     [92, 94]
#
# ])
# print(np.max(a))
# print(np.min(a))
# print(np.mean(a))  # mean
# print(np.mean(a, axis=0))  # mean along axis 0
# print(np.max(a, axis=1))
import pandas as pd

# Read the IMDB movie CSV from the working directory and print the resulting
# DataFrame.
csv_path = './IMDB-Movie-Data.csv'
content_csv = pd.read_csv(csv_path)
print(content_csv)
# pandas Series basics plus filtering and sorting a DataFrame read from CSV.
# The statements are restored to one per line; collapsed onto one line the two
# imports were a syntax error. Examples the author had commented out stay
# commented out.
import pandas as pd
import numpy as np

# --- Series basics ---
# s1 = pd.Series([1,2,3,4,5])
#
# arr1 = np.arange(10)
#
# print(pd.Series(arr1))
# print('-------------')
# print(pd.Series([[3,45,6],['hehe','a']]))
# print('-------------')
#
# # Element access and slicing
# print(s1[2])
# print('-'*30)
# print(s1[2:4])
# print('-'*30)
#
# # Custom index
# s2 = pd.Series([1,23,4,5],[10,'a','b','c'])
# print(s2)
# print(s2[10])
# print(s2['a'])
# print(s2['b'])

# --- Reading a CSV file ---
s1 = pd.read_csv('./dogNames2.csv')
# print(s1.shape)
# print('---------------')
# print(s1.dtypes)
# print('-----------')
# print(s1.ndim)     # number of dimensions
# print('------------')
# print(s1.index)    # row index
# print('------------')
# print(s1.columns)  # column index
# print('------------')
# print(s1.values)   # underlying values
#
# s1.head(3)  # first rows (5 by default)
# s1.tail(3)  # last rows (5 by default)
#
# s1.info()      # overview: row count, column count, column index, non-null counts, column types, memory usage
# s1.describe()  # quick summary statistics: count, mean, standard deviation, max, quartiles, min

### ======================
### Filtering
# Boolean Series: True where the name count exceeds 800.
print(s1['Count_AnimalName'] > 800)
print('-------------')
# Keep only the rows for which the condition is True.
print(s1[s1['Count_AnimalName'] > 800])

# Sorting (ascending by default).
print(s1.sort_values('Count_AnimalName'))
# ascending=False sorts from largest to smallest.
print('------------')
print(s1.sort_values('Count_AnimalName', ascending=False))
获取 NaN 并打印其类型
import pandas as pd

# Load the CSV file into a DataFrame.
s1 = pd.read_csv('./IMDB-Movie-Data.csv')

# Per the author, row 7 of the revenue column holds a missing value (NaN).
revenue_column = s1['Revenue (Millions)']
print(revenue_column[7])
# The author recorded <class 'numpy.float64'> here: NaN is a float.
print(type(revenue_column[7]))
将缺失值替换为 NaN
import pandas as pd
import numpy as np

# Load the data file into a DataFrame.
s1 = pd.read_csv('./breast.data')

# Replace every '?' placeholder with NaN.
placeholder = '?'
s2 = s1.replace(to_replace=placeholder, value=np.nan)

# Print rows 20 up to (not including) 34.
selected_rows = slice(20, 34)
print(s2[selected_rows])
groupby分组
# groupby examples on a small random score table.
# The statements are restored to one per line (the collapsed imports were a
# syntax error and a stray output line had lost its leading '#').
# `numeric_only=True` is passed to mean() so the string column 'test' is left
# out; recent pandas versions raise instead of silently dropping it.
import pandas as pd
import numpy as np

# content_csv = pd.read_csv('./IMDB-Movie-Data.csv')
# print(content_csv)

df = pd.DataFrame({
    'name': ['张三', '李四', '王五', '李四', '王五', '王五', '赵六'],
    'chinese': np.random.randint(35, 100, 7),
    'math': np.random.randint(35, 100, 7),
    'english': np.random.randint(35, 100, 7),
    'test': ['一', '一', '一', '二', '二', '三', '一'],
})
# print(df)

### groupby
### =================
# Creating the GroupBy object does not compute anything yet.
print(df.groupby('name'))

# Call an aggregation to compute per-group results, here the mean of each
# group. Column names or column values may serve as the grouping key; numeric
# columns are aggregated and non-numeric columns are left out of the result.
a = df.groupby('name').mean(numeric_only=True)
# print(a)
#         chinese       math    english
# name
# 张三  69.000000  40.000000  69.000000
# 李四  72.000000  91.000000  49.500000
# 王五  64.333333  60.666667  58.333333
# 赵六  86.000000  88.000000  37.000000

# To keep the grouping column as a regular column instead of the index,
# pass as_index=False.
b = df.groupby('name', as_index=False).mean(numeric_only=True)
# print(b)
#   name    chinese       math    english
# 0  张三  69.000000  40.000000  69.000000
# 1  李四  72.000000  91.000000  49.500000
# 2  王五  64.333333  60.666667  58.333333
# 3  赵六  86.000000  88.000000  37.000000

# Group by several columns, then take the mean.
c = df.groupby(['name', 'chinese']).mean(numeric_only=True)
c2 = df.groupby(['name', 'chinese'], as_index=False).mean(numeric_only=True)
print(c)
#               math  english
# name chinese
# 张三  40        85       67
# 李四  44        40       57
#      84        85       78
# 王五  35        44       56
#      40        80       81
#      65        35       85
# 赵六  39        92       56
print(c2)
#   name  chinese  math  english
# 0  张三       40    85       67
# 1  李四       44    40       57
# 2  李四       84    85       78
# 3  王五       35    44       56
# 4  王五       40    80       81
# 5  王五       65    35       85
# 6  赵六       39    92       56
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。