admin 管理员组文章数量: 1086865
【python数据处理】matplotlib
最近学习python与数据处理时,发现数据总是很生硬,所以我便开始学习python一个强大的库matplotlib,可以有效将数据转化为直观的图形。
1.一些公共的方法
标签控制 Modify Ticks 调整横纵坐标值
注意:要把坐标刻度标签改为 string 类型时,要先使用 plt.subplot() 获取坐标轴对象
# Example 1: put a tick at every month position and label it with the month
# name; the y ticks are fractions that are displayed as percentages.
# `months` and `month_names` must be defined by the surrounding script.
ax = plt.subplot()
ax.set_xticks(months)
ax.set_xticklabels(month_names)
ax.set_yticks([0.10, 0.25, 0.5, 0.75])
ax.set_yticklabels(["10%", "25%", "50%", "75%"])

# Example 2: one tick per entry of `drinks`, labelled with the drink name.
ax2 = plt.subplot()
ax2.set_xticks(range(len(drinks)))
ax2.set_xticklabels(drinks)
plt.show()
2.标题 顾名思义
# Caption the current plot: overall title first, then the two axis labels.
plt.title("My Last Twelve Years of Coffee Drinking")
plt.xlabel("Time")
plt.ylabel("Dollars spent on coffee")
3.plt.legend(legend_labels, loc=8) 标签 给每一条线标个名字方便查看
Number Code:String:0.best 1.upper right 2.upper left 3.lower left 4.lower right 5.right 6.center left 7.center right 8.lower center 9.upper center 10.center
4.保存及大小设定
plt.close('all')防止一些未显示的图线捣乱
plt.figure(figsize=(width,height)) 设置大小
plt.savefig('power_generated.png') 保存
import codecademylib  # presumably supplied by the Codecademy exercise environment
from matplotlib import pyplot as plt

word_length = [8, 11, 12, 11, 13, 12, 9, 9, 7, 9]
power_generated = [753.9, 768.8, 780.1, 763.7, 788.5, 782, 787.2, 806.4, 806.2, 798.9]
years = [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009]

# Close any figures left open so stale lines do not leak into the new plots.
plt.close('all')

# First figure: default size.
plt.figure()
plt.plot(years, word_length)
plt.savefig('winning_word_lengths.png')

# Second figure: explicit size of 7 x 3.
plt.figure(figsize=(7, 3))
plt.plot(years, power_generated)
plt.savefig('power_generated.png')
5.plt.axis
显示特定区间 可以理解为放大或缩小
plt.axis([x-min, x-max, y-min, y-max])显示这一段线
6.plt.subplot
用来创建多个图像 和用来修改x,y轴坐标 使用ax
子图
plt.subplot()
需要传递三个参数:
子图的行数
子图的列数
子图索引
子图间隔控制
plt.subplots_adjust(wspace=0.35, bottom=0.2)
# `x`, `straight_line`, `parabola` and `cubic` must be defined by the
# surrounding script.

# Subplot 1: index 1 of a 2-row x 1-column grid
plt.subplot(2, 1, 1)
plt.plot(x, straight_line)

# Subplot 2: index 3 of a 2 x 2 grid
plt.subplot(2, 2, 3)
plt.plot(x, parabola)

# Subplot 3: index 4 of a 2 x 2 grid
plt.subplot(2, 2, 4)
plt.plot(x, cubic)

# Adjust the spacing between subplots and the bottom margin.
plt.subplots_adjust(wspace=0.35, bottom=0.2)
上下两个subplot如果前面的传入值一样会在一起共用plt.show(),如果修改了figsize会变成两张图,独立使用plt.show()
下方就是分别打印
import codecademylib3_seaborn  # presumably supplied by the Codecademy exercise environment
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

# Bar Graph: Featured Games
games = ["LoL", "Dota 2", "CS:GO", "DayZ", "HOS", "Isaac", "Shows", "Hearth", "WoT", "Agar.io"]
viewers = [1070, 472, 302, 239, 210, 171, 170, 90, 86, 71]

# Create the figure BEFORE asking for its axes: calling plt.subplot() first
# and plt.figure(...) afterwards would leave `ax` attached to a different
# figure than the one the bars are drawn on.
plt.figure(figsize=(5, 5))
ax = plt.subplot()
plt.bar(range(len(games)), viewers, color='slateblue')
ax.set_xticks(range(len(games)))
ax.set_xticklabels(games, rotation=30)
plt.title('Each game viewers')
plt.legend(["Twitch"])
plt.xlabel('Games')
plt.ylabel('Viewers')
plt.show()

# Pie Chart: League of Legends Viewers' Whereabouts
labels = ["US", "DE", "CA", "N/A", "GB", "TR", "BR", "DK", "PL", "BE", "NL", "Others"]
countries = [447, 66, 64, 49, 45, 28, 25, 20, 19, 17, 17, 279]

# A second, separate figure that is shown on its own.
plt.figure(figsize=(5, 5))
ax2 = plt.subplot()
plt.pie(countries, labels=labels)
plt.axis('equal')
plt.show()
折线图
1. plt.plot(x, y) 画一条x为横坐标y为纵坐标的直线
可以画多条
2.样式控制
color 颜色 ,
linestyle='--' 表示线条是虚线 ,':' 表示点线(由一个个点组成的虚线),'' 表示不画线条
marker='s' 关键节点是square 方块 , 'o'表示圆 , '*'表示星号
# `time`, `revenue` and `costs` must be defined by the surrounding script.
plt.plot(time, revenue, color="purple", linestyle='--')  # purple dashed line
plt.plot(time, costs, color="#82edc9", marker='s')  # line with square markers at each data point
3.Fill Between 类似于barchart 的error bar
import codecademylib  # presumably supplied by the Codecademy exercise environment
from matplotlib import pyplot as plt

months = range(12)
month_names = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
revenue = [16000, 14000, 17500, 19500, 21500, 21500, 22000, 23000, 20000, 19500, 18000, 16500]

# Grab the axes up front so the line, the tick labels and the shaded band
# are all guaranteed to share the same axes.
ax = plt.subplot()
plt.plot(months, revenue)
ax.set_xticks(months)
ax.set_xticklabels(month_names)

# Band of +/-10% around each revenue value.
y_upper = [i + (i * 0.10) for i in revenue]
y_lower = [i - (i * 0.10) for i in revenue]
plt.fill_between(months, y_lower, y_upper, alpha=0.2)

plt.show()
import codecademylib3_seaborn  # presumably supplied by the Codecademy exercise environment
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

hour = range(24)
viewers_hour = [30, 17, 34, 29, 19, 14, 3, 2, 4, 9, 5, 48, 62, 58, 40, 51, 69, 55, 76, 81, 102, 120, 71, 63]

# Grab the axes first so every call below draws on the same axes.
ax = plt.subplot()
ax.set_facecolor('seashell')

plt.title("Codecademy Learners Time Series")
plt.xlabel("Hour")
plt.ylabel("Viewers")

plt.plot(hour, viewers_hour)
plt.legend(['2015-01-01'])

ax.set_xticks(hour)
ax.set_yticks([0, 20, 40, 60, 80, 100, 120])

# Band of +/-15% around each hourly viewer count.
y_upper = [i + (i * 0.15) for i in viewers_hour]
y_lower = [i - (i * 0.15) for i in viewers_hour]
plt.fill_between(hour, y_lower, y_upper, alpha=0.2)

plt.show()
条形图
1.相对于折线图 画图为 plt.bar(x,y) 一般x可以用range(len(y))表示
import codecademylib  # presumably supplied by the Codecademy exercise environment
from matplotlib import pyplot as plt

drinks = ["cappuccino", "latte", "chai", "americano", "mocha", "espresso"]
sales = [91, 76, 56, 66, 52, 27]

# x must be a sequence of positions, one per bar. Passing the scalar
# len(sales) instead would place every bar at the same x position.
plt.bar(range(len(sales)), sales)
plt.show()
2.Side-By-Side bar
import codecademylib  # presumably supplied by the Codecademy exercise environment
from matplotlib import pyplot as plt

drinks = ["cappuccino", "latte", "chai", "americano", "mocha", "espresso"]
sales1 = [91, 76, 56, 66, 52, 27]
sales2 = [65, 82, 36, 68, 38, 40]

t = 2  # Number of datasets
d = 6  # Number of sets of bars
w = 0.8  # Width of each bar

# Each group of bars starts every `t` units; within a group, dataset `n`
# is shifted right by `w * n` so the bars sit next to each other.

# First dataset (out of 2)
n = 1
store1_x = [t * element + w * n for element in range(d)]
plt.bar(store1_x, sales1)

# Second dataset (out of 2)
n = 2
store2_x = [t * element + w * n for element in range(d)]
plt.bar(store2_x, sales2)

plt.show()
# Import the plotting modules
import matplotlib.pyplot as plt
import numpy as np

# Build the data
Y2016 = [15600, 12700, 11300, 4270, 3620]
Y2017 = [17400, 14800, 12000, 5200, 4020]
labels = ['北京', '上海', '香港', '深圳', '广州']
bar_width = 0.35

# Avoid garbled Chinese text: use a font that contains the glyphs
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Draw the two series side by side: the 2017 bars are shifted right by one
# bar width.
plt.bar(np.arange(5), Y2016, label='2016', color='steelblue', alpha=0.8, width=bar_width)
plt.bar(np.arange(5) + bar_width, Y2017, label='2017', color='indianred', alpha=0.8, width=bar_width)

# Axis labels
plt.xlabel('Top5城市')
plt.ylabel('家庭数量')

# Title
plt.title('亿万财富家庭数Top5城市分布')

# Tick labels, placed between the two bars of each group
plt.xticks(np.arange(5) + bar_width / 2, labels)

# Range of the y axis
plt.ylim([2500, 19000])

# Write the value slightly above each bar
for x2016, y2016 in enumerate(Y2016):
    plt.text(x2016, y2016 + 100, '%s' % y2016)

for x2017, y2017 in enumerate(Y2017):
    plt.text(x2017 + bar_width, y2017 + 100, '%s' % y2017)

# Show the legend
plt.legend()

# Show the figure
plt.show()
3.Stacked Bars
import codecademylib  # presumably supplied by the Codecademy exercise environment
from matplotlib import pyplot as plt

drinks = ["cappuccino", "latte", "chai", "americano", "mocha", "espresso"]
sales1 = [91, 76, 56, 66, 52, 27]
sales2 = [65, 82, 36, 68, 38, 40]

plt.bar(range(len(drinks)), sales1)
# bottom=sales1 starts each second bar where the matching first bar ends.
plt.bar(range(len(drinks)), sales2, bottom=sales1)

plt.legend(["Location 1", "Location 2"])
plt.show()
import codecademylib  # presumably supplied by the Codecademy exercise environment
from matplotlib import pyplot as plt
import numpy as np

unit_topics = ['Limits', 'Derivatives', 'Integrals', 'Diff Eq', 'Applications']
As = [6, 3, 4, 3, 5]
Bs = [8, 12, 8, 9, 10]
Cs = [13, 12, 15, 13, 14]
Ds = [2, 3, 3, 2, 1]
Fs = [1, 0, 0, 3, 0]

x = range(5)

# Running totals: each grade's bars start on top of all the grades below it.
c_bottom = np.add(As, Bs)
d_bottom = np.add(c_bottom, Cs)
f_bottom = np.add(d_bottom, Ds)

plt.figure(figsize=(10, 8))
# Fetch the axes of the new figure before drawing so the tick labels are
# applied to the same axes as the bars.
ax = plt.subplot()

plt.bar(x, As)
plt.bar(x, Bs, bottom=As)
plt.bar(x, Cs, bottom=c_bottom)
plt.bar(x, Ds, bottom=d_bottom)
plt.bar(x, Fs, bottom=f_bottom)

ax.set_xticks(range(len(unit_topics)))
ax.set_xticklabels(unit_topics)

plt.title('Grade distribution')
plt.xlabel('Unit')
plt.ylabel('Number of Students')

# Save BEFORE showing: once plt.show() returns, the figure may already be
# closed and savefig would write an empty image.
plt.savefig('my_stacked_bar.png')
plt.show()
4.Error bars 允许有偏差值
import codecademylib  # presumably supplied by the Codecademy exercise environment
from matplotlib import pyplot as plt

drinks = ["cappuccino", "latte", "chai", "americano", "mocha", "espresso"]
ounces_of_milk = [6, 9, 4, 0, 9, 0]
error = [0.6, 0.9, 0.4, 0, 0.9, 0]

# Plot the bar graph; yerr gives the error-bar size for each bar and
# capsize sets the size of the caps at the ends of the error bars.
plt.bar(range(len(drinks)), ounces_of_milk, yerr=error, capsize=5)

plt.show()
中文例子:
# Import the plotting module
import matplotlib.pyplot as plt

# Build the data
GDP = [12406.8, 13908.57, 9386.87, 9143.64]

# Avoid garbled Chinese text: use a font that contains the glyphs
# (alternative font: plt.rcParams['font.sans-serif'] = ['Microsoft YaHei'])
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Draw the bars; colour and transparency can be changed freely
plt.bar(range(4), GDP, align='center', color='steelblue', alpha=0.8)

# Axis label
plt.ylabel('GDP')
# plt.xlabel('省市')  # optional x-axis label

# Title
plt.title('四个直辖市GDP大比拼')

# Tick labels
plt.xticks(range(4), ['北京市', '上海市', '天津市', '重庆市'])

# Range of the y axis
plt.ylim([5000, 15000])

# Write the value (rounded to one decimal) centred above each bar
for x, y in enumerate(GDP):
    plt.text(x, y + 100, '%s' % round(y, 1), ha='center')

# Show the figure
plt.show()
# Import the plotting module
import matplotlib.pyplot as plt

# Build the data
price = [39.5, 39.9, 45.4, 38.9, 33.34]

# Avoid garbled Chinese text: use a font that contains the glyphs
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Draw horizontal bars
plt.barh(range(5), price, align='center', color='steelblue', alpha=0.8)

# Axis label
plt.xlabel('价格')

# Title
plt.title('不同平台书的最低价比较')

# Tick labels
plt.yticks(range(5), ['亚马逊', '当当网', '中国图书网', '京东', '天猫'])

# Range of the x axis (the value axis of a horizontal bar chart)
plt.xlim([32, 47])

# Write the value just to the right of each bar, vertically centred
for x, y in enumerate(price):
    plt.text(y + 0.1, x, '%s' % y, va='center')

# Show the figure
plt.show()
饼图
1.相对于折线图 和bar chart 饼图画图为 plt.pie(x)
plt.axis('equal') 表示正圆 否则是椭圆
import codecademylib  # presumably supplied by the Codecademy exercise environment
from matplotlib import pyplot as plt
import numpy as np

payment_method_names = ["Card Swipe", "Cash", "Apple Pay", "Other"]
payment_method_freqs = [270, 77, 32, 11]

# Equal axis scaling keeps the pie a circle rather than an ellipse.
plt.axis('equal')
plt.pie(payment_method_freqs)

plt.show()
2.有两种标签方式
# Option 1: put a label next to each wedge.
plt.pie(payment_method_freqs, labels=payment_method_names)

# Option 2: list the names in a legend.
plt.legend(payment_method_names)
3.百分比设置
autopct='%0.1f%%'
'%0.2f' :4.08
'%0.2f%%' :4.08%
'%d%%' : 4%
plt.pie(budget_data,labels=budget_categories,autopct='%0.1f%%')
例子1:
import codecademylib  # presumably supplied by the Codecademy exercise environment
from matplotlib import pyplot as plt

unit_topics = ['Limits', 'Derivatives', 'Integrals', 'Diff Eq', 'Applications']
num_hardest_reported = [1, 3, 10, 15, 1]

plt.figure(figsize=(10, 8))
# autopct prints each wedge's share as an integer percentage.
plt.pie(num_hardest_reported, labels=unit_topics, autopct="%1d%%")
plt.axis('equal')
plt.title('Hardest Topics')

# Save BEFORE showing: once plt.show() returns, the figure may already be
# closed and savefig would write an empty image.
plt.savefig("my_pie_chart.png")
plt.show()
例子2:
# Import the third-party module
import matplotlib.pyplot as plt

# Set the plot theme (here the ggplot style known from R)
plt.style.use('ggplot')

# Build the data
edu = [0.2515, 0.3724, 0.3336, 0.0368, 0.0057]
labels = ['中专', '大专', '本科', '硕士', '其他']

explode = [0, 0.1, 0, 0, 0]  # pull the second wedge ('大专') out to highlight it
colors = ['#9999ff', '#ff9999', '#7777aa', '#2442aa', '#dd5555']  # custom colours

# Handle garbled Chinese text and the minus sign on the axes
# (alternative font: plt.rcParams['font.sans-serif'] = ['Microsoft YaHei'])
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['font.sans-serif'] = ['SimHei']

# Use the same scale on both axes so the pie is a circle, not an ellipse
plt.axes(aspect='equal')

# Limit the ranges of the x and y axes
plt.xlim(0, 4)
plt.ylim(0, 4)

# Draw the pie chart
plt.pie(
    x=edu,  # data to plot
    explode=explode,  # highlight the '大专' group
    labels=labels,  # education-level labels
    colors=colors,  # custom fill colours
    autopct='%.1f%%',  # percentage format, one decimal place
    pctdistance=0.8,  # distance of the percentage text from the centre
    labeldistance=1.15,  # distance of the labels from the centre
    startangle=180,  # starting angle of the first wedge
    radius=1.5,  # radius of the pie
    counterclock=False,  # draw clockwise instead of counter-clockwise
    wedgeprops={'linewidth': 1.5, 'edgecolor': 'green'},  # wedge border properties
    textprops={'fontsize': 12, 'color': 'k'},  # text label properties
    center=(1.8, 1.8),  # centre of the pie
    frame=1,  # draw the axes frame around the pie
)

# Remove the tick marks on both axes
plt.xticks(())
plt.yticks(())

# Title
plt.title('芝麻信用失信用户教育水平分布')

# Show the figure
plt.show()
import codecademylib3_seaborn  # presumably supplied by the Codecademy exercise environment
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

# Bar Graph: Featured Games (data kept from the exercise; not plotted in this snippet)
games = ["LoL", "Dota 2", "CS:GO", "DayZ", "HOS", "Isaac", "Shows", "Hearth", "WoT", "Agar.io"]
viewers = [1070, 472, 302, 239, 210, 171, 170, 90, 86, 71]

# Pie chart data: one colour, label and explode offset per country.
colors = ['lightskyblue', 'gold', 'lightcoral', 'gainsboro', 'royalblue', 'lightpink', 'darkseagreen', 'sienna', 'khaki', 'gold', 'violet', 'yellowgreen']
labels = ["US", "DE", "CA", "N/A", "GB", "TR", "BR", "DK", "PL", "BE", "NL", "Others"]
explode = (0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)  # pull out the first wedge only
countries = [447, 66, 64, 49, 45, 28, 25, 20, 19, 17, 17, 279]

# Create the figure first, then its axes; the reverse order would leave an
# extra, empty figure behind.
plt.figure(figsize=(5, 5))
ax2 = plt.subplot()

plt.pie(countries, labels=labels, colors=colors, explode=explode, shadow=True, startangle=345, pctdistance=1.15)
plt.axis('equal')
plt.title("League of Legends Viewers' Whereabouts")
plt.legend(labels, loc="right")
plt.show()
Histograms 直方图 最适合显示数据集的图表
plt.hist
tips:pandas 中直接取出的列是 Series,要当做 list 使用时可以用 df.column.values
1.hist()的range与bins可以写可以不写,控制横坐标与直方个数, 可以通过range和bins来使其更好显示
import codecademylib
import numpy as np
from matplotlib import pyplot as plt

# Load the commute values from a comma-separated file.
commutes = np.genfromtxt('commutes.csv', delimiter=',')

# Histogram restricted to the 20-50 interval, split into 6 bins.
plt.hist(commutes, bins=6, range=(20, 50))
plt.show()
2.alpha=(0~1) 控制透明度,数值越小越透明,便于观察重叠的直方图
normed=True 会把直方图归一化,两组数据量相差过大时用它使二者便于比较;注意新版 matplotlib 已移除 normed 参数,应改用 density=True
# `sales_times1` and `sales_times2` must be defined by the surrounding script.
# density=True normalizes each histogram so datasets of very different sizes
# can be compared; it replaces the `normed` argument, which current
# matplotlib releases no longer accept.
plt.hist(sales_times1, bins=20, alpha=0.4, density=True)
plt.hist(sales_times2, bins=20, alpha=0.4, density=True)
随机生成一组正态分布数据的方法
np.random.normal(mean,std,number)
import codecademylib  # presumably supplied by the Codecademy exercise environment
import numpy as np
from matplotlib import pyplot as plt

# Brachiosaurus: 1000 normally distributed samples (mean 6.7, std 0.7)
b_data = np.random.normal(6.7, 0.7, 1000)

# Fictionosaurus: 1000 normally distributed samples (mean 7.7, std 0.3)
f_data = np.random.normal(7.7, 0.3, 1000)

# Same bins and range for both so the outlines are directly comparable.
plt.hist(b_data, bins=30, range=(5, 8.5), histtype='step', label='Brachiosaurus')
plt.hist(f_data, bins=30, range=(5, 8.5), histtype='step', label='Fictionosaurus')
plt.xlabel('Femur Length (ft)')
plt.legend(loc=2)
plt.show()
例子:
import codecademylib  # presumably supplied by the Codecademy exercise environment
from matplotlib import pyplot as plt

exam_scores1 = [62.58, 67.63, 81.37, 52.53, 62.98, 72.15, 59.05, 73.85, 97.24, 76.81, 89.34, 74.44, 68.52, 85.13, 90.75, 70.29, 75.62, 85.38, 77.82, 98.31, 79.08, 61.72, 71.33, 80.77, 80.31, 78.16, 61.15, 64.99, 72.67, 78.94]
exam_scores2 = [72.38, 71.28, 79.24, 83.86, 84.42, 79.38, 75.51, 76.63, 81.48, 78.81, 79.23, 74.38, 79.27, 81.07, 75.42, 90.35, 82.93, 86.74, 81.33, 95.1, 86.57, 83.66, 85.58, 81.87, 92.14, 72.15, 91.64, 74.21, 89.04, 76.54, 81.9, 96.5, 80.05, 74.77, 72.26, 73.23, 92.6, 66.22, 70.09, 77.2]

plt.figure(figsize=(10, 8))
# density=True normalizes both histograms so the two differently sized
# classes can be compared; it replaces the `normed` argument, which current
# matplotlib releases no longer accept.
plt.hist(exam_scores1, bins=12, density=True, histtype='step', linewidth=2)
plt.hist(exam_scores2, bins=12, density=True, histtype='step', linewidth=2)

plt.legend(["1st Yr Teaching", "2nd Yr Teaching"], loc=2)
plt.title('Final Exam Score Distribution')
plt.xlabel('percentage')
plt.ylabel('Frequency')

# Save BEFORE showing: once plt.show() returns, the figure may already be
# closed and savefig would write an empty image.
plt.savefig('my_historam.png')
plt.show()
散点图
import codecademylib3_seaborn  # presumably supplied by the Codecademy exercise environment
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

mu = 1  # not used below; kept from the original exercise
std = 0.5
mu2 = 4.188  # not used below; kept from the original exercise

# Fix the seed so the generated samples are reproducible.
np.random.seed(100)

# Four groups of 100 points each. The x samples are drawn first (means 0.25,
# 0.75, 0.25, 0.75), then the y samples (means 0.25, 0.25, 0.75, 0.75); the
# draw order is kept so the seeded random sequence stays the same.
xs = np.append(
    np.append(
        np.append(np.random.normal(0.25, std, 100), np.random.normal(0.75, std, 100)),
        np.random.normal(0.25, std, 100),
    ),
    np.random.normal(0.75, std, 100),
)
ys = np.append(
    np.append(
        np.append(np.random.normal(0.25, std, 100), np.random.normal(0.25, std, 100)),
        np.random.normal(0.75, std, 100),
    ),
    np.random.normal(0.75, std, 100),
)

values = list(zip(xs, ys))

# Split the points into two clusters; `results` holds one label per point.
model = KMeans(init='random', n_clusters=2)
results = model.fit_predict(values)

# First pass: colour every point by its cluster label.
plt.scatter(xs, ys, c=results, alpha=0.6)

# Second pass: redraw each cluster in its own fixed colour.
colors = ['#6400e4', '#ffc740']
value_array = np.array(values)
for i in range(2):
    points = value_array[results == i]
    plt.scatter(points[:, 0], points[:, 1], c=colors[i], alpha=0.6)

plt.title('Codecademy Mobile Feedback - Data Science')
plt.xlabel('Learn Python')
plt.ylabel('Learn SQL')

plt.show()
本文标签: python数据处理matplotlib
版权声明:本文标题:【python数据处理】matplotlib 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.roclinux.cn/p/1697309007a265452.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论