二十、數據可視化 · 數據科學和人工智能技術筆記

# 二十、數據可視化 > 作者：[Chris Albon](https://chrisalbon.com/) > > 譯者：[飛龍](https://github.com/wizardforcel) > > 協議：[CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/) ## MatPlotLib 中的雙向條形圖 ```py %matplotlib inline import pandas as pd import matplotlib.pyplot as plt import numpy as np # 創建數據幀 raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], 'pre_score': [4, 24, 31, 2, 3], 'mid_score': [25, 94, 57, 62, 70], 'post_score': [5, 43, 23, 23, 51]} df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score']) df ``` | | first_name | pre_score | mid_score | post_score | | --- | --- | --- | --- | --- | | 0 | Jason | 4 | 25 | 5 | | 1 | Molly | 24 | 94 | 43 | | 2 | Tina | 31 | 57 | 23 | | 3 | Jake | 2 | 62 | 23 | | 4 | Amy | 3 | 70 | 51 | ```py # 輸入數據，特別是第二和 # 第三行，跳過第一列 x1 = df.ix[1, 1:] x2 = df.ix[2, 1:] # 創建條形標簽 bar_labels = ['Pre Score', 'Mid Score', 'Post Score'] # 創建圖形 fig = plt.figure(figsize=(8,6)) # 設置 y 的位置 y_pos = np.arange(len(x1)) y_pos = [x for x in y_pos] plt.yticks(y_pos, bar_labels, fontsize=10) # 在 y_pos 的位置上創建水平條形 plt.barh(y_pos, # 使用數據 x1 x1, # 中心對齊 align='center', # 透明度為 0.4 alpha=0.4, # 顏色為綠色 color='#263F13') # 在 y_pos 的位置上創建水平條形 plt.barh(y_pos, # 使用數據 -x2 -x2, # 中心對齊 align='center', # 透明度為 0.4 alpha=0.4, # 顏色為綠色 color='#77A61D') # 注解和標簽 plt.xlabel('Tina\'s Score: Light Green. 
Molly\'s Score: Dark Green') t = plt.title('Comparison of Molly and Tina\'s Score') plt.ylim([-1,len(x1)+0.1]) plt.xlim([-max(x2)-10, max(x1)+10]) plt.grid() plt.show() ``` ![png](https://chrisalbon.com/python/data_visualization/matplotlib_back_to_back_bar_plot/matplotlib_back_to_back_bar_plot_6_0.png) ## MatPlotLib 中的條形圖 ```py %matplotlib inline import pandas as pd import matplotlib.pyplot as plt import numpy as np # 創建數據幀 raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], 'pre_score': [4, 24, 31, 2, 3], 'mid_score': [25, 94, 57, 62, 70], 'post_score': [5, 43, 23, 23, 51]} df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score']) df ``` | | first_name | pre_score | mid_score | post_score | | --- | --- | --- | --- | --- | | 0 | Jason | 4 | 25 | 5 | | 1 | Molly | 24 | 94 | 43 | | 2 | Tina | 31 | 57 | 23 | | 3 | Jake | 2 | 62 | 23 | | 4 | Amy | 3 | 70 | 51 | ```py # 為每個變量創建得分均值的列表 mean_values = [df['pre_score'].mean(), df['mid_score'].mean(), df['post_score'].mean()] # 創建變動列表，設為得分上下 .25 variance = [df['pre_score'].mean() * 0.25, df['pre_score'].mean() * 0.25, df['pre_score'].mean() * 0.25] # 設置條形標簽 bar_labels = ['Pre Score', 'Mid Score', 'Post Score'] # 創建條形的 x 位置 x_pos = list(range(len(bar_labels))) # 在 x 位置上創建條形圖 plt.bar(x_pos, # 使用 mean_values 中的數據 mean_values, # y-error 直線設置為變動 yerr=variance, # 中心對齊 align='center', # 顏色 color='#FFC222', # 透明度為 0.5 alpha=0.5) # 添加網格 plt.grid() # 設置 y 軸高度 max_y = max(zip(mean_values, variance)) # returns a tuple, here: (3, 5) plt.ylim([0, (max_y[0] + max_y[1]) * 1.1]) # 設置軸標簽和標題 plt.ylabel('Score') plt.xticks(x_pos, bar_labels) plt.title('Mean Scores For Each Test') plt.show() ``` ![png](https://chrisalbon.com/python/data_visualization/matplotlib_bar_plot/matplotlib_bar_plot_6_0.png) ## Seaborn 中的調色板 ```py import pandas as pd %matplotlib inline import matplotlib.pyplot as plt import seaborn as sns # 創建數據幀 data = {'date': ['2014-05-01 18:47:05.069722', '2014-05-01 
18:47:05.119994', '2014-05-02 18:47:05.178768', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.280592', '2014-05-03 18:47:05.332662', '2014-05-03 18:47:05.385109', '2014-05-04 18:47:05.436523', '2014-05-04 18:47:05.486877'], 'deaths_regiment_1': [34, 43, 14, 15, 15, 14, 31, 25, 62, 41], 'deaths_regiment_2': [52, 66, 78, 15, 15, 5, 25, 25, 86, 1], 'deaths_regiment_3': [13, 73, 82, 58, 52, 87, 26, 5, 56, 75], 'deaths_regiment_4': [44, 75, 26, 15, 15, 14, 54, 25, 24, 72], 'deaths_regiment_5': [25, 24, 25, 15, 57, 68, 21, 27, 62, 5], 'deaths_regiment_6': [84, 84, 26, 15, 15, 14, 26, 25, 62, 24], 'deaths_regiment_7': [46, 57, 26, 15, 15, 14, 26, 25, 62, 41]} df = pd.DataFrame(data, columns = ['date', 'battle_deaths', 'deaths_regiment_1', 'deaths_regiment_2', 'deaths_regiment_3', 'deaths_regiment_4', 'deaths_regiment_5', 'deaths_regiment_6', 'deaths_regiment_7']) df = df.set_index(df.date) sns.palplot(sns.color_palette("deep", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_5_0.png) ```py sns.palplot(sns.color_palette("muted", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_6_0.png) ```py sns.palplot(sns.color_palette("bright", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_7_0.png) ```py sns.palplot(sns.color_palette("dark", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_8_0.png) ```py sns.palplot(sns.color_palette("colorblind", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_9_0.png) ```py sns.palplot(sns.color_palette("Paired", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_10_0.png) ```py sns.palplot(sns.color_palette("BuGn", 10)) ``` 
![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_11_0.png) ```py sns.palplot(sns.color_palette("GnBu", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_12_0.png) ```py sns.palplot(sns.color_palette("OrRd", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_13_0.png) ```py sns.palplot(sns.color_palette("PuBu", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_14_0.png) ```py sns.palplot(sns.color_palette("YlGn", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_15_0.png) ```py sns.palplot(sns.color_palette("YlGnBu", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_16_0.png) ```py sns.palplot(sns.color_palette("YlOrBr", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_17_0.png) ```py sns.palplot(sns.color_palette("YlOrRd", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_18_0.png) ```py sns.palplot(sns.color_palette("BrBG", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_19_0.png) ```py sns.palplot(sns.color_palette("PiYG", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_20_0.png) ```py sns.palplot(sns.color_palette("PRGn", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_21_0.png) ```py sns.palplot(sns.color_palette("PuOr", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_22_0.png) ```py sns.palplot(sns.color_palette("RdBu", 10)) ``` 
![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_23_0.png) ```py sns.palplot(sns.color_palette("RdGy", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_24_0.png) ```py sns.palplot(sns.color_palette("RdYlBu", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_25_0.png) ```py sns.palplot(sns.color_palette("RdYlGn", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_26_0.png) ```py sns.palplot(sns.color_palette("Spectral", 10)) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_27_0.png) ```py # 創建調色板并將其設為當前調色板 flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"] sns.set_palette(flatui) sns.palplot(sns.color_palette()) ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_29_0.png) ```py # 設置繪圖顏色 sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df.deaths_regiment_4, df.deaths_regiment_5, df.deaths_regiment_6, df.deaths_regiment_7], color="#34495e") # <matplotlib.axes._subplots.AxesSubplot at 0x116f5db70> ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/seaborn_color_palettes_31_1.png) ## 使用 Seaborn 和 pandas 創建時間序列繪圖 ```py import pandas as pd %matplotlib inline import matplotlib.pyplot as plt import seaborn as sns data = {'date': ['2014-05-01 18:47:05.069722', '2014-05-01 18:47:05.119994', '2014-05-02 18:47:05.178768', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.280592', '2014-05-03 18:47:05.332662', '2014-05-03 18:47:05.385109', '2014-05-04 18:47:05.436523', '2014-05-04 18:47:05.486877'], 'deaths_regiment_1': [34, 43, 14, 15, 15, 14, 31, 25, 62, 41], 'deaths_regiment_2': [52, 66, 78, 15, 15, 5, 25, 25, 86, 1], 
'deaths_regiment_3': [13, 73, 82, 58, 52, 87, 26, 5, 56, 75], 'deaths_regiment_4': [44, 75, 26, 15, 15, 14, 54, 25, 24, 72], 'deaths_regiment_5': [25, 24, 25, 15, 57, 68, 21, 27, 62, 5], 'deaths_regiment_6': [84, 84, 26, 15, 15, 14, 26, 25, 62, 24], 'deaths_regiment_7': [46, 57, 26, 15, 15, 14, 26, 25, 62, 41]} df = pd.DataFrame(data, columns = ['date', 'battle_deaths', 'deaths_regiment_1', 'deaths_regiment_2', 'deaths_regiment_3', 'deaths_regiment_4', 'deaths_regiment_5', 'deaths_regiment_6', 'deaths_regiment_7']) df = df.set_index(df.date) sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df.deaths_regiment_4, df.deaths_regiment_5, df.deaths_regiment_6, df.deaths_regiment_7], color="indianred") # <matplotlib.axes._subplots.AxesSubplot at 0x1140be780> ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_pandas_timeseries_plot/seaborn_pandas_timeseries_plot_5_1.png) ```py # 帶有置信區間直線，但是沒有直線的時間序列繪圖 sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df.deaths_regiment_4, df.deaths_regiment_5, df.deaths_regiment_6, df.deaths_regiment_7], err_style="ci_bars", interpolate=False) # <matplotlib.axes._subplots.AxesSubplot at 0x116400668> ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_pandas_timeseries_plot/seaborn_pandas_timeseries_plot_7_1.png) ## 使用 Seaborn 創建散點圖 ```py import pandas as pd %matplotlib inline import random import matplotlib.pyplot as plt import seaborn as sns # 創建空數據幀 df = pd.DataFrame() # 添加列 df['x'] = random.sample(range(1, 1000), 5) df['y'] = random.sample(range(1, 1000), 5) df['z'] = [1,0,0,1,0] df['k'] = ['male','male','male','female','female'] # 查看前幾行數據 df.head() ``` | | x | y | z | k | | --- | --- | --- | --- | --- | | 0 | 466 | 948 | 1 | male | | 1 | 832 | 481 | 0 | male | | 2 | 978 | 465 | 0 | male | | 3 | 510 | 206 | 1 | female | | 4 | 848 | 357 | 0 | female | ```py # 設置散點圖樣式 sns.set_context("notebook", font_scale=1.1) sns.set_style("ticks") # 創建數據幀的散點圖 
sns.lmplot('x', # 橫軸 'y', # 縱軸 data=df, # 數據源 fit_reg=False, # 不要擬合回歸直線 hue="z", # 設置顏色 scatter_kws={"marker": "D", # 設置標記樣式 "s": 100}) # 設置標記大小 # 設置標題 plt.title('Histogram of IQ') # 設置橫軸標簽 plt.xlabel('Time') # 設置縱軸標簽 plt.ylabel('Deaths') # <matplotlib.text.Text at 0x112b7bb70> ``` ![png](https://chrisalbon.com/python/data_visualization/seaborn_scatterplot/seaborn_scatterplot_7_1.png) ## MatPlotLib 中的分組條形圖 ```py %matplotlib inline import pandas as pd import matplotlib.pyplot as plt import numpy as np raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], 'pre_score': [4, 24, 31, 2, 3], 'mid_score': [25, 94, 57, 62, 70], 'post_score': [5, 43, 23, 23, 51]} df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score']) df ``` | | first_name | pre_score | mid_score | post_score | | --- | --- | --- | --- | --- | | 0 | Jason | 4 | 25 | 5 | | 1 | Molly | 24 | 94 | 43 | | 2 | Tina | 31 | 57 | 23 | | 3 | Jake | 2 | 62 | 23 | | 4 | Amy | 3 | 70 | 51 | ```py # 設置條形的位置和寬度 pos = list(range(len(df['pre_score']))) width = 0.25 # 繪制條形 fig, ax = plt.subplots(figsize=(10,5)) # 使用 pre_score 數據， # 在位置 pos 上創建條形 plt.bar(pos, # 使用數據 df['pre_score'] df['pre_score'], # 寬度 width, # 透明度為 0.5 alpha=0.5, # 顏色 color='#EE3224', # 標簽是 first_name 的第一個值 label=df['first_name'][0]) # 使用 mid_score 數據， # 在位置 pos + 一定寬度上創建條形 plt.bar([p + width for p in pos], # 使用數據 df['mid_score'] df['mid_score'], # 寬度 width, # 透明度為 0.5 alpha=0.5, # 顏色 color='#F78F1E', # 標簽是 first_name 的第二個值 label=df['first_name'][1]) # 使用 post_score 數據， # 在位置 pos + 一定寬度上創建條形 plt.bar([p + width*2 for p in pos], # 使用數據 df['post_score'] df['post_score'], # 寬度 width, # 透明度為 0.5 alpha=0.5, # 顏色 color='#FFC222', # 標簽是 first_name 的第三個值 label=df['first_name'][2]) # 設置縱軸標簽 ax.set_ylabel('Score') # 設置標題 ax.set_title('Test Subject Scores') # 設置 x 刻度的位置 ax.set_xticks([p + 1.5 * width for p in pos]) # 設置 x 刻度的標簽 ax.set_xticklabels(df['first_name']) # 設置橫軸和縱軸的區域 plt.xlim(min(pos)-width, max(pos)+width*4) 
plt.ylim([0, max(df['pre_score'] + df['mid_score'] + df['post_score'])] ) # 添加圖例并展示繪圖 plt.legend(['Pre Score', 'Mid Score', 'Post Score'], loc='upper left') plt.grid() plt.show() ``` ![png](https://chrisalbon.com/python/data_visualization/matplotlib_grouped_bar_plot/matplotlib_grouped_bar_plot_6_0.png) ## MatPlotLib 中的直方圖 ```py %matplotlib inline import pandas as pd import matplotlib.pyplot as plt import numpy as np import math # 設置 ipython 的最大行數 pd.set_option('display.max_row', 1000) # 將 ipython 的最大列數設為 50 pd.set_option('display.max_columns', 50) df = pd.read_csv('https://www.dropbox.com/s/52cb7kcflr8qm2u/5kings_battles_v1.csv?dl=1') df.head() ``` | | name | year | battle_number | attacker_king | defender_king | attacker_1 | attacker_2 | attacker_3 | attacker_4 | defender_1 | defender_2 | defender_3 | defender_4 | attacker_outcome | battle_type | major_death | major_capture | attacker_size | defender_size | attacker_commander | defender_commander | summer | location | region | note | | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | | 0 | Battle of the Golden Tooth | 298 | 1 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Tully | NaN | NaN | NaN | win | pitched battle | 1 | 0 | 15000 | 4000 | Jaime Lannister | Clement Piper, Vance | 1 | Golden Tooth | The Westerlands | NaN | | 1 | Battle at the Mummer's Ford | 298 | 2 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Baratheon | NaN | NaN | NaN | win | ambush | 1 | 0 | NaN | 120 | Gregor Clegane | Beric Dondarrion | 1 | Mummer's Ford | The Riverlands | NaN | | 2 | Battle of Riverrun | 298 | 3 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Tully | NaN | NaN | NaN | win | pitched battle | 0 | 1 | 15000 | 10000 | Jaime Lannister, Andros Brax | Edmure Tully, Tytos Blackwood | 1 | Riverrun | The Riverlands | NaN |
| 3 | Battle of the Green Fork | 298 | 4 | Robb Stark | Joffrey/Tommen Baratheon | Stark | NaN | NaN | NaN | Lannister | NaN | NaN | NaN | loss | pitched battle | 1 | 1 | 18000 | 20000 | Roose Bolton, Wylis Manderly, Medger Cerwyn, H... | Tywin Lannister, Gregor Clegane, Kevan Lannist... | 1 | Green Fork | The Riverlands | NaN | | 4 | Battle of the Whispering Wood | 298 | 5 | Robb Stark | Joffrey/Tommen Baratheon | Stark | Tully | NaN | NaN | Lannister | NaN | NaN | NaN | win | ambush | 1 | 1 | 1875 | 6000 | Robb Stark, Brynden Tully | Jaime Lannister | 1 | Whispering Wood | The Riverlands | NaN | ```py # 制作攻擊方和防守方大小的兩個變量 # 但是當有 90000 個或更多攻擊方時將其排除在外 data1 = df['attacker_size'][df['attacker_size'] < 90000] data2 = df['defender_size'][df['attacker_size'] < 90000] # 創建寬度為 2000 的桶 bins = np.arange(data1.min(), data2.max(), 2000) # 固定桶的大小 # 繪制攻擊方大小的直方圖 plt.hist(data1, bins=bins, alpha=0.5, color='#EDD834', label='Attacker') # 繪制防守方大小的直方圖 plt.hist(data2, bins=bins, alpha=0.5, color='#887E43', label='Defender') # 設置圖形的 x 和 y 邊界 plt.ylim([0, 10]) # 設置標題和標簽 plt.title('Histogram of Attacker and Defender Size') plt.xlabel('Number of troops') plt.ylabel('Number of battles') plt.legend(loc='upper right') plt.show() ``` ![png](https://chrisalbon.com/python/data_visualization/matplotlib_histogram/matplotlib_histogram_6_0.png) ```py # 制作攻擊方和防守方大小的兩個變量 # 但是當有 90000 個或更多攻擊方時將其排除在外 data1 = df['attacker_size'][df['attacker_size'] < 90000] data2 = df['defender_size'][df['attacker_size'] < 90000] # 創建 10 個桶，最小值為 # data1 和 data2 的最小值 bins = np.linspace(min(data1 + data2), # 最大值為它們的最大值 max(data1 + data2), # 并分為 10 個桶 10) # 繪制攻擊方大小的直方圖 plt.hist(data1, # 使用定義好的桶 bins=bins, # 透明度 alpha=0.5, # 顏色 color='#EDD834', # 攻擊方的標簽 label='Attacker') # 繪制防守方大小的直方圖 plt.hist(data2, # 使用定義好的桶 bins=bins, # 透明度 alpha=0.5, # 顏色 color='#887E43', # 防守方的標簽 label='Defender') # 設置圖形的 x 和 y 邊界 
plt.ylim([0, 10]) # 設置標題和標簽 plt.title('Histogram of Attacker and Defender Size') plt.xlabel('Number of troops') plt.ylabel('Number of battles') plt.legend(loc='upper right') plt.show() ``` ![png](https://chrisalbon.com/python/data_visualization/matplotlib_histogram/matplotlib_histogram_8_0.png) ## 從 Pandas 數據幀生成 MatPlotLib 散點圖 ```py %matplotlib inline import pandas as pd import matplotlib.pyplot as plt import numpy as np raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], 'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'], 'female': [0, 1, 1, 0, 1], 'age': [42, 52, 36, 24, 73], 'preTestScore': [4, 24, 31, 2, 3], 'postTestScore': [25, 94, 57, 62, 70]} df = pd.DataFrame(raw_data, columns = ['first_name', 'last_name', 'age', 'female', 'preTestScore', 'postTestScore']) df ``` | | first_name | last_name | age | female | preTestScore | postTestScore | | --- | --- | --- | --- | --- | --- | --- | | 0 | Jason | Miller | 42 | 0 | 4 | 25 | | 1 | Molly | Jacobson | 52 | 1 | 24 | 94 | | 2 | Tina | Ali | 36 | 1 | 31 | 57 | | 3 | Jake | Milner | 24 | 0 | 2 | 62 | | 4 | Amy | Cooze | 73 | 1 | 3 | 70 | ```py # preTestScore 和 postTestScore 的散點圖 # 每個點的大小取決于年齡 plt.scatter(df.preTestScore, df.postTestScore , s=df.age) # <matplotlib.collections.PathCollection at 0x10ca42b00> ``` ![png](https://chrisalbon.com/python/data_visualization/matplotlib_scatterplot_from_pandas/matplotlib_scatterplot_from_pandas_6_1.png) ```py # preTestScore 和 postTestScore 的散點圖 # 大小為 300，顏色取決于性別 plt.scatter(df.preTestScore, df.postTestScore, s=300, c=df.female) # <matplotlib.collections.PathCollection at 0x10cb90a90> ``` ![png](https://chrisalbon.com/python/data_visualization/matplotlib_scatterplot_from_pandas/matplotlib_scatterplot_from_pandas_8_1.png) ## Matplotlib 的簡單示例 ```py # 讓 Jupyter 加載 matplotlib # 并內聯創建所有繪圖（也就是在頁面上） %matplotlib inline import matplotlib.pyplot as pyplot pyplot.plot([1.6, 2.7]) # [<matplotlib.lines.Line2D at 0x10c4e7978>] ``` 
![png](https://chrisalbon.com/python/data_visualization/matplotlib_simple_example/matplotlib_simple_example_6_1.png) ## MatPlotLib 中的餅圖 ```py %matplotlib inline import pandas as pd import matplotlib.pyplot as plt raw_data = {'officer_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], 'jan_arrests': [4, 24, 31, 2, 3], 'feb_arrests': [25, 94, 57, 62, 70], 'march_arrests': [5, 43, 23, 23, 51]} df = pd.DataFrame(raw_data, columns = ['officer_name', 'jan_arrests', 'feb_arrests', 'march_arrests']) df ``` | | officer_name | jan_arrests | feb_arrests | march_arrests | | --- | --- | --- | --- | --- | | 0 | Jason | 4 | 25 | 5 | | 1 | Molly | 24 | 94 | 43 | | 2 | Tina | 31 | 57 | 23 | | 3 | Jake | 2 | 62 | 23 | | 4 | Amy | 3 | 70 | 51 | ```py # 創建一列，其中包含每個官員的總逮捕數 df['total_arrests'] = df['jan_arrests'] + df['feb_arrests'] + df['march_arrests'] df ``` | | officer_name | jan_arrests | feb_arrests | march_arrests | total_arrests | | --- | --- | --- | --- | --- | --- | | 0 | Jason | 4 | 25 | 5 | 34 | | 1 | Molly | 24 | 94 | 43 | 161 | | 2 | Tina | 31 | 57 | 23 | 111 | | 3 | Jake | 2 | 62 | 23 | 87 | | 4 | Amy | 3 | 70 | 51 | 124 | ```py # （從 iWantHue）創建顏色列表 colors = ["#E13F29", "#D69A80", "#D63B59", "#AE5552", "#CB5C3B", "#EB8076", "#96624E"] # 創建餅圖 plt.pie( # 使用數據 total_arrests df['total_arrests'], # 標簽為官員名稱 labels=df['officer_name'], # 沒有陰影 shadow=False, # 顏色 colors=colors, # 將一塊扇形移出去 explode=(0, 0, 0, 0, 0.15), # 起始角度為 90 度 startangle=90, # 將百分比列為分數 autopct='%1.1f%%', ) # 使餅狀圖為正圓 plt.axis('equal') # 查看繪圖 plt.tight_layout() plt.show() ``` ![png](https://chrisalbon.com/python/data_visualization/matplotlib_pie_chart/matplotlib_pie_chart_7_0.png) ## MatPlotLib 中的散點圖 ```py %matplotlib inline import pandas as pd import matplotlib.pyplot as plt import numpy as np # 設置 ipython 的最大行數 pd.set_option('display.max_row', 1000) # 將 ipython 的最大列數設為 50 pd.set_option('display.max_columns', 50) df = 
pd.read_csv('https://raw.githubusercontent.com/chrisalbon/war_of_the_five_kings_dataset/master/5kings_battles_v1.csv') df.head() ``` | | name | year | battle_number | attacker_king | defender_king | attacker_1 | attacker_2 | attacker_3 | attacker_4 | defender_1 | defender_2 | defender_3 | defender_4 | attacker_outcome | battle_type | major_death | major_capture | attacker_size | defender_size | attacker_commander | defender_commander | summer | location | region | note | | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | | 0 | Battle of the Golden Tooth | 298 | 1 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Tully | NaN | NaN | NaN | win | pitched battle | 1.0 | 0.0 | 15000.0 | 4000.0 | Jaime Lannister | Clement Piper, Vance | 1.0 | Golden Tooth | The Westerlands | NaN | | 1 | Battle at the Mummer's Ford | 298 | 2 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Baratheon | NaN | NaN | NaN | win | ambush | 1.0 | 0.0 | NaN | 120.0 | Gregor Clegane | Beric Dondarrion | 1.0 | Mummer's Ford | The Riverlands | NaN | | 2 | Battle of Riverrun | 298 | 3 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Tully | NaN | NaN | NaN | win | pitched battle | 0.0 | 1.0 | 15000.0 | 10000.0 | Jaime Lannister, Andros Brax | Edmure Tully, Tytos Blackwood | 1.0 | Riverrun | The Riverlands | NaN | | 3 | Battle of the Green Fork | 298 | 4 | Robb Stark | Joffrey/Tommen Baratheon | Stark | NaN | NaN | NaN | Lannister | NaN | NaN | NaN | loss | pitched battle | 1.0 | 1.0 | 18000.0 | 20000.0 | Roose Bolton, Wylis Manderly, Medger Cerwyn, H... | Tywin Lannister, Gregor Clegane, Kevan Lannist... 
| 1.0 | Green Fork | The Riverlands | NaN | | 4 | Battle of the Whispering Wood | 298 | 5 | Robb Stark | Joffrey/Tommen Baratheon | Stark | Tully | NaN | NaN | Lannister | NaN | NaN | NaN | win | ambush | 1.0 | 1.0 | 1875.0 | 6000.0 | Robb Stark, Brynden Tully | Jaime Lannister | 1.0 | Whispering Wood | The Riverlands | NaN | ```py # 創建圖形 plt.figure(figsize=(10,8)) # 創建散點圖 # 298 年的攻擊方大小為 x 軸 plt.scatter(df['attacker_size'][df['year'] == 298], # 298 年的防守方大小為 y 軸 df['defender_size'][df['year'] == 298], # 標記 marker='x', # 顏色 color='b', # 透明度 alpha=0.7, # 大小 s = 124, # 標簽 label='Year 298') # 299 年的攻擊方大小為 x 軸 plt.scatter(df['attacker_size'][df['year'] == 299], # 299 年的防守方大小為 y 軸 df['defender_size'][df['year'] == 299], # 標記 marker='o', # 顏色 color='r', # 透明度 alpha=0.7, # 大小 s = 124, # 標簽 label='Year 299') # 300 年的攻擊方大小為 x 軸 plt.scatter(df['attacker_size'][df['year'] == 300], # 300 年的防守方大小為 y 軸 df['defender_size'][df['year'] == 300], # 標記 marker='^', # 顏色 color='g', # 透明度 alpha=0.7, # 大小 s = 124, # 標簽 label='Year 300') # 標題 plt.title('Battles Of The War Of The Five Kings') # y 標簽 plt.ylabel('Defender Size') # x 標簽 plt.xlabel('Attacker Size') # 圖例 plt.legend(loc='upper right') # 設置圖形邊界 plt.xlim([min(df['attacker_size'])-1000, max(df['attacker_size'])+1000]) plt.ylim([min(df['defender_size'])-1000, max(df['defender_size'])+1000]) plt.show() ``` ![png](https://chrisalbon.com/python/data_visualization/matplotlib_simple_scatterplot/matplotlib_simple_scatterplot_6_0.png) ## MatPlotLib 中的棧式百分比條形圖 ```py %matplotlib inline import pandas as pd import matplotlib.pyplot as plt raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], 'pre_score': [4, 24, 31, 2, 3], 'mid_score': [25, 94, 57, 62, 70], 'post_score': [5, 43, 23, 23, 51]} df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score']) df ``` | | first_name | pre_score | mid_score | post_score | | --- | --- | --- | --- | --- | | 0 | Jason | 4 | 25 | 5 | | 1 | Molly | 24 | 94 | 43 | | 
2 | Tina | 31 | 57 | 23 | | 3 | Jake | 2 | 62 | 23 | | 4 | Amy | 3 | 70 | 51 | ```py # 創建帶有一個子圖的圖形 f, ax = plt.subplots(1, figsize=(10,5)) # 將條寬設為 1 bar_width = 1 # 條形左邊界的位置 bar_l = [i for i in range(len(df['pre_score']))] # x 軸刻度的位置（條形的中心是條形標簽） tick_pos = [i+(bar_width/2) for i in bar_l] # 創建每個參與者的總得分 totals = [i+j+k for i,j,k in zip(df['pre_score'], df['mid_score'], df['post_score'])] # 創建每個參與者的 pre_score 和總得分的百分比 pre_rel = [i / j * 100 for i,j in zip(df['pre_score'], totals)] # 創建每個參與者的 mid_score 和總得分的百分比 mid_rel = [i / j * 100 for i,j in zip(df['mid_score'], totals)] # 創建每個參與者的 post_score 和總得分的百分比 post_rel = [i / j * 100 for i,j in zip(df['post_score'], totals)] # 在位置 bar_l 創建條形圖 ax.bar(bar_l, # 使用數據 pre_rel pre_rel, # 標簽 label='Pre Score', # 透明度 alpha=0.9, # 顏色 color='#019600', # 條形寬度 width=bar_width, # 邊框顏色 edgecolor='white' ) # 在位置 bar_l 創建條形圖 ax.bar(bar_l, # 使用數據 mid_rel mid_rel, # 底部為 pre_rel bottom=pre_rel, # 標簽 label='Mid Score', # 透明度 alpha=0.9, # 顏色 color='#3C5F5A', # 條形寬度 width=bar_width, # 邊框顏色 edgecolor='white' ) # 在位置 bar_l 創建條形圖 ax.bar(bar_l, # 使用數據 post_rel post_rel, # 底部為 pre_rel 和 mid_rel bottom=[i+j for i,j in zip(pre_rel, mid_rel)], # 標簽 label='Post Score', # 透明度 alpha=0.9, # 顏色 color='#219AD8', # 條形寬度 width=bar_width, # 邊框顏色 edgecolor='white' ) # 將刻度設為 first_name plt.xticks(tick_pos, df['first_name']) ax.set_ylabel("Percentage") ax.set_xlabel("") # 設置圖形邊界 plt.xlim([min(tick_pos)-bar_width, max(tick_pos)+bar_width]) plt.ylim(-10, 110) # 旋轉軸標簽 plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right') # 展示繪圖 plt.show() ``` ![png](https://chrisalbon.com/python/data_visualization/matplotlib_percentage_stacked_bar_plot/matplotlib_percentage_stacked_bar_plot_6_0.png)