# 二十、數據可視化
> 作者:[Chris Albon](https://chrisalbon.com/)
>
> 譯者:[飛龍](https://github.com/wizardforcel)
>
> 協議:[CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/)
## MatPlotLib 中的雙向條形圖
```py
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Assemble the example data: five students and their scores on three tests
raw_data = dict(
    first_name=['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    pre_score=[4, 24, 31, 2, 3],
    mid_score=[25, 94, 57, 62, 70],
    post_score=[5, 43, 23, 23, 51],
)
# Build the data frame, stating the column order explicitly
df = pd.DataFrame(data=raw_data,
                  columns=['first_name', 'pre_score', 'mid_score', 'post_score'])
# Echo the frame as the cell's output
df
```
| | first_name | pre_score | mid_score | post_score |
| --- | --- | --- | --- | --- |
| 0 | Jason | 4 | 25 | 5 |
| 1 | Molly | 24 | 94 | 43 |
| 2 | Tina | 31 | 57 | 23 |
| 3 | Jake | 2 | 62 | 23 |
| 4 | Amy | 3 | 70 | 51 |
```py
# Input data: the rows at positions 1 and 2, skipping the first column
# (first_name) so that only the three score columns remain.
# .iloc is used because the .ix indexer no longer exists in pandas (1.0+).
# A row taken from a mixed-type frame has object dtype, so cast to int
# to make the negation and plotting below operate on plain numbers.
x1 = df.iloc[1, 1:].astype(int)
x2 = df.iloc[2, 1:].astype(int)
# Labels for the bars, in the same order as the score columns
bar_labels = ['Pre Score', 'Mid Score', 'Post Score']
# Create the figure
fig = plt.figure(figsize=(8,6))
# One y position per score
y_pos = list(range(len(x1)))
plt.yticks(y_pos, bar_labels, fontsize=10)
# Horizontal bars growing to the right for x1
plt.barh(y_pos,
         x1,
         # center each bar on its y position
         align='center',
         # opacity of 0.4
         alpha=0.4,
         # dark green
         color='#263F13')
# Horizontal bars growing to the left for x2 (values are negated)
plt.barh(y_pos,
         -x2,
         # center each bar on its y position
         align='center',
         # opacity of 0.4
         alpha=0.4,
         # light green
         color='#77A61D')
# Annotations and labels
plt.xlabel('Tina\'s Score: Light Green. Molly\'s Score: Dark Green')
plt.title('Comparison of Molly and Tina\'s Score')
# Leave a margin around the bars on both axes
plt.ylim([-1, len(x1) + 0.1])
plt.xlim([-max(x2) - 10, max(x1) + 10])
plt.grid()
plt.show()
```

## MatPlotLib 中的條形圖
```py
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Collect the example data one column at a time
raw_data = {}
raw_data['first_name'] = ['Jason', 'Molly', 'Tina', 'Jake', 'Amy']
raw_data['pre_score'] = [4, 24, 31, 2, 3]
raw_data['mid_score'] = [25, 94, 57, 62, 70]
raw_data['post_score'] = [5, 43, 23, 23, 51]
# Turn it into a data frame with the columns in a fixed order
df = pd.DataFrame(
    raw_data,
    columns=['first_name', 'pre_score', 'mid_score', 'post_score'],
)
# Echo the frame as the cell's output
df
```
| | first_name | pre_score | mid_score | post_score |
| --- | --- | --- | --- | --- |
| 0 | Jason | 4 | 25 | 5 |
| 1 | Molly | 24 | 94 | 43 |
| 2 | Tina | 31 | 57 | 23 |
| 3 | Jake | 2 | 62 | 23 |
| 4 | Amy | 3 | 70 | 51 |
```py
# Mean score of each test, in the same order as bar_labels below
mean_values = [df['pre_score'].mean(), df['mid_score'].mean(), df['post_score'].mean()]
# Error-bar half-lengths: 25% of each bar's own mean
variance = [mean * 0.25 for mean in mean_values]
# Labels for the bars
bar_labels = ['Pre Score', 'Mid Score', 'Post Score']
# x position of each bar
x_pos = list(range(len(bar_labels)))
# Draw the bars at the x positions
plt.bar(x_pos,
        # bar heights
        mean_values,
        # vertical error bars
        yerr=variance,
        # center each bar on its x position
        align='center',
        # color
        color='#FFC222',
        # opacity of 0.5
        alpha=0.5)
# Add a grid
plt.grid()
# y-axis height: the tallest bar including its error bar, plus 10% headroom
max_y = max(mean + err for mean, err in zip(mean_values, variance))
plt.ylim([0, max_y * 1.1])
# Axis label, tick labels and title
plt.ylabel('Score')
plt.xticks(x_pos, bar_labels)
plt.title('Mean Scores For Each Test')
plt.show()
```

## Seaborn 中的調色板
```py
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
# Example data: ten timestamped observations of deaths in seven regiments
data = {'date': ['2014-05-01 18:47:05.069722', '2014-05-01 18:47:05.119994', '2014-05-02 18:47:05.178768', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.280592', '2014-05-03 18:47:05.332662', '2014-05-03 18:47:05.385109', '2014-05-04 18:47:05.436523', '2014-05-04 18:47:05.486877'],
        'deaths_regiment_1': [34, 43, 14, 15, 15, 14, 31, 25, 62, 41],
        'deaths_regiment_2': [52, 66, 78, 15, 15, 5, 25, 25, 86, 1],
        'deaths_regiment_3': [13, 73, 82, 58, 52, 87, 26, 5, 56, 75],
        'deaths_regiment_4': [44, 75, 26, 15, 15, 14, 54, 25, 24, 72],
        'deaths_regiment_5': [25, 24, 25, 15, 57, 68, 21, 27, 62, 5],
        'deaths_regiment_6': [84, 84, 26, 15, 15, 14, 26, 25, 62, 24],
        'deaths_regiment_7': [46, 57, 26, 15, 15, 14, 26, 25, 62, 41]}
# Only columns that exist in `data` are listed: a name that is missing
# from `data` would silently produce a column filled with NaN.
df = pd.DataFrame(data, columns=['date', 'deaths_regiment_1', 'deaths_regiment_2',
                                 'deaths_regiment_3', 'deaths_regiment_4', 'deaths_regiment_5',
                                 'deaths_regiment_6', 'deaths_regiment_7'])
# Index the rows by timestamp; `date` also stays available as a column
df = df.set_index(df.date)
# Preview ten colors from the "deep" palette
sns.palplot(sns.color_palette(palette="deep", n_colors=10))
```

```py
# Preview ten colors from the "muted" palette
sns.palplot(sns.color_palette(palette="muted", n_colors=10))
```

```py
# Preview ten colors from the "bright" palette
sns.palplot(sns.color_palette(palette="bright", n_colors=10))
```

```py
# Preview ten colors from the "dark" palette
sns.palplot(sns.color_palette(palette="dark", n_colors=10))
```

```py
# Preview ten colors from the "colorblind" palette
sns.palplot(sns.color_palette(palette="colorblind", n_colors=10))
```

```py
# Preview ten colors from the "Paired" palette
sns.palplot(sns.color_palette(palette="Paired", n_colors=10))
```

```py
# Preview ten colors from the "BuGn" palette
sns.palplot(sns.color_palette(palette="BuGn", n_colors=10))
```

```py
# Preview ten colors from the "GnBu" palette
sns.palplot(sns.color_palette(palette="GnBu", n_colors=10))
```

```py
# Preview ten colors from the "OrRd" palette
sns.palplot(sns.color_palette(palette="OrRd", n_colors=10))
```

```py
# Preview ten colors from the "PuBu" palette
sns.palplot(sns.color_palette(palette="PuBu", n_colors=10))
```

```py
# Preview ten colors from the "YlGn" palette
sns.palplot(sns.color_palette(palette="YlGn", n_colors=10))
```

```py
# Preview ten colors from the "YlGnBu" palette
sns.palplot(sns.color_palette(palette="YlGnBu", n_colors=10))
```

```py
# Preview ten colors from the "YlOrBr" palette
sns.palplot(sns.color_palette(palette="YlOrBr", n_colors=10))
```

```py
# Preview ten colors from the "YlOrRd" palette
sns.palplot(sns.color_palette(palette="YlOrRd", n_colors=10))
```

```py
# Preview ten colors from the "BrBG" palette
sns.palplot(sns.color_palette(palette="BrBG", n_colors=10))
```

```py
# Preview ten colors from the "PiYG" palette
sns.palplot(sns.color_palette(palette="PiYG", n_colors=10))
```

```py
# Preview ten colors from the "PRGn" palette
sns.palplot(sns.color_palette(palette="PRGn", n_colors=10))
```

```py
# Preview ten colors from the "PuOr" palette
sns.palplot(sns.color_palette(palette="PuOr", n_colors=10))
```

```py
# Preview ten colors from the "RdBu" palette
sns.palplot(sns.color_palette(palette="RdBu", n_colors=10))
```

```py
# Preview ten colors from the "RdGy" palette
sns.palplot(sns.color_palette(palette="RdGy", n_colors=10))
```

```py
# Preview ten colors from the "RdYlBu" palette
sns.palplot(sns.color_palette(palette="RdYlBu", n_colors=10))
```

```py
# Preview ten colors from the "RdYlGn" palette
sns.palplot(sns.color_palette(palette="RdYlGn", n_colors=10))
```

```py
# Preview ten colors from the "Spectral" palette
sns.palplot(sns.color_palette(palette="Spectral", n_colors=10))
```

```py
# Define a custom six-color palette from hex codes
flatui = [
    "#9b59b6",
    "#3498db",
    "#95a5a6",
    "#e74c3c",
    "#34495e",
    "#2ecc71",
]
# Make it the current palette
sns.set_palette(palette=flatui)
# Preview whatever the current palette is now
sns.palplot(sns.color_palette())
```

```py
# Plot the regiments' deaths over the ten observations in a chosen color.
# sns.tsplot is deprecated and absent from recent seaborn releases; sns.lineplot
# replaces it and takes long-form data, so reshape the regiment columns into
# one row per (observation, regiment) pair first.
regiment_deaths = df.filter(like='deaths_regiment_')
long_df = (regiment_deaths
           .assign(observation=range(len(regiment_deaths)))
           .melt(id_vars='observation', var_name='regiment', value_name='deaths'))
# Each observation has seven values (one per regiment); lineplot aggregates
# them into a mean line with a confidence band.
sns.lineplot(x='observation', y='deaths', data=long_df, color="#34495e")
# <matplotlib.axes._subplots.AxesSubplot at 0x116f5db70>
```

## 使用 Seaborn 和 pandas 創建時間序列繪圖
```py
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
# Example data: ten timestamped observations of deaths in seven regiments
data = {
    'date': ['2014-05-01 18:47:05.069722', '2014-05-01 18:47:05.119994', '2014-05-02 18:47:05.178768', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.280592', '2014-05-03 18:47:05.332662', '2014-05-03 18:47:05.385109', '2014-05-04 18:47:05.436523', '2014-05-04 18:47:05.486877'],
    'deaths_regiment_1': [34, 43, 14, 15, 15, 14, 31, 25, 62, 41],
    'deaths_regiment_2': [52, 66, 78, 15, 15, 5, 25, 25, 86, 1],
    'deaths_regiment_3': [13, 73, 82, 58, 52, 87, 26, 5, 56, 75],
    'deaths_regiment_4': [44, 75, 26, 15, 15, 14, 54, 25, 24, 72],
    'deaths_regiment_5': [25, 24, 25, 15, 57, 68, 21, 27, 62, 5],
    'deaths_regiment_6': [84, 84, 26, 15, 15, 14, 26, 25, 62, 24],
    'deaths_regiment_7': [46, 57, 26, 15, 15, 14, 26, 25, 62, 41],
}
# List only columns present in `data`; a name absent from `data` would
# add a column containing nothing but NaN.
df = pd.DataFrame(data, columns=['date', 'deaths_regiment_1', 'deaths_regiment_2',
                                 'deaths_regiment_3', 'deaths_regiment_4', 'deaths_regiment_5',
                                 'deaths_regiment_6', 'deaths_regiment_7'])
# Index the rows by timestamp; `date` also stays available as a column
df = df.set_index(df.date)
# Time series plot of deaths across the seven regiments.
# sns.tsplot is deprecated and absent from recent seaborn releases; sns.lineplot
# replaces it and takes long-form data, so reshape the regiment columns into
# one row per (observation, regiment) pair first.
regiment_deaths = df.filter(like='deaths_regiment_')
long_df = (regiment_deaths
           .assign(observation=range(len(regiment_deaths)))
           .melt(id_vars='observation', var_name='regiment', value_name='deaths'))
# Each observation has seven values (one per regiment); lineplot aggregates
# them into a mean line with a confidence band.
sns.lineplot(x='observation', y='deaths', data=long_df, color="indianred")
# <matplotlib.axes._subplots.AxesSubplot at 0x1140be780>
```

```py
# Time series plot with confidence-interval bars at each observation,
# but without a line connecting the points.
# sns.tsplot is deprecated and absent from recent seaborn releases; sns.lineplot
# replaces it and takes long-form data, so reshape the regiment columns into
# one row per (observation, regiment) pair first.
regiment_deaths = df.filter(like='deaths_regiment_')
long_df = (regiment_deaths
           .assign(observation=range(len(regiment_deaths)))
           .melt(id_vars='observation', var_name='regiment', value_name='deaths'))
sns.lineplot(x='observation', y='deaths', data=long_df,
             # draw the confidence interval as error bars instead of a band
             err_style="bars",
             # mark each mean with a point ...
             marker="o",
             # ... and suppress the connecting line
             linestyle="")
# <matplotlib.axes._subplots.AxesSubplot at 0x116400668>
```

## 使用 Seaborn 創建散點圖
```py
import pandas as pd
%matplotlib inline
import random
import matplotlib.pyplot as plt
import seaborn as sns
# Build the frame in a single step: x and y are five distinct random
# integers each (drawn in that order), z is a 0/1 flag, k is a category
df = pd.DataFrame({
    'x': random.sample(range(1, 1000), 5),
    'y': random.sample(range(1, 1000), 5),
    'z': [1, 0, 0, 1, 0],
    'k': ['male', 'male', 'male', 'female', 'female'],
})
# Look at the first few rows
df.head()
```
| | x | y | z | k |
| --- | --- | --- | --- | --- |
| 0 | 466 | 948 | 1 | male |
| 1 | 832 | 481 | 0 | male |
| 2 | 978 | 465 | 0 | male |
| 3 | 510 | 206 | 1 | female |
| 4 | 848 | 357 | 0 | female |
```py
# Plot styling: "notebook" context with slightly enlarged fonts, "ticks" axes style
sns.set_context("notebook", font_scale=1.1)
sns.set_style("ticks")
# Scatter plot of the data frame.
# x and y are passed by keyword: newer seaborn releases no longer accept
# them positionally.
sns.lmplot(x='x',  # horizontal axis
           y='y',  # vertical axis
           data=df,  # data source
           fit_reg=False,  # do not fit a regression line
           hue="z",  # color the points by column z
           # marker style (diamond); it has to be given through `markers`,
           # because a "marker" key inside scatter_kws is overridden
           markers="D",
           scatter_kws={"s": 100})  # marker size
# Title
plt.title('Scatter Plot of x and y')
# Horizontal axis label
plt.xlabel('x')
# Vertical axis label
plt.ylabel('y')
# <matplotlib.text.Text at 0x112b7bb70>
```

## MatPlotLib 中的分組條形圖
```py
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Example data: five students and their scores on three tests
raw_data = {
    'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    'pre_score': [4, 24, 31, 2, 3],
    'mid_score': [25, 94, 57, 62, 70],
    'post_score': [5, 43, 23, 23, 51],
}
# The frame's columns follow the order of the dict's keys
df = pd.DataFrame(raw_data, columns=list(raw_data))
# Echo the frame as the cell's output
df
```
| | first_name | pre_score | mid_score | post_score |
| --- | --- | --- | --- | --- |
| 0 | Jason | 4 | 25 | 5 |
| 1 | Molly | 24 | 94 | 43 |
| 2 | Tina | 31 | 57 | 23 |
| 3 | Jake | 2 | 62 | 23 |
| 4 | Amy | 3 | 70 | 51 |
```py
# Position of each student's group of bars, and the width of one bar
pos = list(range(len(df['pre_score'])))
width = 0.25
# Create the figure and axes
fig, ax = plt.subplots(figsize=(10,5))
# One series of bars per test: (column, legend label, color)
series = [('pre_score', 'Pre Score', '#EE3224'),
          ('mid_score', 'Mid Score', '#F78F1E'),
          ('post_score', 'Post Score', '#FFC222')]
# Each series is shifted right by one more bar width so that the three
# bars of a student sit side by side.
for offset, (column, label, color) in enumerate(series):
    ax.bar([p + offset * width for p in pos],
           # bar heights
           df[column],
           # bar width
           width,
           # center each bar on its x position
           align='center',
           # opacity of 0.5
           alpha=0.5,
           # color
           color=color,
           # legend entry: the test this series represents
           label=label)
# Vertical axis label
ax.set_ylabel('Score')
# Title
ax.set_title('Test Subject Scores')
# The bars of a group are centered at p, p + width and p + 2 * width,
# so the middle of the group is p + width.
ax.set_xticks([p + width for p in pos])
# Label each group with the student's name
ax.set_xticklabels(df['first_name'])
# x range: one bar width of margin on either side of the outermost bars
ax.set_xlim(min(pos) - width, max(pos) + width * 3)
# y range: the bars are not stacked, so the tallest single bar (plus 10%
# headroom) determines the height.
tallest = df[['pre_score', 'mid_score', 'post_score']].max().max()
ax.set_ylim(0, tallest * 1.1)
# Add the legend (built from the bar labels) and a grid, then show the plot
ax.legend(loc='upper left')
ax.grid()
plt.show()
```

## MatPlotLib 中的直方圖
```py
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
# Display up to 1000 rows of a frame (full option name; abbreviated option
# names only work while they stay unambiguous)
pd.set_option('display.max_rows', 1000)
# Display up to 50 columns of a frame
pd.set_option('display.max_columns', 50)
# Load the battles dataset from the GitHub repository's raw CSV
df = pd.read_csv('https://raw.githubusercontent.com/chrisalbon/war_of_the_five_kings_dataset/master/5kings_battles_v1.csv')
# Look at the first few rows
df.head()
```
| | name | year | battle_number | attacker_king | defender_king | attacker_1 | attacker_2 | attacker_3 | attacker_4 | defender_1 | defender_2 | defender_3 | defender_4 | attacker_outcome | battle_type | major_death | major_capture | attacker_size | defender_size | attacker_commander | defender_commander | summer | location | region | note |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | Battle of the Golden Tooth | 298 | 1 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Tully | NaN | NaN | NaN | win | pitched battle | 1 | 0 | 15000 | 4000 | Jaime Lannister | Clement Piper, Vance | 1 | Golden Tooth | The Westerlands | NaN |
| 1 | Battle at the Mummer's Ford | 298 | 2 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Baratheon | NaN | NaN | NaN | win | ambush | 1 | 0 | NaN | 120 | Gregor Clegane | Beric Dondarrion | 1 | Mummer's Ford | The Riverlands | NaN |
| 2 | Battle of Riverrun | 298 | 3 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Tully | NaN | NaN | NaN | win | pitched battle | 0 | 1 | 15000 | 10000 | Jaime Lannister, Andros Brax | Edmure Tully, Tytos Blackwood | 1 | Riverrun | The Riverlands | NaN |
| 3 | Battle of the Green Fork | 298 | 4 | Robb Stark | Joffrey/Tommen Baratheon | Stark | NaN | NaN | NaN | Lannister | NaN | NaN | NaN | loss | pitched battle | 1 | 1 | 18000 | 20000 | Roose Bolton, Wylis Manderly, Medger Cerwyn, H... | Tywin Lannister, Gregor Clegane, Kevan Lannist... | 1 | Green Fork | The Riverlands | NaN |
| 4 | Battle of the Whispering Wood | 298 | 5 | Robb Stark | Joffrey/Tommen Baratheon | Stark | Tully | NaN | NaN | Lannister | NaN | NaN | NaN | win | ambush | 1 | 1 | 1875 | 6000 | Robb Stark, Brynden Tully | Jaime Lannister | 1 | Whispering Wood | The Riverlands | NaN |
```py
# Attacker and defender army sizes, keeping only battles whose attacking
# army is smaller than 90000. Rows with an unknown attacker size are
# dropped as well, because a NaN comparison is False.
keep = df['attacker_size'] < 90000
data1 = df.loc[keep, 'attacker_size']
# Defender size may still be unknown for a kept battle; leave those out
data2 = df.loc[keep, 'defender_size'].dropna()
# Bins of a fixed width of 2000 troops spanning both samples.
# np.arange excludes its stop value, so the stop is pushed one bin width
# past the largest value to keep the largest armies inside the last bin.
bin_width = 2000
lowest = min(data1.min(), data2.min())
highest = max(data1.max(), data2.max())
bins = np.arange(lowest, highest + bin_width, bin_width)
# Histogram of attacker sizes
plt.hist(data1,
         bins=bins,
         alpha=0.5,
         color='#EDD834',
         label='Attacker')
# Histogram of defender sizes
plt.hist(data2,
         bins=bins,
         alpha=0.5,
         color='#887E43',
         label='Defender')
# Limit the y axis
plt.ylim([0, 10])
# Title, axis labels and legend
plt.title('Histogram of Attacker and Defender Size')
plt.xlabel('Number of troops')
plt.ylabel('Number of battles')
plt.legend(loc='upper right')
plt.show()
```

```py
# Attacker and defender army sizes, keeping only battles whose attacking
# army is smaller than 90000. Rows with an unknown attacker size are
# dropped as well, because a NaN comparison is False.
keep = df['attacker_size'] < 90000
data1 = df.loc[keep, 'attacker_size']
# Defender size may still be unknown for a kept battle; leave those out
data2 = df.loc[keep, 'defender_size'].dropna()
# Ten equal-width bins running from the smallest to the largest value
# found in either sample. Ten bins need eleven edges.
n_bins = 10
lowest = min(data1.min(), data2.min())
highest = max(data1.max(), data2.max())
bins = np.linspace(lowest, highest, n_bins + 1)
# Histogram of attacker sizes
plt.hist(data1,
         # use the bins defined above
         bins=bins,
         # opacity
         alpha=0.5,
         # color
         color='#EDD834',
         # legend label for the attackers
         label='Attacker')
# Histogram of defender sizes
plt.hist(data2,
         # use the bins defined above
         bins=bins,
         # opacity
         alpha=0.5,
         # color
         color='#887E43',
         # legend label for the defenders
         label='Defender')
# Limit the y axis
plt.ylim([0, 10])
# Title, axis labels and legend
plt.title('Histogram of Attacker and Defender Size')
plt.xlabel('Number of troops')
plt.ylabel('Number of battles')
plt.legend(loc='upper right')
plt.show()
```

## 從 Pandas 數據幀生成 MatPlotLib 散點圖
```py
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Example data: five people with demographics and two test scores
raw_data = {
    'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
    'female': [0, 1, 1, 0, 1],
    'age': [42, 52, 36, 24, 73],
    'preTestScore': [4, 24, 31, 2, 3],
    'postTestScore': [25, 94, 57, 62, 70],
}
# Build the frame; note that `age` is placed before `female` in the columns
df = pd.DataFrame(
    data=raw_data,
    columns=['first_name', 'last_name', 'age', 'female', 'preTestScore', 'postTestScore'],
)
# Echo the frame as the cell's output
df
```
| | first_name | last_name | age | female | preTestScore | postTestScore |
| --- | --- | --- | --- | --- | --- | --- |
| 0 | Jason | Miller | 42 | 0 | 4 | 25 |
| 1 | Molly | Jacobson | 52 | 1 | 24 | 94 |
| 2 | Tina | Ali | 36 | 1 | 31 | 57 |
| 3 | Jake | Milner | 24 | 0 | 2 | 62 |
| 4 | Amy | Cooze | 73 | 1 | 3 | 70 |
```py
# Scatter plot of preTestScore (x) against postTestScore (y);
# the size of each point is taken from the person's age
plt.scatter(df['preTestScore'], df['postTestScore'], s=df['age'])
# <matplotlib.collections.PathCollection at 0x10ca42b00>
```

```py
# Scatter plot of preTestScore (x) against postTestScore (y);
# every point has size 300 and is colored by the `female` column
plt.scatter(
    df['preTestScore'],
    df['postTestScore'],
    s=300,
    c=df['female'],
)
# <matplotlib.collections.PathCollection at 0x10cb90a90>
```

## Matplotlib 的簡單示例
```py
# 讓 Jupyter 加載 matplotlib
# 并內聯創建所有繪圖(也就是在頁面上)
%matplotlib inline
import matplotlib.pyplot as pyplot
# Draw a line through the two y-values 1.6 and 2.7 (no x-values are supplied)
pyplot.plot([1.6, 2.7])
# [<matplotlib.lines.Line2D at 0x10c4e7978>]
```

## MatPlotLib 中的餅圖
```py
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
# Example data: five officers and their arrest counts for three months
raw_data = dict(
    officer_name=['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    jan_arrests=[4, 24, 31, 2, 3],
    feb_arrests=[25, 94, 57, 62, 70],
    march_arrests=[5, 43, 23, 23, 51],
)
# Build the frame with the columns in a fixed order
df = pd.DataFrame(raw_data,
                  columns=['officer_name', 'jan_arrests', 'feb_arrests', 'march_arrests'])
# Echo the frame as the cell's output
df
```
| | officer_name | jan_arrests | feb_arrests | march_arrests |
| --- | --- | --- | --- | --- |
| 0 | Jason | 4 | 25 | 5 |
| 1 | Molly | 24 | 94 | 43 |
| 2 | Tina | 31 | 57 | 23 |
| 3 | Jake | 2 | 62 | 23 |
| 4 | Amy | 3 | 70 | 51 |
```py
# Add a column holding each officer's total arrests over the three months
df['total_arrests'] = (
    df['jan_arrests']
    .add(df['feb_arrests'])
    .add(df['march_arrests'])
)
# Echo the frame as the cell's output
df
```
| | officer_name | jan_arrests | feb_arrests | march_arrests | total_arrests |
| --- | --- | --- | --- | --- | --- |
| 0 | Jason | 4 | 25 | 5 | 34 |
| 1 | Molly | 24 | 94 | 43 | 161 |
| 2 | Tina | 31 | 57 | 23 | 111 |
| 3 | Jake | 2 | 62 | 23 | 87 |
| 4 | Amy | 3 | 70 | 51 | 124 |
```py
# List of wedge colors (generated with iWantHue)
colors = [
    "#E13F29",
    "#D69A80",
    "#D63B59",
    "#AE5552",
    "#CB5C3B",
    "#EB8076",
    "#96624E",
]
# Draw the pie chart from each officer's total arrests
plt.pie(
    df['total_arrests'],
    # pull the last wedge out of the pie
    explode=(0, 0, 0, 0, 0.15),
    # label each wedge with the officer's name
    labels=df['officer_name'],
    # wedge colors
    colors=colors,
    # print each wedge's share as a percentage with one decimal place
    autopct='%1.1f%%',
    # no shadow
    shadow=False,
    # start drawing at 90 degrees
    startangle=90,
)
# Equal axis scaling keeps the pie a circle
plt.axis('equal')
# Tidy the layout and show the plot
plt.tight_layout()
plt.show()
```

## MatPlotLib 中的散點圖
```py
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Display up to 1000 rows of a frame (full option name; abbreviated option
# names only work while they stay unambiguous)
pd.set_option('display.max_rows', 1000)
# Display up to 50 columns of a frame
pd.set_option('display.max_columns', 50)
# Load the battles dataset
df = pd.read_csv('https://raw.githubusercontent.com/chrisalbon/war_of_the_five_kings_dataset/master/5kings_battles_v1.csv')
# Look at the first few rows
df.head()
```
| | name | year | battle_number | attacker_king | defender_king | attacker_1 | attacker_2 | attacker_3 | attacker_4 | defender_1 | defender_2 | defender_3 | defender_4 | attacker_outcome | battle_type | major_death | major_capture | attacker_size | defender_size | attacker_commander | defender_commander | summer | location | region | note |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | Battle of the Golden Tooth | 298 | 1 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Tully | NaN | NaN | NaN | win | pitched battle | 1.0 | 0.0 | 15000.0 | 4000.0 | Jaime Lannister | Clement Piper, Vance | 1.0 | Golden Tooth | The Westerlands | NaN |
| 1 | Battle at the Mummer's Ford | 298 | 2 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Baratheon | NaN | NaN | NaN | win | ambush | 1.0 | 0.0 | NaN | 120.0 | Gregor Clegane | Beric Dondarrion | 1.0 | Mummer's Ford | The Riverlands | NaN |
| 2 | Battle of Riverrun | 298 | 3 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Tully | NaN | NaN | NaN | win | pitched battle | 0.0 | 1.0 | 15000.0 | 10000.0 | Jaime Lannister, Andros Brax | Edmure Tully, Tytos Blackwood | 1.0 | Riverrun | The Riverlands | NaN |
| 3 | Battle of the Green Fork | 298 | 4 | Robb Stark | Joffrey/Tommen Baratheon | Stark | NaN | NaN | NaN | Lannister | NaN | NaN | NaN | loss | pitched battle | 1.0 | 1.0 | 18000.0 | 20000.0 | Roose Bolton, Wylis Manderly, Medger Cerwyn, H... | Tywin Lannister, Gregor Clegane, Kevan Lannist... | 1.0 | Green Fork | The Riverlands | NaN |
| 4 | Battle of the Whispering Wood | 298 | 5 | Robb Stark | Joffrey/Tommen Baratheon | Stark | Tully | NaN | NaN | Lannister | NaN | NaN | NaN | win | ambush | 1.0 | 1.0 | 1875.0 | 6000.0 | Robb Stark, Brynden Tully | Jaime Lannister | 1.0 | Whispering Wood | The Riverlands | NaN |
```py
# Create the figure
plt.figure(figsize=(10,8))
# One scatter series per year: (year, marker, color)
year_styles = [(298, 'x', 'b'),
               (299, 'o', 'r'),
               (300, '^', 'g')]
for year, marker, color in year_styles:
    # Rows of the battles fought in this year
    in_year = df['year'] == year
    # Attacker size on the x axis, defender size on the y axis
    plt.scatter(df.loc[in_year, 'attacker_size'],
                df.loc[in_year, 'defender_size'],
                # marker shape
                marker=marker,
                # color
                color=color,
                # opacity
                alpha=0.7,
                # marker size
                s=124,
                # legend label
                label='Year {}'.format(year))
# Title
plt.title('Battles Of The War Of The Five Kings')
# y label
plt.ylabel('Defender Size')
# x label
plt.xlabel('Attacker Size')
# Legend
plt.legend(loc='upper right')
# Axis limits with a margin of 1000 around the data. Series.min()/max()
# skip missing sizes, whereas the built-in min()/max() give order-dependent
# results when NaN is present.
plt.xlim([df['attacker_size'].min() - 1000, df['attacker_size'].max() + 1000])
plt.ylim([df['defender_size'].min() - 1000, df['defender_size'].max() + 1000])
plt.show()
```

## MatPlotLib 中的棧式百分比條形圖
```py
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
# Example data: five students and their scores on three tests
raw_data = {
    'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    'pre_score': [4, 24, 31, 2, 3],
    'mid_score': [25, 94, 57, 62, 70],
    'post_score': [5, 43, 23, 23, 51],
}
# Build the frame, then select the columns in the desired order
df = pd.DataFrame.from_dict(raw_data)
df = df[['first_name', 'pre_score', 'mid_score', 'post_score']]
# Echo the frame as the cell's output
df
```
| | first_name | pre_score | mid_score | post_score |
| --- | --- | --- | --- | --- |
| 0 | Jason | 4 | 25 | 5 |
| 1 | Molly | 24 | 94 | 43 |
| 2 | Tina | 31 | 57 | 23 |
| 3 | Jake | 2 | 62 | 23 |
| 4 | Amy | 3 | 70 | 51 |
```py
# Create a figure with a single subplot
f, ax = plt.subplots(1, figsize=(10,5))
# Bars are one unit wide, so neighbouring bars touch
bar_width = 1
# x position of each participant's bar
bar_l = list(range(len(df['pre_score'])))
# The bars are drawn centered on bar_l (align='center' below), so the
# tick labels belong at those same positions
tick_pos = list(bar_l)
# Total score of each participant over the three tests
totals = df['pre_score'] + df['mid_score'] + df['post_score']
# Share of each test in the participant's total, as a percentage
pre_rel = df['pre_score'] / totals * 100
mid_rel = df['mid_score'] / totals * 100
post_rel = df['post_score'] / totals * 100
# Layers of the stack, bottom to top: (heights, legend label, color)
layers = [(pre_rel, 'Pre Score', '#019600'),
          (mid_rel, 'Mid Score', '#3C5F5A'),
          (post_rel, 'Post Score', '#219AD8')]
# Height at which the next layer starts; grows as layers are added
bottom = 0
for heights, label, color in layers:
    ax.bar(bar_l,
           # heights of this layer
           heights,
           # start on top of the layers drawn so far
           bottom=bottom,
           # legend label
           label=label,
           # opacity
           alpha=0.9,
           # color
           color=color,
           # bar width
           width=bar_width,
           # center each bar on its x position
           align='center',
           # border color
           edgecolor='white')
    bottom = bottom + heights
# Use the first names as tick labels
plt.xticks(tick_pos, df['first_name'])
ax.set_ylabel("Percentage")
ax.set_xlabel("")
# Show which color is which test; placed outside the axes because the
# bars fill the whole plotting area
ax.legend(loc='upper left', bbox_to_anchor=(1.0, 1.0))
# Axis limits
plt.xlim([min(tick_pos) - bar_width, max(tick_pos) + bar_width])
plt.ylim(-10, 110)
# Rotate the tick labels
plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right')
# Show the plot
plt.show()
```
