[TOC]
# datetime構造時間
~~~
import datetime
# Build a naive datetime for 2017-11-24 10:30 from positional fields
# (year, month, day, hour, minute); seconds are left at their default of 0.
dt = datetime.datetime(2017, 11, 24, 10, 30)
print(dt)
~~~
輸出
~~~
2017-11-24 10:30:00
~~~
# pandas構造時間
~~~
import pandas as pd
# A date-only string yields a Timestamp at midnight of that day.
iso_date = '2017-11-24'
ts = pd.Timestamp(iso_date)
print(ts)
~~~
輸出
`2017-11-24 00:00:00`
或者to_datetime這種
~~~
import pandas as pd
# Parse two date strings with pd.to_datetime. The results get their own
# names so they do not shadow the stdlib `datetime` module or the
# `to_datetime` function itself.
iso_ts = pd.to_datetime('2017-11-24')
# A slash-delimited string is read month-first: this is 23 Nov 2017.
us_ts = pd.to_datetime('11/23/2017')
print(iso_ts)
print(us_ts)
~~~
輸出
~~~
2017-11-24 00:00:00
2017-11-23 00:00:00
~~~
# 獲取月份,天數
~~~
import pandas as pd
# Slash-delimited dates are ambiguous (day/month vs. month/day), so state
# the order explicitly instead of relying on the parser's guess.
ts = pd.to_datetime('24/11/2017', dayfirst=True)
# The attributes below work the same for timestamps built from
# '-' delimited strings.
# Month, then day of the month:
print(ts.month)
print(ts.day)
~~~
輸出
~~~
11
24
~~~
# 增加天數
~~~
import pandas as pd
# Shift a timestamp five days forward.
timestamp = pd.Timestamp('2018-05-01')
# Mind the spelling: the class is Timedelta (lowercase "d").
five_days = pd.Timedelta('5 days')
rel = timestamp + five_days
print(rel)
~~~
輸出
`2018-05-06 00:00:00`
# 構造一個Series結構
~~~
import pandas as pd
# Three consecutive midnights kept as plain strings (not parsed yet).
raw_dates = [
    '2017-11-24 00:00:00',
    '2017-11-25 00:00:00',
    '2017-11-26 00:00:00',
]
s = pd.Series(raw_dates)
print(s)
~~~
輸出
~~~
0 2017-11-24 00:00:00
1 2017-11-25 00:00:00
2 2017-11-26 00:00:00
dtype: object
~~~
把它們轉換成datetime格式
~~~
import pandas as pd
# A Series of date strings.
s = pd.Series(['2017-11-24 00:00:00', '2017-11-25 00:00:00', '2017-11-26 00:00:00'])
# Convert the strings to datetime values. The result is named `parsed`
# so it does not shadow the stdlib `datetime` module.
parsed = pd.to_datetime(s)
print(parsed)
~~~
輸出
~~~
0 2017-11-24
1 2017-11-25
2 2017-11-26
dtype: datetime64[ns]
~~~
獲取它們的小時和星期幾
~~~
import pandas as pd
# Parse a Series of date strings, then read components through `.dt`.
s = pd.Series(['2017-11-24 00:00:00', '2017-11-25 00:00:00', '2017-11-26 00:00:00'])
# Named `dates` so the result does not shadow the stdlib `datetime` module.
dates = pd.to_datetime(s)
# Hour of day for each entry (every entry here is at midnight).
print(dates.dt.hour)
print('-'*30)
# Day of the week as an integer: Monday is 0 and Sunday is 6,
# so Friday 2017-11-24 prints 4.
print(dates.dt.weekday)
~~~
輸出
~~~
0 0
1 0
2 0
dtype: int64
------------------------------
0 4
1 5
2 6
dtype: int64
~~~
# 構造Series數據
~~~
import pandas as pd
# Starting at 2017-11-24, build 3 timestamps spaced 12 hours apart.
# The keyword is `freq`. Lowercase 'h' is the hour alias; uppercase 'H'
# is deprecated in recent pandas releases.
series = pd.Series(pd.date_range(start='2017-11-24', periods=3, freq='12h'))
print(series)
~~~
輸出
~~~
0 2017-11-24 00:00:00
1 2017-11-24 12:00:00
2 2017-11-25 00:00:00
dtype: datetime64[ns]
~~~
# 用pandas分析csv的日期
csv結構

~~~
import pandas as pd
# Load the CSV and show its first five rows.
data = pd.read_csv('./flowdata.csv')
head = data.head(5)
print(head)
~~~
輸出
~~~
Time L06_347 LS06_347 LS06_348
0 2009-01-01 00:00:00 0.137417 0.097500 0.016833
1 2009-01-01 03:00:00 0.131250 0.088833 0.016417
2 2009-01-01 06:00:00 0.113500 0.091250 0.016750
3 2009-01-01 09:00:00 0.135750 0.091500 0.016250
4 2009-01-01 12:00:00 0.140917 0.096167 0.017000
~~~
## 讀取後設置索引
~~~
import pandas as pd
# Work on the first five rows only.
first_rows = pd.read_csv('./flowdata.csv').head()
# Parse the Time strings into datetime values.
parsed_time = pd.to_datetime(first_rows['Time'])
# Swap the parsed column in and promote it to the index.
data = first_rows.assign(Time=parsed_time).set_index('Time')
print(data)
print('-'*30)
# Show the resulting index.
print(data.index)
~~~
輸出
~~~
L06_347 LS06_347 LS06_348
Time
2009-01-01 00:00:00 0.137417 0.097500 0.016833
2009-01-01 03:00:00 0.131250 0.088833 0.016417
2009-01-01 06:00:00 0.113500 0.091250 0.016750
2009-01-01 09:00:00 0.135750 0.091500 0.016250
2009-01-01 12:00:00 0.140917 0.096167 0.017000
------------------------------
DatetimeIndex(['2009-01-01 00:00:00', '2009-01-01 03:00:00',
'2009-01-01 06:00:00', '2009-01-01 09:00:00',
'2009-01-01 12:00:00'],
dtype='datetime64[ns]', name='Time', freq=None)
~~~
## 讀取時設置索引列,並格式化
~~~
import pandas as pd
# Do both steps at load time: column 0 becomes the index and
# parse_dates=True parses that index as dates.
frame = pd.read_csv('./flowdata.csv', index_col=0, parse_dates=True)
data = frame.head()
print(data)
~~~
輸出
~~~
L06_347 LS06_347 LS06_348
Time
2009-01-01 00:00:00 0.137417 0.097500 0.016833
2009-01-01 03:00:00 0.131250 0.088833 0.016417
2009-01-01 06:00:00 0.113500 0.091250 0.016750
2009-01-01 09:00:00 0.135750 0.091500 0.016250
2009-01-01 12:00:00 0.140917 0.096167 0.017000
~~~
## 分片獲取數據
~~~
import pandas as pd
# Load with column 0 as the index, parsed as dates.
data = pd.read_csv('./flowdata.csv', index_col=0, parse_dates=True)
# Select rows by a time range on the index.
# Plain strings work as bounds too:
#   data[('2012-01-01 09:00'):('2012-01-01 19:00')]
# and so do partial dates, e.g. data['2012-01':'2012-03']
window_start = pd.Timestamp('2012-01-01 09:00')
window_end = pd.Timestamp('2012-01-01 19:00')
dt = data[window_start:window_end]
print(dt)
~~~
輸出
~~~
L06_347 LS06_347 LS06_348
Time
2012-01-01 09:00:00 0.330750 0.293583 0.029750
2012-01-01 12:00:00 0.295000 0.285167 0.031750
2012-01-01 15:00:00 0.301417 0.287750 0.031417
2012-01-01 18:00:00 0.322083 0.304167 0.038083
~~~
## 獲取倒數10個數據
~~~
import pandas as pd
# Load with column 0 as the index, parsed as dates.
frame = pd.read_csv('./flowdata.csv', index_col=0, parse_dates=True)
# Keep only the last 10 rows.
data = frame.tail(10)
print(data)
~~~
## 獲取某一年的數據
~~~
import pandas as pd
# Load with column 0 as the index, parsed as dates.
data = pd.read_csv('./flowdata.csv', index_col=0, parse_dates=True)
# Select every row from one year with a partial date string.
# Row selection has to go through .loc: on pandas 2.x, data['2013'] is
# treated as a column lookup and raises KeyError.
print(data.loc['2013'])
~~~
輸出
~~~
L06_347 LS06_347 LS06_348
Time
2013-01-01 00:00:00 1.688333 1.688333 0.207333
2013-01-01 03:00:00 2.693333 2.693333 0.201500
2013-01-01 06:00:00 2.220833 2.220833 0.166917
~~~
## 獲取都是某個月份的數據
~~~
import pandas as pd
# Load with column 0 as the index, parsed as dates.
data = pd.read_csv('./flowdata.csv', index_col=0, parse_dates=True)
# Boolean mask: True for every row whose index falls in January, in any year.
is_january = data.index.month == 1
dt = data[is_january]
print(dt.head())
~~~
輸出
~~~
L06_347 LS06_347 LS06_348
Time
2009-01-01 00:00:00 0.137417 0.097500 0.016833
2009-01-01 03:00:00 0.131250 0.088833 0.016417
2009-01-01 06:00:00 0.113500 0.091250 0.016750
2009-01-01 09:00:00 0.135750 0.091500 0.016250
2009-01-01 12:00:00 0.140917 0.096167 0.017000
~~~
# 獲取指定時間內的數據
~~~
import pandas as pd
# Load with column 0 as the index, parsed as dates.
data = pd.read_csv('./flowdata.csv', index_col=0, parse_dates=True)
# Keep rows whose hour is strictly between 8 and 12 (i.e. 9, 10 or 11).
# data.between_time('08:00', '12:00') is a similar selection, but it
# includes both end points by default, so it would also keep rows at
# exactly 08:00 and 12:00.
hours = data.index.hour
in_window = (hours > 8) & (hours < 12)
dt = data[in_window]
print(dt.head())
~~~
輸出
~~~
L06_347 LS06_347 LS06_348
Time
2009-01-01 09:00:00 0.135750 0.091500 0.016250
2009-01-02 09:00:00 0.141917 0.097083 0.016417
2009-01-03 09:00:00 0.124583 0.084417 0.015833
2009-01-04 09:00:00 0.109000 0.105167 0.018000
2009-01-05 09:00:00 0.161500 0.114583 0.021583
~~~
# 重采樣
按天重采樣求均值
~~~
import pandas as pd
# Load with column 0 as the index, parsed as dates.
data = pd.read_csv('./flowdata.csv', index_col=0, parse_dates=True)
# Other resampling variants:
#   every 3 days:       data.resample('3D').mean().head()
#   monthly:            data.resample('M').mean().head()
#                       (recent pandas releases spell this alias 'ME')
#   daily maximum:      data.resample('D').max().head()
# Daily mean, first five days:
daily_mean = data.resample('D').mean()
dt = daily_mean.head()
print(dt)
~~~
輸出
~~~
L06_347 LS06_347 LS06_348
Time
2009-01-01 0.125010 0.092281 0.016635
2009-01-02 0.124146 0.095781 0.016406
2009-01-03 0.113562 0.085542 0.016094
2009-01-04 0.140198 0.102708 0.017323
2009-01-05 0.128812 0.104490 0.018167
~~~