Pandas
pandas
參考資料:https://pandas.pydata.org/pandas-docs/stable/reference/index.html
### Series
#### 製作出一個一維陣列
import pandas as pd

# Build a one-dimensional Series from a plain list (default 0..n-1 index).
data = pd.Series([1, 2, 3, 4, 5])
print(data)
print("Max:", data.max())  # output is 5
print("Median", data.median())  # output is 3.0
print(data == 20)  # element-wise comparison; output is a boolean Series
import pandas as pd

# Build a Series with explicit string labels as its index.
data = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
print(data.dtype)  # inspect the data type
print(data.size)  # inspect the number of elements
print(data.index)  # inspect the index labels
import pandas as pd

# Printing a labelled Series shows each label next to its value.
data = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
print(data)
import pandas as pd

data = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
# Access by position: use .iloc, because plain data[2] on a string-labelled
# Series relies on a deprecated positional fallback.
print(data.iloc[2], data.iloc[0])
# Access by index label (both values come from the Series).
print(data["e"], data["d"])

# Common Series aggregations: sum(), max(), prod() (product of all values),
# mean(), median(), std() (standard deviation), nlargest(n) (the n largest
# values), nsmallest(n) (the n smallest values).
import pandas as pd

data = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
print(data.max())  # maximum
print(data.sum())  # sum
print(data.std())  # standard deviation
print(data.median())  # median
print(data.nlargest(3))  # the three largest values
import pandas as pd

# String helpers are exposed through the Series.str accessor.
data = pd.Series(['taiwan', '台灣', 'python'])
print(data.str.lower())  # lower-case every string
print(data.str.len())  # length of each string
print(data.str.cat(sep=','))  # join all strings, separated by ","
print(data.str.contains('t'))  # whether each string contains "t"
# regex=False forces a literal replacement whatever the installed pandas
# version uses as its default.
print(data.str.replace('台灣', '中華民國', regex=False))  # replace 台灣 with 中華民國

# DataFrame terminology: 列 means row (horizontal), 行 means column (vertical).
# Basic form: DataFrame(dict, index)
import pandas as pd

# Build a DataFrame from a dict: each key becomes a column.
data = pd.DataFrame({'name': ['max', 'tom', 'tank'], 'salary': [30000, 50000, 120000]})
print(data)
print('==========')
print(data['name'])  # select a column
print('==========')
print(data['salary'])  # select a column
print('==========')
print(data.iloc[1])  # select a row by position
import pandas as pd

# Build a DataFrame whose rows are labelled 'a', 'b', 'c'.
data = pd.DataFrame(
    {'name': ['max', 'tom', 'steven'], 'salary': [30000, 80000, 120000]},
    index=['a', 'b', 'c'],
)
print(data)
import pandas as pd

data = pd.DataFrame(
    {'name': ['max', 'tom', 'steven'], 'salary': [30000, 80000, 120000]},
    index=['a', 'b', 'c'],
)
print(data.size)  # total number of cells
print(data.shape)  # (rows, columns)
print(data.index)  # row labels
import pandas as pd

data = pd.DataFrame(
    {'name': ['max', 'tom', 'steven'], 'salary': [30000, 80000, 120000]},
    index=['a', 'b', 'c'],
)
print(data.iloc[1])  # second row, selected by position
print(data.loc['c'])  # third row, selected by index label
import pandas as pd

data = pd.DataFrame(
    {'name': ['max', 'tom', 'steven'], 'salary': [30000, 80000, 120000]},
    index=['a', 'b', 'c'],
)
print(data['name'])
# A selected column is a Series, so the .str accessor works on it.
new_data = data['name']
print(new_data.str.upper())
# NOTE: despite its name, salary_aver holds the whole salary column; the
# average itself is computed by mean() on the next line.
salary_aver = data['salary']
print(salary_aver.mean())
import pandas as pd

data = pd.DataFrame(
    {'name': ['max', 'tom', 'steven'], 'salary': [30000, 80000, 120000]},
    index=['a', 'b', 'c'],
)
# data[new column name] = list
data['profit'] = [10000, 20000, 30000]
# data[new column name] = Series (values are matched to rows by index label)
data['rank'] = pd.Series([3, 6, 1], index=['a', 'b', 'c'])
# Derive a new column from existing columns.
data['cp'] = data['profit'] / data['salary']
print(data)
import pandas as pd

data = pd.Series([30, 15, 20])
# Comparing a Series with a scalar yields a boolean Series (a mask).
condition = data > 18
print(condition)
# Indexing with the mask keeps only the elements where it is True.
newData = data[condition]
print(newData)
import pandas as pd

data = pd.Series(["你好", "python", "pandas"])
# Boolean mask: True for strings that contain "p".
condition = data.str.contains('p')
newdata = data[condition]
print(newdata)
import pandas as pd

data = pd.DataFrame({'name': ['max', 'tom', 'ken'], 'salary': [30000, 50000, 40000]})
# Boolean mask: True for rows whose salary is above 30000.
condition = data['salary'] > 30000
print(condition)
# Indexing the DataFrame with the mask keeps only the matching rows.
newdata = data[condition]
print(newdata)
import pandas as pd

data = pd.DataFrame({'name': ['max', 'tom', 'ken'], 'salary': [30000, 50000, 40000]})
# Boolean mask: True for rows whose name equals 'max'.
condition = data['name'] == 'max'
print(condition)
# Indexing the DataFrame with the mask keeps only the matching rows.
newdata = data[condition]
print(newdata)