Series是一种类似于一维数组的结构,和Excel中一行或一列数据类似。Series包含标签列和数据列,通常用于存储数据分析中的变量,尤其是时间序列变量。
1 创建Series
创建空Series:
>>> import pandas as pd
>>> import numpy as np # 后面的例子要用np.array创建数组,所以需要导入numpy库(pandas本身也是基于numpy开发的)
>>> s1 = pd.Series(dtype='object') # 创建空的Series时建议显式指定dtype:pandas 1.x中不指定会有警告,2.0起默认dtype为object、不再警告
>>> s1
Series([], dtype: object)
向量创建Series:
>>> array1= np.array([13,21,16,25,31,19,44])
>>> s2_1=pd.Series(array1)
>>> s2_1
0 13
1 21
2 16
3 25
4 31
5 19
6 44
dtype: int32
>>> s2_2=pd.Series(array1,index=range(2001,2008))
>>> s2_2
2001 13
2002 21
2003 16
2004 25
2005 31
2006 19
2007 44
dtype: int32
列表创建Series:
>>> s3=pd.Series(['女','女','女','男','男','女','男','女',])
>>> s3
0 女
1 女
2 女
3 男
4 男
5 女
6 男
7 女
dtype: object
字典创建Series:
>>> dict1 = {'姓名': '张三', '年龄': 29, '性别': '男'}
>>> s4_1=pd.Series(dict1)
>>> s4_1
姓名 张三
年龄 29
性别 男
dtype: object
>>> dict2 = {'语文': 75, '数学': 88, '物理': 69}
>>> s4_2=pd.Series(dict2,index=['语文','化学','历史','物理','数学'])
>>> s4_2
语文 75.0
化学 NaN
历史 NaN
物理 69.0
数学 88.0
dtype: float64
2 Series数据的索引、修改和删除
索引:
>>> array1= np.array([13,21,16,25,31,19,44])
>>> s5_1=pd.Series(array1,index=['a','b','c','d','e','f','g'])
>>> s5_1
a 13
b 21
c 16
d 25
e 31
f 19
g 44
dtype: int32
>>> s5_1['b']
21
>>> s5_1[['b','d']]
b 21
d 25
dtype: int32
>>> s5_1[1] # 标签不是整数时,整数下标按位置取值;pandas 2.1起这种写法会有FutureWarning,推荐写成 s5_1.iloc[1]
21
>>> s5_1[[1,3]]
b 21
d 25
dtype: int32
>>> s5_1[2:6]
c 16
d 25
e 31
f 19
dtype: int32
>>> s5_1[2:-1]
c 16
d 25
e 31
f 19
dtype: int32
>>> s5_1[s5_1>25]
e 31
g 44
dtype: int32
>>> s5_2=pd.Series(array1,index=range(2001,2008))
>>> s5_2
2001 13
2002 21
2003 16
2004 25
2005 31
2006 19
2007 44
dtype: int32
>>> s5_2[1] # 如果Series的标签是整数,s[1]会按标签(而不是位置)查找;标签中没有1,所以报错。按位置取值请用 s5_2.iloc[1]
KeyError: 1
>>> s5_2[2002] # 直接用标签的值作为索引
21
>>> s5_2[2:6] # 在切片索引中,则不受数值标签的影响!
2003 16
2004 25
2005 31
2006 19
dtype: int32
修改:
>>> array1= np.array([13,21,16,25,31,19,44])
>>> s5_1=pd.Series(array1,index=['a','b','c','d','e','f','g'])
>>> s5_1
a 13
b 21
c 16
d 25
e 31
f 19
g 44
dtype: int32
>>> s5_1[1]=666
>>> s5_1['f']=888
>>> s5_1
a 13
b 666
c 16
d 25
e 31
f 888
g 44
dtype: int32
删除:
>>> array1= np.array([13,21,16,25,31,19,44])
>>> s5_1=pd.Series(array1,index=['a','b','c','d','e','f','g'])
>>> s5_1
a 13
b 21
c 16
d 25
e 31
f 19
g 44
dtype: int32
>>> s5_1.drop('c',inplace=True) # 只能根据标签删除,不能根据下标索引删除!
>>> del(s5_1['a']) # 只能根据标签删除,不能根据下标索引删除!
>>> s5_1
b 21
d 25
e 31
f 19
g 44
dtype: int32
添加:
>>> array1= np.array([13,21,16,25,31,19,44])
>>> s5_1=pd.Series(array1,index=['a','b','c','d','e','f','g'])
>>> s5_1
a 13
b 21
c 16
d 25
e 31
f 19
g 44
dtype: int32
>>> s5_1['h']=62
>>> s5_1
a 13
b 21
c 16
d 25
e 31
f 19
g 44
h 62
dtype: int64
3 获取Series的属性
>>> array1= np.array([13,21,16,25,31,19,44])
>>> s6_1=pd.Series(array1,index=['a','b','c','d','e','f','g'])
>>> s6_1.axes # 获取标签列(返回的是只包含行标签Index的列表;只想要标签本身可用 s6_1.index)
[Index(['a', 'b', 'c', 'd', 'e', 'f', 'g'], dtype='object')]
>>> s6_1.values # 获取数值列
array([13, 21, 16, 25, 31, 19, 44])
>>> s6_1.ndim # 获取维度(Series的维度始终为1)
1
>>> s6_1.size # 获取Series的长度
7
4 Series中的函数操作
>>> array2= np.array([13,21,16,25,31,19,25])
>>> s7_1=pd.Series(array2,index=['a','b','c','d','e','f','g'])
>>> s7_1.sort_index(ascending=False)
g 25
f 19
e 31
d 25
c 16
b 21
a 13
dtype: int32
>>> s7_1.sort_values()
a 13
c 16
f 19
b 21
d 25
g 25
e 31
dtype: int32
>>> s7_1.replace(25,52)
a 13
b 21
c 16
d 52
e 31
f 19
g 52
dtype: int32
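>>> s7_2=s7_1 # 注意:这里只是给同一个Series起了别名,并没有拷贝;下面对s7_2的修改会同时改变s7_1(后面的mean/median/sum都是基于修改后的数据)。需要独立副本时请用 s7_1.copy()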
>>> s7_2['e']=16
>>> s7_2
a 13
b 21
c 16
d 25
e 16
f 19
g 25
dtype: int32
>>> s7_2.drop_duplicates()
a 13
b 21
c 16
d 25
f 19
dtype: int32
>>> s7_1.mean()
19.285714285714285
>>> s7_1.median()
19.0
>>> s7_1.sum()
135