axis

轴（axis）是为超过一维的数组定义的属性。二维数据拥有两个轴：第0轴沿着行的方向垂直向下，第1
轴沿着列的方向水平延伸。逐行（axis=0）即把每一行延伸下来的一列当做一组；
逐列（axis=1）即把每一列延伸下来的一行当做一组。

1
2
3

# Build a 3x4 frame holding the integers 0..11, with rows labelled a-c and
# columns labelled one-four.
df = pd.DataFrame(
    data=np.arange(12).reshape(3, 4),
    index=list('abc'),
    columns=['one', 'two', 'three', 'four'],
)
print(df)

#+RESULTS:
:    one  two  three  four
: a    0    1      2     3
: b    4    5      6     7
: c    8    9     10    11

sum等操作

1
2
3

# axis=0 is the default: the sum runs down the rows, giving one total per column.
print(df.sum(axis=0))
print()
# axis=1: the sum runs across the columns, giving one total per row.
print(df.sum(axis=1))

drop等的计算

# Each call prints a copy of df with one label removed; df itself is not reassigned.
print(df.drop('four', axis=1))  # axis=1: drop the column labelled 'four'
print(df.drop('c', axis=0))  # axis=0: drop the row labelled 'c'

#+RESULTS:
:    one  two  three
: a    0    1      2
: b    4    5      6
: c    8    9     10
:    one  two  three  four
: a    0    1      2     3
: b    4    5      6     7

plus拼接

# df1 carries the former row labels in an ordinary column named 'index';
# df2 keeps them as its row index.
df1 = df.reset_index().copy()
df2 = df.copy()

# merge joins two frames side by side (horizontally, i.e. along axis=1);
# the join type defaults to inner.
merged = df1.merge(df.reset_index(), how='inner', on='index')
print(merged)
# Match df1's 'index' column against df's row index.
merged_index = df1.merge(df, left_on='index', right_index=True)
print(merged_index)
# Match the row index of both frames.
merged_all_Index = df2.merge(df, left_index=True, right_index=True)
print(merged_all_Index)

# np.concatenate: axis=0 (the default) stacks the arrays vertically,
# axis=1 joins them horizontally.
arr = np.arange(12).reshape((3, 4))
print(arr)
print(np.concatenate((arr, arr), axis=0))
print('axis = 1')
print(np.concatenate((arr, arr), axis=1))

# pd.concat: axis=0 (the default) stacks the frames vertically, axis=1 places
# them side by side; the join type defaults to outer.
print(pd.concat([df, df], axis=0))
print('axis =1 ')
print(pd.concat([df, df], axis=1, join='outer'))

add/div等操作

# axis='index' (axis=0): the array supplies one value per row.
# df is 3x4, so an array of length 3 is required.
print(df.add(np.arange(3), axis='index'))

# axis='columns' (axis=1, the default): the array supplies one value per
# column, so it has length 4.
print(df.add(np.arange(4), axis='columns'))

#+RESULTS:
:    one  two  three  four
: a    0    1      2     3
: b    5    6      7     8
: c   10   11     12    13
:    one  two  three  four
: a    0    2      4     6
: b    4    6      8    10
: c    8   10     12    14

聚合groupby

# groupby splits a frame along its rows by default (historically axis=0) and
# could also split along the columns (axis=1). The `axis` keyword of
# DataFrame.groupby is deprecated since pandas 2.1, so it is no longer passed
# here: the row-wise case just uses the default, and the column-wise case
# groups the transposed frame instead.
df = pd.DataFrame({'key1': ['a', 'a', 'b', 'b', 'a'],
                   'key2': ['one', 'two', 'one', 'two', 'one'],
                   'data1': np.random.randn(5),
                   'data2': np.random.randn(5)})
print(df)
# Row-wise split: rows are partitioned by their value in column key1, so
# rows 0, 1 and 4 form group 'a' and rows 2 and 3 form group 'b'.
for a, b in df.groupby('key1'):
    print(a)
    print(b)
    print()

print('-----axis=1的情况----------')
# Column-wise split: the columns are partitioned by dtype, so the two text
# columns (key1, key2) form one group and the two float columns (data1,
# data2) form the other. After transposing, the column labels are the row
# index, which is what df.dtypes is aligned against.
# sort=False keeps the groups in first-seen order instead of trying to sort
# the dtype keys, which have no meaningful ordering.
for a, b in df.T.groupby(df.dtypes, sort=False):
    print(a)
    print(b.T)  # transpose back so the group is displayed as columns of df
    print()