Pandas数据排序
.sort_index()
在指定轴上根据索引进行排序,索引排序后内容会跟随排序
import pandas as pd
import numpy as np
b = pd.DataFrame(np.arange(20).reshape(4,5),index=['c','a','d','b'])
b
| 0 | 1 | 2 | 3 | 4 |
c | 0 | 1 | 2 | 3 | 4 |
a | 5 | 6 | 7 | 8 | 9 |
d | 10 | 11 | 12 | 13 | 14 |
b | 15 | 16 | 17 | 18 | 19 |
b.sort_index()
| 0 | 1 | 2 | 3 | 4 |
a | 5 | 6 | 7 | 8 | 9 |
b | 15 | 16 | 17 | 18 | 19 |
c | 0 | 1 | 2 | 3 | 4 |
d | 10 | 11 | 12 | 13 | 14 |
b.sort_index(ascending=False)
| 0 | 1 | 2 | 3 | 4 |
d | 10 | 11 | 12 | 13 | 14 |
c | 0 | 1 | 2 | 3 | 4 |
b | 15 | 16 | 17 | 18 | 19 |
a | 5 | 6 | 7 | 8 | 9 |
b.sort_index(axis=0, ascending=False) # 按行标排序,ascending:False为降序
| 0 | 1 | 2 | 3 | 4 |
d | 10 | 11 | 12 | 13 | 14 |
c | 0 | 1 | 2 | 3 | 4 |
b | 15 | 16 | 17 | 18 | 19 |
a | 5 | 6 | 7 | 8 | 9 |
b.sort_index(axis=1, ascending=False) # 按列标排序
| 4 | 3 | 2 | 1 | 0 |
c | 4 | 3 | 2 | 1 | 0 |
a | 9 | 8 | 7 | 6 | 5 |
d | 14 | 13 | 12 | 11 | 10 |
b | 19 | 18 | 17 | 16 | 15 |
.sort_values()
在指定轴上根据数值进行排序,默认升序
- Series.sort_values(axis=0,ascending=True)
- DataFrame.sort_values(by,axis=0,ascending=True)
dates = pd.date_range('20130101', periods=10)
dates
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04', '2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08', '2013-01-09', '2013-01-10'], dtype='datetime64[ns]', freq='D')
df = pd.DataFrame(np.random.randn(10,4),index=dates,columns=['A','B','C','D'])
df.head()
| A | B | C | D |
2013-01-01 | -0.300266 | 0.683232 | 0.777509 | -0.274338 |
2013-01-02 | 2.298084 | -0.855524 | 1.462064 | -0.725142 |
2013-01-03 | 0.512711 | 0.824380 | 0.384902 | -1.437241 |
2013-01-04 | 0.388478 | -1.265414 | -1.104333 | -0.447689 |
2013-01-05 | 0.273518 | -0.314857 | -2.545510 | -1.301629 |
c = df.sort_values('B')
c.head()
# 注:下方输出来自另一次随机生成的 df(np.random.randn 每次运行结果不同),因此数值与上文 df.head() 的结果不一致
| A | B | C | D |
2013-01-01 | -0.976353 | -2.176075 | 0.255585 | 0.645465 |
2013-01-03 | -1.549727 | -1.876790 | 0.966724 | 0.486101 |
2013-01-06 | -0.000467 | -1.430820 | -1.803610 | -0.587985 |
2013-01-10 | -0.293663 | -0.691951 | 0.262666 | -1.298977 |
2013-01-04 | -0.032301 | -0.618582 | 1.204373 | -0.302137 |
c = df.sort_values('B',ascending = False)
c.head()
| A | B | C | D |
2013-01-03 | 0.512711 | 0.824380 | 0.384902 | -1.437241 |
2013-01-01 | -0.300266 | 0.683232 | 0.777509 | -0.274338 |
2013-01-08 | 0.010939 | 0.591777 | 0.143182 | 0.461798 |
2013-01-10 | 0.811169 | 0.100516 | -1.385373 | 0.168329 |
2013-01-05 | 0.273518 | -0.314857 | -2.545510 | -1.301629 |
# 指定 axis=1 排序:以行标签 '2013-01-01' 这一行的数值为基准,对各列进行降序排列
c = df.sort_values('2013-01-01',axis=1,ascending=False)
c.head()
| C | B | D | A |
2013-01-01 | 0.777509 | 0.683232 | -0.274338 | -0.300266 |
2013-01-02 | 1.462064 | -0.855524 | -0.725142 | 2.298084 |
2013-01-03 | 0.384902 | 0.824380 | -1.437241 | 0.512711 |
2013-01-04 | -1.104333 | -1.265414 | -0.447689 | 0.388478 |
2013-01-05 | -2.545510 | -0.314857 | -1.301629 | 0.273518 |
NaN 空值统一放在排序末尾:无论升序还是降序,缺失值默认都排在最后(na_position='last')
a = pd.DataFrame(np.arange(12).reshape(3,4),index=['a','b','c'])
a
| 0 | 1 | 2 | 3 |
a | 0 | 1 | 2 | 3 |
b | 4 | 5 | 6 | 7 |
c | 8 | 9 | 10 | 11 |
b = pd.DataFrame(np.arange(20).reshape(4,5),index=['c','a','d','b'])
b
| 0 | 1 | 2 | 3 | 4 |
c | 0 | 1 | 2 | 3 | 4 |
a | 5 | 6 | 7 | 8 | 9 |
d | 10 | 11 | 12 | 13 | 14 |
b | 15 | 16 | 17 | 18 | 19 |
c = a + b
c
| 0 | 1 | 2 | 3 | 4 |
a | 5.0 | 7.0 | 9.0 | 11.0 | NaN |
b | 19.0 | 21.0 | 23.0 | 25.0 | NaN |
c | 8.0 | 10.0 | 12.0 | 14.0 | NaN |
d | NaN | NaN | NaN | NaN | NaN |
c.sort_values(2,ascending = False)
| 0 | 1 | 2 | 3 | 4 |
b | 19.0 | 21.0 | 23.0 | 25.0 | NaN |
c | 8.0 | 10.0 | 12.0 | 14.0 | NaN |
a | 5.0 | 7.0 | 9.0 | 11.0 | NaN |
d | NaN | NaN | NaN | NaN | NaN |
c.sort_values(2,ascending = True)
| 0 | 1 | 2 | 3 | 4 |
a | 5.0 | 7.0 | 9.0 | 11.0 | NaN |
c | 8.0 | 10.0 | 12.0 | 14.0 | NaN |
b | 19.0 | 21.0 | 23.0 | 25.0 | NaN |
d | NaN | NaN | NaN | NaN | NaN |