生成海报3
# pandas
# 基本操作
import pandas as pd
import numpy as np
# Walkthrough of basic pandas Series / DataFrame operations.
# The input is random, so the sample outputs quoted in the comments below
# come from separate runs and will not match each other or a fresh run.

# 5x3 array of uniform random floats in [0, 1).
data_arr = np.random.random((5, 3))
# Sample value:
# [[0.50298075 0.43943837 0.98819337]
#  [0.15268398 0.62293458 0.59364578]
#  [0.15213759 0.44474662 0.96353211]
#  [0.7419613  0.10181253 0.39773341]
#  [0.52190963 0.88915847 0.83383479]]

# ---------------------------------------------------------------- Series
# Wrap the first column in a Series; the default index starts at 0.
data_series = pd.Series(data_arr[:, 0])
print(data_series)
# 0    0.502981
# 1    0.152684
# 2    0.152138
# 3    0.741961
# 4    0.521910

# Label-based lookup: the element whose index label is 1 (e.g. 0.152684).
print(data_series.loc[1])
# Position-based lookup: the element at position 1. Same element here,
# because the default index labels coincide with the positions.
print(data_series.iloc[1])

# Index label of the largest value (idxmin() gives the smallest).
print(data_series.idxmax())
# Standard deviation, e.g. 0.2828663362478898.
print(data_series.std())
# Arithmetic mean.
print(data_series.mean())

# To experiment with missing values, assign None to an element before the
# calls below, e.g. `data_series.loc[1] = None`; it is stored as NaN.
# count() skips missing values: it prints 5 here, and would print 4 if the
# assignment above were enabled.
print(data_series.count())

# Convert the Series back to a NumPy array.
data_arr_arr = data_series.to_numpy()
print(data_arr_arr)
# With the missing-value experiment enabled the output looks like:
# [0.50819551        nan 0.63141257 0.60749479 0.58868946]
print(data_arr)

# ------------------------------------------------------------- DataFrame
# Build a DataFrame from the whole array with column names A, B, C.
data_df = pd.DataFrame(data=data_arr, columns=list("ABC"))
print(data_df)
#           A         B         C
# 0  0.254512  0.757820  0.091550
# 1  0.516707  0.983151  0.663463
# 2  0.928628  0.014504  0.132114
# 3  0.477039  0.719829  0.105010
# 4  0.809334  0.672697  0.105112

# Number of non-missing values per column.
print(data_df.count())
# A    5
# B    5
# C    5

# Mean of every column.
print(data_df.mean())
# A    0.204497
# B    0.616396
# C    0.374373
# dtype: float64

# Element-wise absolute value.
print(data_df.abs())
#           A         B         C
# 0  0.814022  0.658314  0.062514
# 1  0.517973  0.370916  0.754724
# 2  0.950446  0.348685  0.332213
# 3  0.362812  0.099421  0.707232
# 4  0.070324  0.228649  0.121512

# Add a column. The list needs one comma-separated entry per row; writing
# ['a' 'b' 'c' 'd' 'e'] without commas concatenates the literals into the
# single element 'abcde', whose length does not match the 5 rows.
data_df["label"] = ["a", "b", "c", "d", "e"]
print(data_df)
#           A         B         C label
# 0  0.924854  0.442920  0.845805     a
# 1  0.056361  0.265280  0.203053     b
# 2  0.489868  0.540504  0.086957     c
# 3  0.796409  0.981362  0.043635     d
# 4  0.993484  0.292087  0.259481     e

# Select a single column by name.
print(data_df["A"])
# 0    0.740210
# 1    0.409091
# 2    0.223357
# 3    0.051529
# 4    0.254907
# Name: A, dtype: float64

print(data_df["label"])
# 0    a
# 1    b
# 2    c
# 3    d
# 4    e
# Name: label, dtype: object

# Attribute access is an alternative way to select a column.
print(data_df.B)
# 0    0.682330
# 1    0.114727
# 2    0.359911
# 3    0.961283
# 4    0.384240
# Name: B, dtype: float64

# Element-wise transform example (left disabled, as in the original):
# print(data_df.B.apply(lambda x: 1 if x > 0.5 else None))

# Pairwise correlation of the numeric columns. The string column `label`
# must be excluded explicitly: since pandas 2.0, corr() no longer drops
# non-numeric columns on its own and raises instead.
print(data_df.select_dtypes(include="number").corr())
#           A         B         C
# A  1.000000  0.272891  0.642479
# B  0.272891  1.000000 -0.228795
# C  0.642479 -0.228795  1.000000

print("==========")
print(data_df)
#           A         B         C label
# 0  0.933791  0.127295  0.854814     a
# 1  0.834232  0.083129  0.679133     b
# 2  0.363345  0.374977  0.948414     c
# 3  0.989602  0.935234  0.081740     d
# 4  0.112818  0.971566  0.310665     e

# Label slice: rows with index labels 1 through 3, both ends included.
print(data_df.loc[1:3])
#           A         B         C label
# 1  0.834232  0.083129  0.679133     b
# 2  0.363345  0.374977  0.948414     c
# 3  0.989602  0.935234  0.081740     d

# Positional row slice: only the first row.
print(data_df[:1])
# 0  0.570442  0.193728  0.970507     a

print("-------------")
# Filter rows with a query expression.
print(data_df.query("label in ['a'] and B>0.5"))
#           A         B        C label
# 0  0.019635  0.885094  0.00464     a

# Convert to a nested dict of the form {column: {index: value}}.
print(data_df.to_dict())
# {'A': {0: 0.5305044817973736, 1: 0.5509991132614303, 2: 0.7438018909462376, ...

# Convert to a NumPy array. The element type differs from the purely
# numeric array because the frame now mixes floats and strings.
print(data_df.to_numpy())
# [[0.5638733074884312 0.4203840086067062 0.23095162227011978 'a']
#  [0.763707311401047 0.3127414464213296 0.0987114590518412 'b']
#  [0.3055716561013855 0.8602716740001318 0.38640561867331946 'c']
#  [0.5131968102613049 0.1743155672515787 0.60567589693313 'd']
#  [0.3592365104159433 0.13353978871455607 0.08760872175584289 'e']]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# 数据读取



古茗
¥ 10

烧奶珍珠奶茶
¥ 9

酸奶
¥ 4

其他
¥ 自定义
冰糖指蜂蜜
¥ 10
推荐阅读
点击评论
文章目录
最新文章
公告