numpy 数据分析基础
创建数组
方法 | 说明 |
---|---|
np.array( x ) | 将输入数据转化为一个ndarray |
np.array( x, dtype ) | 将输入数据转化为一个类型为dtype的ndarray |
np.asarray( array ) | 将输入数据转化为ndarray;若输入本身已是类型相符的ndarray则不复制,直接返回原数组(np.array默认会copy) |
np.ones( N ) | 生成一个N长度的一维全一ndarray |
np.ones( N, dtype) | 生成一个N长度类型是dtype的一维全一ndarray |
np.ones_like( ndarray ) | 生成一个形状与参数相同的全一ndarray |
np.zeros( N) | 生成一个N长度的一维全零ndarray |
np.zeros( N, dtype) | 生成一个N长度类型为dtype的一维全零ndarray |
np.zeros_like(ndarray) | 类似np.ones_like( ndarray ) |
np.empty( N ) | 生成一个N长度的未初始化一维ndarray |
np.empty( N, dtype) | 生成一个N长度类型是dtype的未初始化一维ndarray |
np.empty_like(ndarray) | 类似np.ones_like( ndarray ),生成一个形状与参数相同的未初始化ndarray |
np.eye( N ) | 创建一个N * N的单位矩阵(对角线为1,其余为0) |
np.identity( N ) | 创建一个N * N的单位矩阵,效果同np.eye( N ) |
np.arange( num) | 生成一个从0到num-1步数为1的一维ndarray |
np.arange( begin, end) | 生成一个从begin到end-1步数为1的一维ndarray |
np.arange( begin, end, step) | 生成一个从begin开始、不包含end、步长为step的一维ndarray |
np.meshgrid(ndarray, ndarray,...) | 根据多个一维坐标ndarray生成坐标网格,每个输入对应返回一个多维ndarray |
np.where(cond, ndarray1, ndarray2) | 根据条件cond,选取ndarray1或者ndarray2,返回一个新的ndarray |
np.in1d(ndarray, [x,y,...]) | 检查ndarray中的元素是否等于[x,y,...]中的一个,返回bool数组 |
import numpy as np
array 创建数组
np.array([[1,2],[3,4]]) arr = [1,2,3,4,5,6,7] a = np.array(arr) a
array([1, 2, 3, 4, 5, 6, 7])
zeros 创建默认0的数组
a = np.zeros(3) print ('a:',a) b = np.zeros((3,3)) print('b:',b) c = np.zeros_like(a) print('c:',c)
a: [0. 0. 0.] b: [[0. 0. 0.] [0. 0. 0.] [0. 0. 0.]] c: [0. 0. 0.]
ones 创建默认1的数组
a = np.ones(3) print ('a:',a) b = np.ones((3,3)) print('b:',b) c = np.ones_like(b) print('c:',c)
a: [1. 1. 1.] b: [[1. 1. 1.] [1. 1. 1.] [1. 1. 1.]] c: [[1. 1. 1.] [1. 1. 1.] [1. 1. 1.]]
empty 创建未初始化的数组(元素为内存中的任意残留值,并非真正的随机数)
print (np.empty((2))) print(np.empty((2,4)))
[-5.73021895e-300 6.92466535e-310] [[6.92449667e-310 5.02034658e+175 6.03195894e+174 2.73950109e-057] [7.12194759e-067 2.10979251e-052 1.47763641e+248 1.16096346e-028]]
random.randn 随机值数组
np.random.randn(6)
array([ 0.84344504, -0.73714262, 0.93808627, -1.00264675, -0.17058495, 0.4664123 ])
random.randn 随机值多维数组
np.random.randn(2,2)
array([[ 0.38153354, -0.92519611], [-1.3137341 , -1.26921917]])
arange 创建序列数组
## arange 0-9 print(np.arange(10)) ## arange 5-15 print(np.arange(5,15)) ## arange 13579 print(np.arange(1,10,2))
[0 1 2 3 4 5 6 7 8 9] [ 5 6 7 8 9 10 11 12 13 14] [1 3 5 7 9]
eye identity 创建对角线为一的二维数组(单位矩阵)
a = np.eye(4) print(a) np.identity(4)
[[1. 0. 0. 0.] [0. 1. 0. 0.] [0. 0. 1. 0.] [0. 0. 0. 1.]] array([[1., 0., 0., 0.], [0., 1., 0., 0.], [0., 0., 1., 0.], [0., 0., 0., 1.]])
np.eye(3,3,3)
array([[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])
reshape 一维转多维
a = np.arange(4).reshape(2,2) print(a)
[[0 1] [2 3]]
meshgrid 生成一个ndarray * ndarray * ...的多维ndarray
x = np.array([0, 1, 2]) y = np.array([0, 1]) X, Y = np.meshgrid(x, y) print(X) print(Y)
[[0 1 2] [0 1 2]] [[0 0 0] [1 1 1]]
常用方法
ndim 查看数据维度
shape 查看数据维度大小
dtype 查看数据类型
a = np.random.randn(3,3,3) # 查看数据维度 print(a.ndim) # 查看数据维度大小 print(a.shape) # dtype 查看数据类型 print(a.dtype)
3 (3, 3, 3) float64
astype转换数据类型
a = np.eye(5,5) print(a.dtype) b = a.astype(np.int64).dtype print(b)
float64 int64
数学运算
# + a = np.arange(4).reshape(2,2) print(a) print('++++++++') print(a+1) # - print('--------') print(a-1) # * print('********') print(a*2) # / print('////////') print(1/(a+1))
[[0 1] [2 3]] ++++++++ [[1 2] [3 4]] -------- [[-1 0] [ 1 2]] ******** [[0 2] [4 6]] //////// [[1. 0.5 ] [0.33333333 0.25 ]]
比较运算
a = np.arange(6,0,-1).reshape(2,3) b = np.arange(3,9).reshape(2,3) print(a,b,a>b,sep='\n')
[[6 5 4] [3 2 1]] [[3 4 5] [6 7 8]] [[ True True False] [False False False]]
切片和索引
一维切片同py list
a = np.arange(10) print(a) print(a[5]) print(a[5:8]) a[5:8] = 12 print(a)
[0 1 2 3 4 5 6 7 8 9] 5 [5 6 7] [ 0 1 2 3 4 12 12 12 8 9]
切片返回的是视图(view)而不是copy,修改切片会同时修改原数组
b = a[5:8] print(b) b[1] = 21 print(a)
[12 12 12] [ 0 1 2 3 4 12 21 12 8 9]
# 切片给数组赋值 a[:] = 0 a
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
copy 复制
# 复制 b = a[2:5].copy() print(b) b[:] = 11 print(b) print(a)
[0 0 0] [11 11 11] [0 0 0 0 0 0 0 0 0 0]
多维数组
# 多维数组 a2d = np.arange(6).reshape(2,3) print(a2d) # 两种取值方式,结果一样 print(a2d[0][2]) print(a2d[0,2])
[[0 1 2] [3 4 5]] 2 2
# 多维数组 a3d = np.arange(1,13).reshape(2,2,3) print(a3d) print(a3d.ndim) print(a3d[0]) print(a3d[0].ndim)
[[[ 1 2 3] [ 4 5 6]] [[ 7 8 9] [10 11 12]]] 3 [[1 2 3] [4 5 6]] 2
标量值和数组都可以赋值
# 标量值和数组都可以赋值给 a3d tmp = a3d[0].copy() a3d[0] = 99 print(a3d) a3d[0] = tmp print(a3d)
[[[99 99 99] [99 99 99]] [[ 7 8 9] [10 11 12]]] [[[ 1 2 3] [ 4 5 6]] [[ 7 8 9] [10 11 12]]]
多维数组 切片索引
选行
a = np.eye(4) # 选2 3 行 a[[2,3]]
array([[0., 0., 1., 0.], [0., 0., 0., 1.]])
选列(注意:下面的 a[:][2] 等价于 a[2],实际取的是第2行;单位矩阵是对称的,所以结果恰好与第2列相同。一般情况下选列应写成 a[:, 2])
a[:][2]
array([0., 0., 1., 0.])
a2d = np.arange(9).reshape(3,3) print(a2d) # 选择前2 print(a2d[:2]) # 选择 0 1 的0 print(a2d[:2,:1]) # 选择0 1 的 1 2 print(a2d[:2,1:]) # 选择 1 的 1 2 print(a2d[1,1:])
[[0 1 2] [3 4 5] [6 7 8]] [[0 1 2] [3 4 5]] [[0] [3]] [[1 2] [4 5]] [4 5]
a2d[:2,1:] = 0 a2d
array([[0, 0, 0], [3, 0, 0], [6, 7, 8]])
布尔型,索引
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe']) data = np.random.randn(7,4) print(names,data,sep='/n') print(names == 'Bob')
['Bob' 'Joe' 'Will' 'Bob' 'Will' 'Joe' 'Joe']/n[[-0.75689604 0.88583323 -1.49689044 0.95657837] [-1.41937221 -0.22434237 1.74661417 0.08943732] [ 1.55542971 -1.73335114 0.22718931 0.07517303] [ 2.97463647 1.26849374 -0.38591486 0.1239141 ] [-1.0057703 -0.26292255 1.42444036 -2.02698881] [-0.00814679 0.38637696 -0.49233058 -1.13374044] [-1.03599764 -1.3238696 -0.70148144 0.59657573]] [ True False False True False False False]
print(data[names == 'Bob']) print(data[names == 'Bob',2:]) print(data[names == 'Bob',3])
[[-0.75689604 0.88583323 -1.49689044 0.95657837] [ 2.97463647 1.26849374 -0.38591486 0.1239141 ]] [[-1.49689044 0.95657837] [-0.38591486 0.1239141 ]] [0.95657837 0.1239141 ]
print(names != 'Bob') print(data[~(names == 'Bob')]) print(((names == 'Bob') | (names == 'Will')))
[False True True False True True True] [[-1.41937221 -0.22434237 1.74661417 0.08943732] [ 1.55542971 -1.73335114 0.22718931 0.07517303] [-1.0057703 -0.26292255 1.42444036 -2.02698881] [-0.00814679 0.38637696 -0.49233058 -1.13374044] [-1.03599764 -1.3238696 -0.70148144 0.59657573]] [ True False True True True False False]
data[data<0] = 0 data
array([[0. , 0.88583323, 0. , 0.95657837], [0. , 0. , 1.74661417, 0.08943732], [1.55542971, 0. , 0.22718931, 0.07517303], [2.97463647, 1.26849374, 0. , 0.1239141 ], [0. , 0. , 1.42444036, 0. ], [0. , 0.38637696, 0. , 0. ], [0. , 0. , 0. , 0.59657573]])
data[names != 'Joe'] = 7 data
array([[7. , 7. , 7. , 7. ], [0. , 0. , 1.74661417, 0.08943732], [7. , 7. , 7. , 7. ], [7. , 7. , 7. , 7. ], [7. , 7. , 7. , 7. ], [0. , 0.38637696, 0. , 0. ], [0. , 0. , 0. , 0.59657573]])
a = np.empty((8,4)) for i in range(8): a[i] = i a
array([[0., 0., 0., 0.], [1., 1., 1., 1.], [2., 2., 2., 2.], [3., 3., 3., 3.], [4., 4., 4., 4.], [5., 5., 5., 5.], [6., 6., 6., 6.], [7., 7., 7., 7.]])
# 以特定顺序选取子集 print(a[[5,2,1]]) # 以顺序选取列 a[:,2]
[[5. 5. 5. 5.] [2. 2. 2. 2.] [1. 1. 1. 1.]] array([0., 1., 2., 3., 4., 5., 6., 7.])
# 负索引 print(a[[-3,-6,-7]]) print(a[[-3,-6,-7],[0,1,2]]) print(a[[-3,-6,-7]][:, [0,1]])
[[5. 5. 5. 5.] [2. 2. 2. 2.] [1. 1. 1. 1.]] [5. 2. 1.] [[5. 5.] [2. 2.] [1. 1.]]
数组转置和轴对换
a = np.arange(8).reshape((2,4)) print(a) print(a.T)
[[0 1 2 3] [4 5 6 7]] [[0 4] [1 5] [2 6] [3 7]]
dot计算内积
一维: x0*x1 + y0*y1 二维: arr2[0][0] = arr0[0][0] * arr1[0][0] + arr0[0][1] * arr1[1][0] (0*0 + 1*1) arr2[0][1] = arr0[0][0] * arr1[0][1] + arr0[0][1] * arr1[1][1] (0*2 + 1*3) arr2[1][0] = arr0[1][0] * arr1[0][0] + arr0[1][1] * arr1[1][0] (2*0 + 3*1) arr2[1][1] = arr0[1][0] * arr1[0][1] + arr0[1][1] * arr1[1][1] (2*2 + 3*3)
a = np.arange(4).reshape((2,2)) print(a) print(a.T) print(np.dot(a[0],a[1])) print(np.dot(a, a.T))
[[0 1] [2 3]] [[0 2] [1 3]] 3 [[ 1 3] [ 3 13]]
transpose 交换坐标轴
原数组维度为 (2, 2, 3),分别对应轴 0、1、2;transpose((2,1,0)) 交换第0轴和第2轴,结果维度为 (3, 2, 2)
arr = np.arange(12).reshape((2, 2, 3)) print(arr) arr.transpose((2,1,0))
[[[ 0 1 2] [ 3 4 5]] [[ 6 7 8] [ 9 10 11]]] array([[[ 0, 6], [ 3, 9]], [[ 1, 7], [ 4, 10]], [[ 2, 8], [ 5, 11]]])
arr = np.arange(9).reshape(3,3) print(arr) arr.transpose((1,0))
[[0 1 2] [3 4 5] [6 7 8]] array([[0, 3, 6], [1, 4, 7], [2, 5, 8]])
来源:https://www.cnblogs.com/ham-731/p/12178703.html