Numpy优势
1、定义:开源的Python科学计算库,用于快速处理任意维度的数组,Numpy中,存储对象是ndarray
2、创建:np.array([])
3、numpy的优势:内存块风格、支持并行化运算、一体式存储、效率高于纯Python代码、底层使用了C,内部释放了GIL
一、numpy 计算效率
python
import numpy as np
# 8 x 5 integer matrix used as sample data.
score = np.array(
    [
        [80, 89, 86, 67, 79],
        [78, 97, 89, 67, 81],
        [90, 94, 78, 67, 74],
        [91, 91, 90, 67, 69],
        [76, 87, 75, 67, 86],
        [70, 79, 84, 67, 84],
        [94, 92, 93, 67, 64],
        [86, 85, 83, 67, 80],
    ]
)
# Add up every element of the matrix in one vectorized call.
print(np.sum(score))  # 3210
计算时间对比
python
import numpy as np
import time
import random
# Benchmark: built-in sum() over a Python list vs. np.sum() over an ndarray
# holding the same 10,000,000 random floats.
a = [random.random() for _ in range(10000000)]

# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() follows the wall clock and can jump when the
# system time is adjusted.
t1 = time.perf_counter()
sum1 = sum(a)
t2 = time.perf_counter()

# The list -> ndarray conversion happens outside both timed regions.
b = np.array(a)
t3 = time.perf_counter()
sum2 = np.sum(b)
t4 = time.perf_counter()

# The trailing numbers are one recorded sample run; timings are machine-dependent.
print("t2 - t1 = ", t2 - t1)  # 0.07523679733276367
print("t4 - t3 = ", t4 - t3)  # 0.008289098739624023
二、ndarray属性
python
import numpy as np
# 8 x 5 integer matrix used to demonstrate the basic ndarray attributes.
score = np.array(
    [
        [80, 89, 86, 67, 79],
        [78, 97, 89, 67, 81],
        [90, 94, 78, 67, 74],
        [91, 91, 90, 67, 69],
        [76, 87, 75, 67, 86],
        [70, 79, 84, 67, 84],
        [94, 92, 93, 67, 64],
        [86, 85, 83, 67, 80],
    ]
)
# shape: tuple holding the length of every dimension -> (8, 5)
print(score.shape)
# ndim: number of dimensions -> 2
print(score.ndim)
# size: total number of elements -> 40
print(score.size)
# itemsize: bytes used by one element -> 8 (when the dtype is int64)
print(score.itemsize)
# dtype: element type -> int64 (platform default integer; may differ on some systems)
print(score.dtype)
三、ndarray的数组形状
python
import numpy as np
# One-dimensional array: a single axis of length 3.
a = np.array([1, 2, 3])
print(a.shape)  # (3,)
print(a.ndim)  # 1

# Two-dimensional array: 2 rows, 3 columns.
b = np.array(
    [
        [1, 2, 3],
        [2, 3, 4],
    ]
)
print(b.shape)  # (2,3)
print(b.ndim)  # 2

# Three-dimensional array: 2 blocks, each with 3 rows of 3 elements.
c = np.array(
    [
        [[1, 2, 3], [2, 3, 4], [2, 3, 4]],
        [[3, 4, 5], [2, 3, 4], [4, 5, 6]],
    ]
)
print(c.shape)  # (2, 3, 3)
print(c.ndim)  # 3
四、ndarray的类型
python
import numpy as np
# Without an explicit dtype NumPy infers one from the data.
a = np.array([1, 2, 3])
print(a.dtype)  # int64 (platform default integer; may be int32 on some systems)

# An explicit dtype overrides the inferred one.
d = np.array([1, 2, 3], dtype=np.float32)
print(d.dtype)  # float32

# np.string_ was removed in NumPy 2.0 and raises AttributeError there.
# np.bytes_ is the same fixed-width byte-string type and exists in both
# NumPy 1.x and 2.x, so the snippet keeps working across versions.
e = np.array(["I", "love", "python"], dtype=np.bytes_)
print(e, e.dtype)  # [b'I' b'love' b'python'] |S6
五、生成数组
python
import numpy as np
# 4 x 8 array filled with ones.
oness = np.ones((4, 8))
print(oness)
print("-----")
# Zero-filled array that copies the shape and dtype of `oness`.
zeros_likes = np.zeros_like(oness)
print(zeros_likes)

六、深浅拷贝
python
import numpy as np
a = np.array([[1, 2, 3], [4, 5, 6]])
divider = "-----------"

# np.array() copies its input by default, so a1 holds independent data.
a1 = np.array(a)
print(a1)  # [[1 2 3][4 5 6]]
print(divider)

# np.asarray() does not copy an input that is already an ndarray,
# so a2 shares its data with a.
a2 = np.asarray(a)
print(a2)  # [[1 2 3][4 5 6]]
print(divider)

# Mutate the original: the change is visible through a2 but not through a1.
a[0, 0] = 100
print("a = ", a)  # [[100 2 3] [4 5 6]]
print("a1 = ", a1)  # [[1 2 3] [4 5 6]]
print("a2 = ", a2)  # [[100 2 3] [4 5 6]]
生成数据
python
import numpy as np
# linspace: a fixed NUMBER of evenly spaced samples, both endpoints included.
a = np.linspace(0, 100, num=11)
print(a)  # [ 0. 10. 20. 30. 40. 50. 60. 70. 80. 90. 100.]

# arange: values from 10 up to (but excluding) 50 with a fixed STEP of 2.
b = np.arange(10, 50, 2)
print(b)  # [10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44 46 48]

# logspace: 3 samples 10**x with x evenly spaced from 0 to 2 (10**0, 10**1, 10**2).
c = np.logspace(0, 2, num=3)
print(c)  # [ 1. 10. 100.]
七、生成随机数
1、numpy.random.uniform(low=0.0, high=1.0, size=None)
生成size个符合均匀分布的浮点数,取值范围为[low, high),默认取值范围为[0, 1.0)
2、numpy.random.rand(d0, d1, ..., dn)
生成一个(d0, d1, ..., dn)维的数组,数组的元素取自[0, 1)上的均匀分布,若没有参数输入,则生成一个数
3、numpy.random.randint(low, high=None, size=None, dtype=int)
生成size个整数,取值区间为[low, high),若没有输入参数high则取值区间为[0, low)
python
import numpy as np
divider = "-------"

# 3 x 5 floats drawn uniformly from [1, 10).
c = np.random.uniform(1, 10, (3, 5))
print(c)
print(divider)

# 2 x 3 floats drawn uniformly from [0, 1).
a = np.random.rand(2, 3)
print(a)
print(divider)

# 3 x 5 integers drawn from [1, 10).
b = np.random.randint(1, 10, size=(3, 5))
print(b)

八、生成均匀分布图
python
import numpy as np
import matplotlib.pyplot as plt
# Draw 1,000,000 samples uniformly from [0, 1) and plot their histogram.
x = np.random.uniform(0, 1, 1000000)
# 1. Create the canvas
plt.figure(figsize=(20, 8), dpi=100)
# 2. Draw the plot
plt.hist(x, bins=1000) # histogram of the samples split into 1000 bins
# 3. Show the figure
# NOTE(review): rcParams are conventionally set before any plotting call --
# confirm these late assignments take effect as intended.
plt.rcParams["font.sans-serif"] = ["Heiti TC"] # CJK-capable font so Chinese text is not garbled (assumes "Heiti TC" is installed -- TODO confirm on non-macOS systems)
plt.rcParams["axes.unicode_minus"] = False # keep the minus sign from rendering as an empty box under this font
plt.show()

九、正态分布:均值,方差
均值:图形的左右位置
方差:图像是瘦,还是胖,值越小,图形越瘦高,数据越集中,值越大,图形越矮胖,数据越分散
np.random.normal(loc=0.0, scale=1.0, size=None)
该函数有三个参数:loc, scale, size
loc表示随机数的期望值(对应着整个分布的中心)。float ,loc=0说明这是一个以Y轴为对称轴的正态分布
scale表示随机数的标准差。float ,(对应于分布的宽度,scale越大越矮胖,scale越小,越瘦高)
size表示生成的随机数的个数。int or tuple of ints 输出的shape,默认为None,只输出一个值
python
import numpy as np
import matplotlib.pyplot as plt
# Draw 10,000,000 samples from a normal distribution (loc=1.75, scale=1)
# and plot their histogram.
x = np.random.normal(1.75, 1, 10000000)
# 1. Create the canvas
plt.figure(figsize=(20, 8), dpi=100)
# 2. Draw the plot
plt.hist(x, bins=1000)
# 3. Show the figure
# NOTE(review): rcParams are conventionally set before any plotting call --
# confirm these late assignments take effect as intended.
plt.rcParams["font.sans-serif"] = ["Heiti TC"] # CJK-capable font so Chinese text is not garbled (assumes "Heiti TC" is installed -- TODO confirm on non-macOS systems)
plt.rcParams["axes.unicode_minus"] = False # keep the minus sign from rendering as an empty box under this font
plt.show()

十、数组和索引
python
import numpy as np
# 2 x 2 x 3 array: two blocks, each holding two rows of three elements.
a = np.array(
    [
        [[1, 2, 3], [4, 5, 6]],
        [[2, 3, 4], [5, 6, 7]],
    ]
)
# Index order is block, row, column: block 1 -> row 1 -> column 2.
print(a[1][1][2])  # 7
十一、取值
python
import numpy as np
# 4 x 5 samples from the standard normal distribution.
stock_change = np.random.normal(loc=0, scale=1, size=(4, 5))
divider = "------------"
print(stock_change)
print(divider)
# Slice out rows 0-1 and columns 0-2.
print("前两行三列 = ", stock_change[:2, :3])
print(divider)

十二、形状修改
1、对象.reshape:不进行行列互换,产生新变量
2、对象.resize:不进行行列互换,直接在原数组上修改形状(原值被更改,无返回值)
3、对象.T:进行行列互换
python
import numpy as np
stock_change = np.random.normal(loc=0, scale=1, size=(4, 5))
divider = "------------"

# reshape() returns an array with the new shape; stock_change keeps its own shape.
a = stock_change.reshape(5, 4)
print("修改为5行4列 \n", a)
print(divider)
print(stock_change)
print(divider)

# resize() changes the shape of stock_change itself, here to 2 rows x 10 columns.
stock_change.resize(2, 10)
print(stock_change)
print(divider)

# .T swaps rows and columns.
print(stock_change.T)

十三、类型修改
python
import numpy as np
stock_change = np.random.normal(loc=0, scale=1, size=(4, 5))
print(stock_change)

# astype() returns a new array converted to the requested dtype (32-bit integers here).
a = stock_change.astype(np.int32)
print(a)

# tobytes() serializes the array data to a bytes object; it is the replacement
# for the deprecated tostring().
b = stock_change.tobytes()
print(b)

十四、去重
python
import numpy as np
a = np.array(
    [
        [1, 2, 3, 4],
        [3, 4, 5, 6],
    ]
)
print(a)  # [[1 2 3 4][3 4 5 6]]
# unique() flattens the input and returns its distinct values in sorted order.
b = np.unique(a)
print(b)  # [1 2 3 4 5 6]
十五、ndarray 的运算
逻辑运算
python
import numpy as np
a = np.random.normal(loc=0, scale=1, size=(8, 10))
divider = "------------"
print(a)
print(divider)
# Top-left 5 x 5 corner.
print(a[:5, :5])
print(divider)
# Element-wise comparison yields a boolean array of the same shape.
print(a > 1)
print(divider)
# Boolean-mask assignment: every element greater than 1 is overwritten with 2.
a[a > 1] = 2
print(a)
\[-0.51900735 -0.14271233 1.01005954 -0.86900061 -0.93724825 0.80252265 -0.95580861 -1.44488356 -0.57791264 -1.13413082
1.05058831 -1.54560852 0.43385941 0.88128512 2.44099593 0.34949527 -0.95622858 0.37991692 0.54569224 -0.75352017
0.39825805 -2.16934883 1.26776266 -2.21747411 -0.45542472 1.0307148 0.87466259 1.36620219 0.84665957 -0.94000536
-1.31879071 0.07944813 0.2495135 -1.83407838 1.34390881 0.08627338 0.4670269 0.10993878 0.43422025 1.83733298
-0.15992854 -2.33966934 -0.04066878 -1.29530305 -1.34624933 1.17315284 -0.88062213 -0.61243776 0.2095878 0.16989294
0.90521838 -0.19191445 -0.39435977 -0.57472974 0.32285371 -1.41945 -0.50926629 0.15733278 0.77452486 0.11358008
1.66469677 -0.54742563 2.21895541 -0.23362342 0.94280006 1.60274991 0.17968994 1.06238471 -0.97442727 0.02435354
1.50489373 -0.3583253 -1.13720051 -1.07801469 -0.20929034 0.78332422 -0.05183476 -0.98637273 -0.35651872 0.71030747\]
\[-0.51900735 -0.14271233 1.01005954 -0.86900061 -0.93724825
1.05058831 -1.54560852 0.43385941 0.88128512 2.44099593
0.39825805 -2.16934883 1.26776266 -2.21747411 -0.45542472
-1.31879071 0.07944813 0.2495135 -1.83407838 1.34390881
-0.15992854 -2.33966934 -0.04066878 -1.29530305 -1.34624933\]
\[False False True False False False False False False False
True False False False True False False False False False
False False True False False True False True False False
False False False False True False False False False True
False False False False False True False False False False
False False False False False False False False False False
True False True False False True False True False False
True False False False False False False False False False\]
\[-0.51900735 -0.14271233 2. -0.86900061 -0.93724825 0.80252265 -0.95580861 -1.44488356 -0.57791264 -1.13413082
2. -1.54560852 0.43385941 0.88128512 2. 0.34949527 -0.95622858 0.37991692 0.54569224 -0.75352017
0.39825805 -2.16934883 2. -2.21747411 -0.45542472 2. 0.87466259 2. 0.84665957 -0.94000536
-1.31879071 0.07944813 0.2495135 -1.83407838 2. 0.08627338 0.4670269 0.10993878 0.43422025 2.
-0.15992854 -2.33966934 -0.04066878 -1.29530305 -1.34624933 2. -0.88062213 -0.61243776 0.2095878 0.16989294
0.90521838 -0.19191445 -0.39435977 -0.57472974 0.32285371 -1.41945 -0.50926629 0.15733278 0.77452486 0.11358008
2. -0.54742563 2. -0.23362342 0.94280006 2. 0.17968994 2. -0.97442727 0.02435354
2. -0.3583253 -1.13720051 -1.07801469 -0.20929034 0.78332422 -0.05183476 -0.98637273 -0.35651872 0.71030747\]
十六、通用判断函数
python
import numpy as np
a = np.random.normal(loc=0, scale=1, size=(8, 10))
# Work on the first 2 rows and first 5 columns.
b = a[:2, :5]
print(b)
positive = b > 0
# all(): True only when every element satisfies the condition.
c = positive.all()
print(c)
# any(): True as soon as one element satisfies the condition.
d = positive.any()
print(d)

十七、三元运算
python
import numpy as np
a = np.random.normal(loc=0, scale=1, size=(8, 10))
# Work on the first 2 rows and first 5 columns.
b = a[:2, :5]
print(b)
# Element-wise ternary: 1 where the condition holds, 0 elsewhere.
c = np.where(b > 0, 1, 0)
print(c)
# Both conditions must hold (logical AND): 1 < value < 2.
d = np.where((b > 1) & (b < 2), 1, 0)
print(d)
# Either condition may hold (logical OR): value > 0 or value < -1.
e = np.where((b > 0) | (b < -1), 3, 2)
print(e)
十八、统计运算
python
import numpy as np
a = np.random.normal(loc=0, scale=1, size=(2, 5))
# axis=0 reduces down the rows, leaving one maximum per column (5 values here);
# axis=1 would instead give one maximum per row.
b = np.max(a, axis=0)
print(b)
# Position of the largest entry within b.
print(np.argmax(b))

十九、矩阵
1.矩阵和向量
矩阵:理解-二维数组
向量:理解-一维数组
2.加法和标量乘法
加法:对应位置相加
乘法:标量和每个位置的元素相乘
3.矩阵向量(矩阵)乘法:[M行,N列] * [N行,L列] = [M行,L列]
4.矩阵乘法性质
满足结合律,不满足交换律
5.单位矩阵
对角线为1,其他位置为0的矩阵
6.逆
矩阵A * 矩阵B = 单位矩阵I,那么A和B就互为逆矩阵
7.转置
行列互换
案例矩阵运算
python
import numpy as np
# 8 x 2 matrix multiplied by a 2 x 1 matrix gives an 8 x 1 result.
a = np.array(
    [
        [80, 86],
        [86, 85],
        [86, 89],
        [85, 81],
        [88, 90],
        [82, 87],
        [94, 99],
        [95, 99],
    ]
)
b = np.array([[0.7], [0.3]])
# matmul: matrix product; it does not accept a scalar operand.
print(np.matmul(a, b))
# dot: gives the same matrix product for these 2-D inputs ...
print(np.dot(a, b))
# ... and also accepts a scalar, scaling every element.
print(np.dot(a, 10))

二十、数据间运算
python
import numpy as np
# Array-with-scalar arithmetic is applied to every element.
a = np.array([1, 2, 3, 4])
print(a + 1)
print(a / 2)
print(a * 10)

# Array-with-array arithmetic: the (2, 1) column c is broadcast across the
# six columns of the (2, 6) array b.
b = np.array(
    [
        [1, 2, 3, 2, 1, 4],
        [5, 6, 1, 2, 3, 1],
    ]
)
c = np.array([[1], [3]])
print(b + c)  # [[2 3 4 3 2 5],[8 9 4 5 6 4]]
