▶ Convolutional Neural Network
● Code, based on [https://zybuluo.com/hanbingtao/note/541458]. It mainly implements the convolution layer class ConvLayer and the max-pooling layer class MaxPoolLayer.
import numpy as np

globalIta = 0.05        # global learning rate (eta)
globalEpsilon = 1e-3    # perturbation used by the gradient check

class ReluActivator(object):    # ReLU activation function
    def forward(self, x):
        return max(0, x)

    def backward(self, x):
        return int(x > 0)

class IdentityActivator(object):    # identity activation function, f(x) = x
    def forward(self, x):
        return x

    def backward(self, x):
        return 1

def subArray(sArray, i, j, fRow, fCol, stride):    # extract the input patch for output cell (i, j)
    startI = i * stride
    startJ = j * stride
    if sArray.ndim == 2:
        return sArray[startI : startI + fRow, startJ : startJ + fCol]
    return sArray[:, startI : startI + fRow, startJ : startJ + fCol]

def get_max_index(array):    # (row, col) index of the maximum of a 2-D array
    maxI = 0
    maxJ = 0
    max_value = array[0, 0]
    for i in range(array.shape[0]):
        for j in range(array.shape[1]):
            if array[i, j] > max_value:
                maxI, maxJ = i, j
                max_value = array[i, j]
    return maxI, maxJ

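# Side note (illustrative sketch, not part of the original listing): NumPy can
# return the same row-major, first-occurrence index pair in one call. The name
# get_max_index_np is hypothetical; the loop version above remains the one used.
def get_max_index_np(array):
    return np.unravel_index(np.argmax(array), array.shape)
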
def conv(sArray, fArray, dArray, stride, fBias):    # compute the convolution (strictly a cross-correlation: the filter is not flipped here)
    dRow, dCol = dArray.shape
    fRow, fCol = fArray.shape[:2]
    for i in range(dRow):
        for j in range(dCol):
            dArray[i][j] = np.sum(subArray(sArray, i, j, fRow, fCol, stride) * fArray) + fBias

def padding(sArray, zpRow, zpCol = -1):    # surround the array with zero padding
    if zpCol < 0:
        zpCol = zpRow

    if sArray.ndim == 2:
        sRow, sCol = sArray.shape
        dArray = np.zeros((sRow + 2 * zpRow, sCol + 2 * zpCol))
        dArray[zpRow : zpRow + sRow, zpCol : zpCol + sCol] = sArray
        return dArray

    if sArray.ndim == 3:
        sPage, sRow, sCol = sArray.shape
        dArray = np.zeros((sPage, sRow + 2 * zpRow, sCol + 2 * zpCol))
        dArray[:, zpRow : zpRow + sRow, zpCol : zpCol + sCol] = sArray
        return dArray
    return sArray

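# Quick sanity check of the two helpers above (illustrative sketch, not part of
# the original listing): one ring of zero padding turns a 3x3 array into a 5x5
# one, and subArray then slices stride-aligned patches out of it.
_a = np.arange(9, dtype=np.float64).reshape(3, 3)
_p = padding(_a, 1)                      # shape (5, 5), original values centered
_patch = subArray(_p, 1, 1, 3, 3, 2)     # the 3x3 patch whose top-left corner is (2, 2)
assert _patch.shape == (3, 3)
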
def myMap(array, op):    # apply op to every element of the array in place
    for i in np.nditer(array, op_flags=['readwrite']):
        i[...] = op(i)

class Filter(object):    # convolution filter with three dimensions (depth, rows, cols)
    def __init__(self, row, col, page):
        self.page = page
        self.row = row
        self.col = col
        self.fArray = np.random.uniform(-1e-4, 1e-4, (self.page, self.row, self.col))
        self.fBias = 0
        self.fArrayGrad = np.zeros(self.fArray.shape)
        self.fBiasGrad = 0

    def __repr__(self):
        return 'filter weights:\n%s\nbias:\n%s' % (repr(self.fArray), repr(self.fBias))

    def update(self, ita):    # gradient-descent update of the filter weights
        self.fArray -= ita * self.fArrayGrad
        self.fBias -= ita * self.fBiasGrad

class ConvLayer(object):    # a single convolution layer; the constructor fixes the input, filter, and output geometry, and allocates the filters and the output array
    def __init__(self, sRow, sCol, nChannel, fRow, fCol, nFilter, zeroPad, stride = 1, activator = IdentityActivator(), ita = globalIta):
        self.sRow = sRow
        self.sCol = sCol
        self.nChannel = nChannel
        self.fRow = fRow
        self.fCol = fCol
        self.nFilter = nFilter
        self.zeroPad = zeroPad
        self.stride = stride
        self.activator = activator
        self.ita = ita
        self.filters = [ Filter(self.fRow, self.fCol, self.nChannel) for i in range(self.nFilter) ]
        self.dRow = ConvLayer.calculateDSize(self.sRow, fRow, self.zeroPad, self.stride)
        self.dCol = ConvLayer.calculateDSize(self.sCol, fCol, self.zeroPad, self.stride)
        self.dArray = np.zeros((self.nFilter, self.dRow, self.dCol))

    @staticmethod    # size of the output feature map
    def calculateDSize(input_size, filter_size, zeroPad, stride):
        return (input_size - filter_size + 2 * zeroPad) // stride + 1

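    # Worked example (added note): for the test data used later (5x5 input, 3x3
    # filters, zero padding 1, stride 2) this formula gives
    # (5 - 3 + 2*1) // 2 + 1 = 3, matching the 3x3 feature maps printed below.
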
    def forward(self, sArray):    # forward pass: convolve, then apply the activation
        self.sArray = sArray
        self.sPadArray = padding(sArray, self.zeroPad)
        for f in range(self.nFilter):
            filter = self.filters[f]
            conv(self.sPadArray, filter.fArray, self.dArray[f], self.stride, filter.fBias)
        myMap(self.dArray, self.activator.forward)

    def backward(self, sArray, deltaArrayNextLayer, activator):    # compute the error terms and the filter gradients
        self.forward(sArray)
        self.bpDeltaAndGrad(deltaArrayNextLayer, activator)

    def update(self):    # update every filter with its gradient
        for filter in self.filters:
            filter.update(self.ita)

    def bpDeltaAndGrad(self, deltaArrayNextLayer, activator):    # compute the sensitivity map passed to the previous layer, plus the filter gradients
        exArray = self.expandDelta(deltaArrayNextLayer)    # expand the next layer's delta map to the stride = 1 case
        exPage, exRow, exCol = exArray.shape    # zero-pad exArray; the border of the delta map also receives residuals, but they are not propagated upwards
        exPadArray = padding(exArray, (self.sRow + self.fRow - 1 - exRow) // 2, (self.sCol + self.fCol - 1 - exCol) // 2)

        self.deltaArray = np.zeros((self.nChannel, self.sRow, self.sCol))    # this layer's deltaArray
        for f in range(self.nFilter):    # process each filter in turn
            filter = self.filters[f]
            rotatedFArray = np.array(list(map(lambda i: np.rot90(i, 2), filter.fArray)))    # rotate the filter 180 degrees to perform a convolution in the mathematical sense
            temp = np.zeros((self.nChannel, self.sRow, self.sCol))    # with several filters the final delta is the sum over all filters, so accumulate
            for d in range(self.nChannel):
                conv(exPadArray[f], rotatedFArray[d], temp[d], 1, 0)    # error term for channel d
                conv(self.sPadArray[d], exArray[f], filter.fArrayGrad[d], 1, 0)    # gradient of the filter weights
            self.deltaArray += temp
            filter.fBiasGrad = np.sum(exArray[f])    # gradient of the bias

        temp = np.array(self.sArray)    # multiply the error terms by the derivative of the activation function
        myMap(temp, activator.backward)
        self.deltaArray *= temp

    def expandDelta(self, deltaArray):
        exRow = (self.sRow - self.fRow + 2 * self.zeroPad + 1)    # delta-map size once the stride is restored to 1
        exCol = (self.sCol - self.fCol + 2 * self.zeroPad + 1)

        exArray = np.zeros((deltaArray.shape[0], exRow, exCol))    # build the new delta map
        for i in range(self.dRow):
            for j in range(self.dCol):
                exArray[:, i * self.stride, j * self.stride] = deltaArray[:, i, j]
        return exArray

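# Illustration of expandDelta (hypothetical sketch, not part of the original
# listing): each entry of a stride-s delta map is written s positions apart into
# a stride-1-sized map, with zeros in between, so the full convolution above
# sees the deltas at the positions the filter actually visited. With the
# 5x5/3x3/pad-1/stride-2 test geometry, a 3x3 delta map per filter expands to 5x5.
_layer = ConvLayer(5, 5, 3, 3, 3, 2, 1, 2)    # same geometry as the test data below
_ex = _layer.expandDelta(np.ones((2, 3, 3)))  # one 3x3 delta map per filter
assert _ex.shape == (2, 5, 5)                 # ones at rows/cols 0, 2, 4; zeros elsewhere
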
class MaxPoolLayer(object):    # max-pooling layer; the constructor fixes the input, window, and output geometry, and allocates the output array
    def __init__(self, sRow, sCol, nChannel, fRow, fCol, stride):
        self.sRow = sRow
        self.sCol = sCol
        self.nChannel = nChannel
        self.fRow = fRow
        self.fCol = fCol
        self.stride = stride
        self.dRow = (sRow - fRow) // self.stride + 1
        self.dCol = (sCol - fCol) // self.stride + 1
        self.dArray = np.zeros((self.nChannel, self.dRow, self.dCol))

    def forward(self, sArray):    # forward pooling: each output cell is the max of its window
        for d in range(self.nChannel):
            for i in range(self.dRow):
                for j in range(self.dCol):
                    self.dArray[d, i, j] = (subArray(sArray[d], i, j, self.fRow, self.fCol, self.stride).max())

    def backward(self, sArray, deltaArrayNextLayer):    # backward pooling: route each delta to the argmax of its window
        self.deltaArray = np.zeros(sArray.shape)
        for d in range(self.nChannel):
            for i in range(self.dRow):
                for j in range(self.dCol):
                    patch_array = subArray(sArray[d], i, j, self.fRow, self.fCol, self.stride)
                    nonZeroRow, nonZeroCol = get_max_index(patch_array)
                    self.deltaArray[d, i * self.stride + nonZeroRow, j * self.stride + nonZeroCol] = deltaArrayNextLayer[d, i, j]

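# Minimal usage sketch for MaxPoolLayer (hypothetical, not part of the original
# listing): pooling has no weights, so backward only routes sensitivities; each
# delta is copied to the input position that won the max in the forward pass and
# every other position gets zero, as the delta array printed below also shows.
_pool = MaxPoolLayer(4, 4, 2, 2, 2, 2)    # 4x4 input, 2 channels, 2x2 window, stride 2
_x = np.arange(32, dtype=np.float64).reshape(2, 4, 4)
_pool.forward(_x)                         # _pool.dArray has shape (2, 2, 2)
_pool.backward(_x, np.ones((2, 2, 2)))    # one nonzero entry per 2x2 window
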
def gradCheck():    # numerical gradient check
    sArray, deltaNextLayer, convLayer = createTestDataConv()
    convLayer.forward(sArray)
    deltaArrayNextLayer = np.ones(convLayer.dArray.shape, dtype=np.float64)
    convLayer.backward(sArray, deltaArrayNextLayer, IdentityActivator())
    for d in range(convLayer.filters[0].fArrayGrad.shape[0]):
        for i in range(convLayer.filters[0].fArrayGrad.shape[1]):
            for j in range(convLayer.filters[0].fArrayGrad.shape[2]):
                convLayer.filters[0].fArray[d,i,j] += globalEpsilon
                convLayer.forward(sArray)
                err1 = np.sum(convLayer.dArray)
                convLayer.filters[0].fArray[d,i,j] -= 2*globalEpsilon
                convLayer.forward(sArray)
                err2 = np.sum(convLayer.dArray)
                expect_grad = (err1 - err2) / (2 * globalEpsilon)
                convLayer.filters[0].fArray[d,i,j] += globalEpsilon
                print('weights(%d,%d,%d): expected - actual %f - %f' % (d, i, j, expect_grad, convLayer.filters[0].fArrayGrad[d,i,j]))

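# Note (added): gradCheck uses the central-difference approximation. With
# E(w) = sum(dArray) standing in for a loss whose delta map is all ones, each
# weight w is nudged by +/- epsilon and
#     grad ~= (E(w + epsilon) - E(w - epsilon)) / (2 * epsilon)
# is compared against the analytic gradient from bpDeltaAndGrad; the two
# columns in the output below agree to the printed precision.
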
def createTestDataConv():    # test data for the convolution layer
    a = np.array(
        [[[0,1,1,0,2],[2,2,2,2,1],[1,0,0,2,0],[0,1,1,0,0],[1,2,0,0,2]],
         [[1,0,2,2,0],[0,0,0,2,0],[1,2,1,2,1],[1,0,0,0,0],[1,2,1,1,1]],
         [[2,1,2,0,0],[1,0,0,1,0],[0,2,1,0,1],[0,1,2,2,2],[2,1,0,0,1]]]
    )
    b = np.array( [[[0,1,1],[2,2,2],[1,0,0]],[[1,0,2],[0,0,0],[1,2,1]]] )
    c = ConvLayer(5,5,3,3,3,2,1,2,IdentityActivator(),0.001)
    c.filters[0].fArray = np.array( [[[-1,1,0],[0,1,0],[0,1,1]],[[-1,-1,0],[0,0,0],[0,-1,0]],[[0,0,-1],[0,1,0],[1,-1,-1]]], dtype=np.float64 )
    c.filters[0].fBias = 1
    c.filters[1].fArray = np.array( [[[1,1,-1],[-1,-1,1],[0,-1,1]],[[0,1,0],[-1,0,-1],[-1,1,0]],[[-1,0,0],[-1,0,1],[-1,0,0]]], dtype=np.float64 )
    c.filters[1].fBias = 0
    return a, b, c

def createTestDataPool():    # test data for the pooling layer
    a = np.array( [[[1,1,2,4],[5,6,7,8],[3,2,1,0],[1,2,3,4]],[[0,1,2,3],[4,5,6,7],[8,9,0,1],[3,4,5,6]]], dtype=np.float64 )
    b = np.array( [[[1,2],[2,4]],[[3,5],[8,2]]], dtype=np.float64 )
    c = MaxPoolLayer(4,4,2,2,2,2)
    return a, b, c

def testConv():
    print("\ntestConv")
    sArray, deltaNextLayer, convLayer = createTestDataConv()
    convLayer.forward(sArray)    # forward convolution
    print(convLayer.dArray)
    convLayer.backward(sArray, deltaNextLayer, IdentityActivator())    # backward pass: compute the filter gradients
    convLayer.update()    # apply the gradient update to the filter weights
    print(convLayer.filters[0])
    print(convLayer.filters[1])

def testPool():
    print("\ntestPool")
    sArray, deltaNextLayer, maxPoolLayer = createTestDataPool()
    maxPoolLayer.forward(sArray)    # forward pooling
    print('input array:\n%s\noutput array:\n%s' % (sArray, maxPoolLayer.dArray))
    maxPoolLayer.backward(sArray, deltaNextLayer)    # backward pooling
    print('input array:\n%s\nsensitivity array:\n%s\ndelta array:\n%s' % (sArray, deltaNextLayer, maxPoolLayer.deltaArray))

if __name__ == '__main__':
    testConv()
    testPool()
    gradCheck()
● Output
[[[ 6. 7. 5.]
[ 3. -1. -1.]
[ 2. -1. 4.]]
[[ 2. -5. -8.]
[ 1. -4. -4.]
[ 0. -5. -5.]]]
filter weights:
array([[[-1.008, 0.99 , -0.009],
[-0.005, 0.994, -0.006],
[-0.006, 0.995, 0.996]],
[[-1.004, -1.001, -0.004],
[-0.01 , -0.009, -0.012],
[-0.002, -1.002, -0.002]],
[[-0.002, -0.002, -1.003],
[-0.005, 0.992, -0.005],
[ 0.993, -1.008, -1.007]]])
bias:
0.991
filter weights:
array([[[ 9.980e-01, 9.980e-01, -1.001e+00],
[-1.004e+00, -1.007e+00, 9.970e-01],
[-4.000e-03, -1.004e+00, 9.980e-01]],
[[ 0.000e+00, 9.990e-01, 0.000e+00],
[-1.009e+00, -5.000e-03, -1.004e+00],
[-1.004e+00, 1.000e+00, 0.000e+00]],
[[-1.004e+00, -6.000e-03, -5.000e-03],
[-1.002e+00, -5.000e-03, 9.980e-01],
[-1.002e+00, -1.000e-03, 0.000e+00]]])
bias:
-0.007
input array:
[[[1. 1. 2. 4.]
[5. 6. 7. 8.]
[3. 2. 1. 0.]
[1. 2. 3. 4.]]
[[0. 1. 2. 3.]
[4. 5. 6. 7.]
[8. 9. 0. 1.]
[3. 4. 5. 6.]]]
output array:
[[[6. 8.]
[3. 4.]]
[[5. 7.]
[9. 6.]]]
input array:
[[[1. 1. 2. 4.]
[5. 6. 7. 8.]
[3. 2. 1. 0.]
[1. 2. 3. 4.]]
[[0. 1. 2. 3.]
[4. 5. 6. 7.]
[8. 9. 0. 1.]
[3. 4. 5. 6.]]]
sensitivity array:
[[[1. 2.]
[2. 4.]]
[[3. 5.]
[8. 2.]]]
delta array:
[[[0. 0. 0. 0.]
[0. 1. 0. 2.]
[2. 0. 0. 0.]
[0. 0. 0. 4.]]
[[0. 0. 0. 0.]
[0. 3. 0. 5.]
[0. 8. 0. 0.]
[0. 0. 0. 2.]]]
weights(0,0,0): expected - actual 5.000000 - 5.000000
weights(0,0,1): expected - actual 6.000000 - 6.000000
weights(0,0,2): expected - actual 5.000000 - 5.000000
weights(0,1,0): expected - actual 5.000000 - 5.000000
weights(0,1,1): expected - actual 7.000000 - 7.000000
weights(0,1,2): expected - actual 5.000000 - 5.000000
weights(0,2,0): expected - actual 5.000000 - 5.000000
weights(0,2,1): expected - actual 6.000000 - 6.000000
weights(0,2,2): expected - actual 5.000000 - 5.000000
weights(1,0,0): expected - actual 2.000000 - 2.000000
weights(1,0,1): expected - actual 1.000000 - 1.000000
weights(1,0,2): expected - actual 2.000000 - 2.000000
weights(1,1,0): expected - actual 9.000000 - 9.000000
weights(1,1,1): expected - actual 9.000000 - 9.000000
weights(1,1,2): expected - actual 9.000000 - 9.000000
weights(1,2,0): expected - actual 2.000000 - 2.000000
weights(1,2,1): expected - actual 1.000000 - 1.000000
weights(1,2,2): expected - actual 2.000000 - 2.000000
weights(2,0,0): expected - actual 4.000000 - 4.000000
weights(2,0,1): expected - actual 5.000000 - 5.000000
weights(2,0,2): expected - actual 4.000000 - 4.000000
weights(2,1,0): expected - actual 4.000000 - 4.000000
weights(2,1,1): expected - actual 9.000000 - 9.000000
weights(2,1,2): expected - actual 4.000000 - 4.000000
weights(2,2,0): expected - actual 4.000000 - 4.000000
weights(2,2,1): expected - actual 5.000000 - 5.000000
weights(2,2,2): expected - actual 4.000000 - 4.000000