Multi-layer neural networks.

The code is posted first; I will add the basic theory when I have time.

For the SupervisedLearningModel, NNLayer and SoftmaxRegression classes used in the code, please refer to the previous note.

Multi-layer neural network:
```python
import numpy as np
from NNBase import NNLayer
from softmax import SoftmaxRegression
from dp.supervised import NNBase
from time import time


class MNN(NNBase.SupervisedLearningModel):
    '''
    Multi-layer neural network.
    '''
    def __init__(self, params):
        '''
        Constructor
        parameters:
            params - the network configuration, dict
            params.inputSize - dimension of input features
            params.outputSize - number of output classes
            params.layerSizes - an array, sizes of all layers, including all hidden layers and the output layer
            params.Lambda - scaling parameter for the L2 weight regularization penalty
            params.activitionFunc - which type of activation function to use in the hidden layers
        '''
        layerSizes = params['layerSizes']
        self.numLayers = len(layerSizes)
        self.allLayers = []
        self.X = 0

        # initialize all hidden layers
        inputSize = params['inputSize']
        for i in range(self.numLayers - 1):
            layer = NNLayer(inputSize, layerSizes[i], params['Lambda'], actFunc=params['activitionFunc'])
            self.allLayers.append(layer)
            inputSize = layerSizes[i]
        # initialize the softmax layer - output layer
        outputLayer = SoftmaxRegression(inputSize, params['outputSize'], params['Lambda'])
        self.allLayers.append(outputLayer)

    def rebuildTheta(self, theta):
        '''
        convert the 1-dim weight vector back into each layer's weights and intercepts
        overrides the method of the super class
        '''
        starter = 0
        for i in range(self.numLayers):
            thetaSize = (self.allLayers[i].inputSize + 1) * self.allLayers[i].outputSize
            th = theta[starter:starter + thetaSize]
            starter = starter + thetaSize
            self.allLayers[i].rebuildTheta(th)

    def flatTheta(self):
        '''
        convert all weights and intercepts into a 1-dim vector
        overrides the method of the super class
        '''
        theta = self.allLayers[0].flatTheta()
        for i in range(self.numLayers - 1):
            temp = self.allLayers[i + 1].flatTheta()
            theta = np.hstack((theta, temp))

        return theta

    def nnForward(self, theta, X, y):
        '''
        the forward pass
        '''
        act = X
        self.rebuildTheta(theta)
        self.allLayers[-1].setTrainingLabels(y)

        for i in range(self.numLayers):
            self.allLayers[i].input = act
            act = self.allLayers[i].forward()

        return act

    def cost(self, theta, X, y):
        '''
        The cost function.
        Parameters:
            theta - the vector holding all layers' weights and intercepts,
                    as required by the scipy.optimize functions
        '''
        h = np.log(self.nnForward(theta, X, y))
        # h * y_mat applies the indicator function
        cost = -np.sum(h * self.allLayers[-1].y_mat, axis=(0, 1)) / X.shape[1]

        return cost

    def gradient(self, theta, X, y):
        '''
        compute the gradient.
        overrides the method of the super class.
        Parameters:
            theta - 1-dim vector containing all weights and intercepts
        '''
        self.nnForward(theta, X, y)

        i = self.numLayers - 1
        grad = np.empty(0)
        while i > 0:
            # get the gradient of one layer
            gwb = self.allLayers[i].layerGradient()
            # backpropagate the error terms
            self.allLayers[i - 1].delta = self.allLayers[i].backpropagate()
            grad = np.hstack((gwb.ravel(), grad))
            i = i - 1
        # get the gradient of the first hidden layer
        gwb = self.allLayers[0].layerGradient()

        grad = np.hstack((gwb.ravel(), grad))
        return grad

    def costFunc(self, theta, X, y):
        '''
        return both the cost and the gradient; used by the optimizer and by gradient checking
        '''
        grad = self.gradient(theta, X, y)

        h = np.log(self.allLayers[-1].activation)
        cost = -np.sum(h * self.allLayers[-1].y_mat, axis=(0, 1)) / X.shape[1]
        return cost, grad

    def predict(self, Xtest):
        '''
        Prediction.
        overrides the method of the super class.
        Before calling this method, the model should be trained.
        Parameter:
            Xtest - the data to be predicted, numFeatures by numData
        '''
        act = Xtest

        for i in range(self.numLayers - 1):
            self.allLayers[i].input = act
            act = self.allLayers[i].forward()
        return self.allLayers[-1].predict(act)


def checkGradient(X, y):
    '''
    numerically check the analytic gradient on a small data set
    '''
    params = dict()
    params['inputSize'] = X.shape[0]
    params['outputSize'] = 10
    params['layerSizes'] = [50, 20, 10]
    params['Lambda'] = 0
    params['activitionFunc'] = 'sigmoid'

    testnn = MNN(params)

    theta = testnn.flatTheta()
    cost, grad = testnn.costFunc(theta, X, y)
    numgrad = np.zeros(grad.shape)

    e = 1e-6

    for i in range(np.size(grad)):
        theta[i] = theta[i] - e
        loss1, g1 = testnn.costFunc(theta, X, y)
        theta[i] = theta[i] + 2 * e
        loss2, g2 = testnn.costFunc(theta, X, y)
        theta[i] = theta[i] - e

        numgrad[i] = (-loss1 + loss2) / (2 * e)

    print(np.sum(np.abs(grad - numgrad)) / np.size(grad))
```
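For reference, the classes from the previous note are assumed to expose roughly the following interface. This skeleton is reconstructed only from how MNN calls them above, not from their actual implementations:

```python
# Skeleton of the interface MNN relies on (reconstructed from its usage above;
# the real implementations are in the previous note).
class NNLayer:
    def __init__(self, inputSize, outputSize, Lambda, actFunc='sigmoid'):
        self.inputSize = inputSize    # number of inputs to this layer
        self.outputSize = outputSize  # number of units in this layer
        self.input = None             # set by MNN before calling forward()
        self.delta = None             # error term assigned during backpropagation

    def rebuildTheta(self, th): ...   # unpack a flat (inputSize+1)*outputSize vector
    def flatTheta(self): ...          # return weights and intercepts as a 1-dim vector
    def forward(self): ...            # activation of this layer for self.input
    def layerGradient(self): ...      # gradient w.r.t. this layer's parameters
    def backpropagate(self): ...      # delta to hand to the previous layer


class SoftmaxRegression:
    def __init__(self, inputSize, outputSize, Lambda): ...
    def setTrainingLabels(self, y): ...  # builds the indicator matrix self.y_mat
    def predict(self, act): ...          # predicted labels for the given activations
    # also exposes y_mat, activation, and the same rebuildTheta/flatTheta/
    # forward/layerGradient/backpropagate interface used above
```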
Stochastic gradient descent (adapted from the UFLDL MATLAB SGD code):
```python
import numpy as np


def minFuncSGD(funcObj, theta, data, labels, options):
    '''
    Runs stochastic gradient descent with momentum to optimize the
    parameters for the given objective.

    Parameters:
        funcObj - function handle which accepts as input theta,
                  data, labels and returns the cost and the gradient
                  w.r.t. theta
        theta   - unrolled parameter vector
        data    - training data, numFeatures x numExamples
        labels  - corresponding labels in a numExamples x 1 vector
        options - dict storing specific options for the optimization

    Returns:
        opttheta - optimized parameter vector

    Options (* required)
        epochs*    - number of epochs through the data
        alpha*     - initial learning rate
        minibatch* - size of the minibatch
        momentum   - momentum constant, defaults to 0.9
    '''
    epochs = options['epochs']
    alpha = options['alpha']
    minibatch = options['minibatch']
    if options.get('momentum') is None:
        options['momentum'] = 0.9
    m = labels.shape[0]
    mom = 0.5
    momIncrease = 20
    velocity = np.zeros(theta.shape)

    # SGD loop
    it = 0
    for e in range(epochs):
        rp = np.random.permutation(m)

        for i in range(0, m - minibatch, minibatch):
            it = it + 1
            # increase momentum after momIncrease iterations
            if it == momIncrease:
                mom = options['momentum']
            # get the next randomly selected minibatch
            mb_data = data[:, rp[i:i + minibatch]]
            mb_labels = labels[rp[i:i + minibatch]]
            # evaluate the objective function on the minibatch
            cost, grad = funcObj(theta, mb_data, mb_labels)
            # add the gradient, scaled by the learning rate, to the weighted
            # velocity vector, then apply the SGD update rule
            velocity = mom * velocity + alpha * grad
            theta = theta - velocity
            print('Epoch %d: Cost on iteration %d is %f' % (e, it, cost))
        # anneal the learning rate by a factor of two after each epoch
        alpha = alpha / 2.0

    return theta
```
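A minimal sketch of how this optimizer could be wired to the network's costFunc, assuming data laid out as numFeatures x numExamples (as in the test code below). The hyperparameter values are illustrative only; the test code itself trains through nn.train from the base class:

```python
# Hypothetical wiring of minFuncSGD to the MNN class above (a sketch only;
# the test code below uses nn.train from the base class instead).
params = {'inputSize': X.shape[0], 'outputSize': 10, 'layerSizes': [256, 10],
          'Lambda': 0, 'activitionFunc': 'sigmoid'}
nn = MNN(params)
options = {'epochs': 3, 'alpha': 0.1, 'minibatch': 256, 'momentum': 0.9}

theta0 = nn.flatTheta()                    # initial unrolled parameters
opttheta = minFuncSGD(nn.costFunc, theta0, X, y, options)
nn.rebuildTheta(opttheta)                  # load the optimized weights back
```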
Testing:

Tested on the MNIST dataset; the accuracy is around 96%.

Test code:
```python
if __name__ == '__main__':
    X = np.load('../../common/trainImages.npy') / 255
    X = X.T
    y = np.load('../../common/trainLabels.npy')
    '''
    X1 = X[:, :10]
    y1 = y[:10]
    checkGradient(X1, y1)
    '''
    Xtest = np.load('../../common/testImages.npy') / 255
    Xtest = Xtest.T
    ytest = np.load('../../common/testLabels.npy')

    params = dict()
    params['inputSize'] = X.shape[0]
    params['outputSize'] = 10
    params['layerSizes'] = [256, 10]
    params['Lambda'] = 0
    params['activitionFunc'] = 'sigmoid'

    nn = MNN(params)
    t0 = time()
    nn.train(X, y)
    print('training Time %.5f s' % (time() - t0))
    print('test acc :%.3f%%' % (nn.performance(Xtest, ytest)))
```
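performance comes from the SupervisedLearningModel base class in the previous note. If that class is not at hand, an equivalent accuracy check can be done directly with predict; this sketch assumes predict returns one label per column of Xtest:

```python
# Equivalent accuracy check using predict() directly (a sketch; the actual
# performance() method lives in the SupervisedLearningModel base class).
pred = nn.predict(Xtest)                 # one predicted label per test example
acc = 100.0 * np.mean(pred == ytest)     # percentage of correct predictions
print('test acc :%.3f%%' % acc)
```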
Known issues:

1. When optimizing with fmin_cg and fmin_l_bfgs_b from scipy.optimize (called roughly as in the sketch after this list), a network with a single hidden layer works fine and gives the expected results, but with more than one hidden layer the optimizers fail to produce correct results and stop after only a handful of iterations. Plain gradient descent and stochastic gradient descent, on the other hand, do reach the expected results for models with multiple hidden layers. I am not sure whether the problem lies in my neural network implementation or in scipy.optimize.

2. The cost function and gradient in the code include no regularization penalty. Since the output layer is a softmax (with the output of the last class fixed to 0 and no penalty term), I am not sure whether the hidden-layer parameters should be regularized. In practice, though, the results without any penalty are about the same as those obtained with a quadratic cost plus a penalty term.
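For reference, the scipy.optimize calls from issue 1 look roughly like this; a sketch only, with illustrative maxiter values:

```python
from scipy.optimize import fmin_cg, fmin_l_bfgs_b

theta0 = nn.flatTheta()

# fmin_cg takes the cost and the gradient as separate callables
opttheta = fmin_cg(nn.cost, theta0, fprime=nn.gradient, args=(X, y), maxiter=100)

# fmin_l_bfgs_b accepts a single callable that returns (cost, grad)
opttheta, cost, info = fmin_l_bfgs_b(nn.costFunc, theta0, args=(X, y), maxiter=100)

nn.rebuildTheta(opttheta)
```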