Deep Learning Study Notes (2): Multi-Layer Neural Network

Multi-layer neural networks.

The code is posted first; I will fill in the basic theory when I have time.

For the SupervisedLearningModel, NNLayer, and SoftmaxRegression classes that appear in the code, please refer to the previous note.

The multi-layer neural network:

import numpy as np
from NNBase import NNLayer
from softmax import SoftmaxRegression
from dp.supervised import NNBase
from time import time

class MNN(NNBase.SupervisedLearningModel):
    '''
    Multi-layer neural network: a stack of NNLayer hidden layers with a
    SoftmaxRegression output layer on top.
    '''
    def __init__(self, params):
        '''
        Constructor
        Parameters:
            params - the network configuration, dict
            params['inputSize']      - dimension of the input features
            params['outputSize']     - number of output classes
            params['layerSizes']     - sizes of all layers, including all hidden layers and the output layer
            params['Lambda']         - scaling parameter for the L2 weight regularization penalty
            params['activitionFunc'] - which type of activation function to use in the hidden layers
        '''
        layerSizes = params['layerSizes']
        self.numLayers = len(layerSizes)
        self.allLayers = []
        self.X = 0

        # initialize all hidden layers
        inputSize = params['inputSize']
        for i in range(self.numLayers - 1):
            layer = NNLayer(inputSize, layerSizes[i], params['Lambda'], actFunc=params['activitionFunc'])
            self.allLayers.append(layer)
            inputSize = layerSizes[i]
        # initialize the softmax layer - the output layer
        outputLayer = SoftmaxRegression(inputSize, params['outputSize'], params['Lambda'])
        self.allLayers.append(outputLayer)

    def rebuildTheta(self, theta):
        '''
        Convert the 1-dim weight vector back into the weights and intercepts of all layers.
        Overrides the method of the super class.
        '''
        starter = 0
        for i in range(self.numLayers):
            thetaSize = (self.allLayers[i].inputSize + 1) * self.allLayers[i].outputSize
            th = theta[starter:starter + thetaSize]
            starter = starter + thetaSize
            self.allLayers[i].rebuildTheta(th)

    def flatTheta(self):
        '''
        Convert the weights and intercepts of all layers into a 1-dim vector.
        Overrides the method of the super class.
        '''
        theta = self.allLayers[0].flatTheta()
        for i in range(self.numLayers - 1):
            temp = self.allLayers[i + 1].flatTheta()
            theta = np.hstack((theta, temp))
        return theta

    def nnForward(self, theta, X, y):
        '''
        The forward pass: feed the activations through all layers.
        '''
        act = X
        self.rebuildTheta(theta)
        self.allLayers[-1].setTrainingLabels(y)
        for i in range(self.numLayers):
            self.allLayers[i].input = act
            act = self.allLayers[i].forward()
        return act

    def cost(self, theta, X, y):
        '''
        The cost function.
        Parameters:
            theta - vector holding the flattened weights and intercepts of
                    all layers, as required by the scipy.optimize functions
        '''
        h = np.log(self.nnForward(theta, X, y))
        # h * y_mat applies the indicator function
        cost = -np.sum(h * self.allLayers[-1].y_mat, axis=(0, 1)) / X.shape[1]
        return cost

    def gradient(self, theta, X, y):
        '''
        Compute the gradient.
        Overrides the method of the super class.
        Parameters:
            theta - 1-dim vector containing all weights and intercepts
        '''
        self.nnForward(theta, X, y)
        i = self.numLayers - 1
        grad = np.empty(0)
        while i > 0:
            # get the gradient of one layer
            gwb = self.allLayers[i].layerGradient()
            # backpropagate the error terms
            self.allLayers[i - 1].delta = self.allLayers[i].backpropagate()
            grad = np.hstack((gwb.ravel(), grad))
            i = i - 1
        # get the gradient of the first hidden layer
        gwb = self.allLayers[0].layerGradient()
        grad = np.hstack((gwb.ravel(), grad))
        return grad

    def costFunc(self, theta, X, y):
        '''
        Compute cost and gradient together; the forward pass done inside
        gradient() is reused for the cost.
        '''
        grad = self.gradient(theta, X, y)
        h = np.log(self.allLayers[-1].activation)
        cost = -np.sum(h * self.allLayers[-1].y_mat, axis=(0, 1)) / X.shape[1]
        return cost, grad

    def predict(self, Xtest):
        '''
        Prediction.
        Overrides the method of the super class.
        The model should be trained before calling this method.
        Parameter:
            Xtest - the data to be predicted, numFeatures by numData
        '''
        act = Xtest
        for i in range(self.numLayers - 1):
            self.allLayers[i].input = act
            act = self.allLayers[i].forward()
        return self.allLayers[-1].predict(act)

def checkGradient(X, y):
    params = dict()
    params['inputSize'] = X.shape[0]
    params['outputSize'] = 10
    params['layerSizes'] = [50, 20, 10]
    params['Lambda'] = 0
    params['activitionFunc'] = 'sigmoid'

    testnn = MNN(params)
    theta = testnn.flatTheta()
    cost, grad = testnn.costFunc(theta, X, y)
    numgrad = np.zeros(grad.shape)

    e = 1e-6
    for i in range(np.size(grad)):
        theta[i] = theta[i] - e
        loss1, g1 = testnn.costFunc(theta, X, y)
        theta[i] = theta[i] + 2 * e
        loss2, g2 = testnn.costFunc(theta, X, y)
        theta[i] = theta[i] - e
        # central difference approximation of the i-th partial derivative
        numgrad[i] = (-loss1 + loss2) / (2 * e)

    print(np.sum(np.abs(grad - numgrad)) / np.size(grad))
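
The checkGradient function at the end of the listing compares the backpropagated gradient against a central-difference estimate. A minimal way to exercise it, assuming the numFeatures-by-numData layout used throughout this post and integer class labels 0-9 (the random inputs here are purely illustrative):

X_small = np.random.rand(64, 10)        # 64 features, 10 examples
y_small = np.random.randint(0, 10, 10)  # labels in {0, ..., 9}
checkGradient(X_small, y_small)         # the printed mean |grad - numgrad| should be tiny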

Stochastic gradient descent (adapted from the UFLDL Matlab stochastic gradient descent code):

import numpy as np

def minFuncSGD(funcObj, theta, data, labels, options):
    '''
    Runs stochastic gradient descent with momentum to optimize the
    parameters for the given objective.

    Parameters:
      funcObj   - function handle which accepts theta, data and labels as
                  input and returns the cost and the gradient w.r.t. theta
      theta     - unrolled parameter vector
      data      - stores the data in an m x n x numExamples tensor
      labels    - corresponding labels in a numExamples x 1 vector
      options   - dict holding specific options for the optimization

    Returns:
      opttheta  - optimized parameter vector

    Options (* required):
      epochs*    - number of epochs through the data
      alpha*     - initial learning rate
      minibatch* - size of the minibatch
      momentum   - momentum constant, defaults to 0.9
    '''
    epochs = options['epochs']
    alpha = options['alpha']
    minibatch = options['minibatch']
    if options.get('momentum') is None:
        options['momentum'] = 0.9
    m = labels.shape[0]
    mom = 0.5
    momIncrease = 20
    velocity = np.zeros(theta.shape)

    # SGD loop
    it = 0
    for e in range(epochs):
        rp = np.random.permutation(m)
        for i in range(0, m - minibatch, minibatch):
            it = it + 1
            # increase the momentum after momIncrease iterations
            if it == momIncrease:
                mom = options['momentum']
            # get the next randomly selected minibatch
            mb_data = data[:, rp[i:i + minibatch]]
            mb_labels = labels[rp[i:i + minibatch]]
            # evaluate the objective function on the minibatch
            cost, grad = funcObj(theta, mb_data, mb_labels)
            # add the weighted velocity vector to the gradient scaled by the
            # learning rate, then update the weights (SGD update rule)
            velocity = mom * velocity + alpha * grad
            theta = theta - velocity
            print('Epoch %d: Cost on iteration %d is %f' % (e, it, cost))
        # anneal the learning rate by a factor of two after each epoch
        alpha = alpha / 2.0

    return theta
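
A sketch of how minFuncSGD could be wired to the MNN model from the first listing; the option values here are illustrative, not taken from this post:

options = {'epochs': 3, 'alpha': 0.1, 'minibatch': 256, 'momentum': 0.9}
nn = MNN(params)                    # params as in the test code below
theta0 = nn.flatTheta()
opttheta = minFuncSGD(nn.costFunc, theta0, X, y, options)
nn.rebuildTheta(opttheta)           # load the optimized weights back into the model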

Testing:

Tested on the MNIST dataset; the accuracy is around 96%.

Test code:

if __name__ == '__main__':
    X = np.load('../../common/trainImages.npy') / 255
    X = X.T
    y = np.load('../../common/trainLabels.npy')
    '''
    X1 = X[:, :10]
    y1 = y[:10]
    checkGradient(X1, y1)
    '''
    Xtest = np.load('../../common/testImages.npy') / 255
    Xtest = Xtest.T
    ytest = np.load('../../common/testLabels.npy')

    params = dict()
    params['inputSize'] = X.shape[0]
    params['outputSize'] = 10
    params['layerSizes'] = [256, 10]
    params['Lambda'] = 0
    params['activitionFunc'] = 'sigmoid'

    nn = MNN(params)
    t0 = time()
    nn.train(X, y)
    print('training Time %.5f s' % (time() - t0))
    print('test acc :%.3f%%' % (nn.performance(Xtest, ytest)))

Open issues:

  1. When optimizing with the fmin_cg and fmin_l_bfgs_b functions from scipy.optimize, a network with a single hidden layer works fine and produces the expected results, but with more than one hidden layer the optimization does not reach a correct result: it terminates after only a single-digit number of iterations. Plain gradient descent and stochastic gradient descent, on the other hand, do produce the expected results for models with multiple hidden layers. I do not know whether the problem is in my neural network implementation or in scipy.optimize. (A sketch of the optimizer call is given after these notes.)

  2. The cost function and gradient in the code use no penalty (regularization) term. Since the output layer is a softmax (with the output of the last class fixed to 0 and no penalty applied), I am not sure whether the hidden-layer parameters should be regularized. Judging from the actual results, though, using no penalty at all gives roughly the same results as a quadratic cost function plus a penalty term. (A sketch of a possible hidden-layer penalty is given below.)
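
For reference on the first issue, a minimal sketch of the fmin_l_bfgs_b call in this setup (nn, X and y are the model and MNIST arrays from the listings above; the maxiter value is illustrative). costFunc returns (cost, grad), which is exactly the form fmin_l_bfgs_b expects when no separate gradient function is passed:

from scipy.optimize import fmin_l_bfgs_b

theta0 = nn.flatTheta()
opttheta, fval, info = fmin_l_bfgs_b(nn.costFunc, theta0, args=(X, y), maxiter=400)
print('iterations: %d, final cost: %f' % (info['nit'], fval))
nn.rebuildTheta(opttheta)   # load the optimized weights back into the model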
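
On the second issue, a sketch of what an L2 penalty restricted to the hidden layers might look like. It assumes each hidden NNLayer exposes its weight matrix (excluding the intercept column) as an attribute W; that attribute name is an assumption about the code from the previous note, not something shown here:

def costFuncL2(nn, theta, X, y, Lambda):
    # unpenalized cost and gradient from the model above
    cost, grad = nn.costFunc(theta, X, y)
    for layer in nn.allLayers[:-1]:  # hidden layers only, skip the softmax layer
        cost += 0.5 * Lambda * np.sum(layer.W ** 2)
    # the matching gradient term, Lambda * layer.W, would still have to be
    # added at the corresponding positions of the flattened grad vector
    return cost, grad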

posted on 2015-02-04 16:13

Reposted from: https://www.cnblogs.com/arsenicer/p/4272649.html
