我正在尝试训练一个非常简单的神经网络,它具有2个隐藏层(例如),每个都有5个神经元。
由于某种原因,我的成本函数(选择为交叉熵,但并不重要)对于某个数据集总是在增加。
这是我的代码-
import theano_net as N
from load import mnist
import theano
from theano import tensor as T
import numpy as np
import cv2
def floatX(X):
return np.asarray(X,dtype=theano.config.floatX)
def init_weights(shape):
return theano.shared(floatX(np.random.randn(*shape)*0.01))
def appendGD(params, grad, step):
updates = []
for p,g in zip(params,grad):
updates.append([p, p - (g * step)])
return updates
def model(X,w1,w2,wo):
h0 = X
z1 = T.dot(h0, w1.T) ## n on m1
h1 = T.nnet.sigmoid(z1)
z2 = T.dot(h1, w2.T) ## n on m2
h2 = T.nnet.sigmoid(z2)
zo = T.dot(h2, wo.T)
return T.nnet.softmax(zo)
numOfTrainPics = 4872
numOfTestPics = 382
numOfPixels = 40000
numOfLabels = 6
trX = np.zeros((numOfTrainPics,numOfPixels))
trY = np.zeros((numOfTrainPics,numOfLabels))
teX = np.zeros((numOfTestPics,numOfPixels))
teY = np.zeros((numOfTestPics,numOfLabels))
for i in range(1,4873): #generate trX and trY
img = cv2.imread('C:\\Users\\Oria\\Desktop\\Semester B\\Computer Vision Cornel 2016\\Train\\Train\\%s.jpg' %(i))
img = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
img = np.reshape(img,(1,numOfPixels))
trX[i-1,:] = img
if(i < 1330):
trY[i-1,0] = 1
if(i > 1329)&(i < 1817):
trY[i-1,1] = 1
if(i > 1816)&(i < 2389):
trY[i-1,2] = 1
if(i > 2388)&(i < 3043):
trY[i-1,3] = 1
if(i > 3042)&(i < 4438):
trY[i-1,4] = 1
if(i > 4437)&(i < 4873):
trY[i-1,5] = 1
for i in range(1,383):
img = cv2.imread('C:\\Users\\Oria\\Desktop\\Semester B\\Computer Vision Cornel 2016\\Test\\Test\\%s.jpg' %(i))
img = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
img = np.reshape(img,(1,numOfPixels))
teX[i-1,:] = img
if(i < 59):
teY[i-1,0] = 1
if(i > 58)&(i < 120):
teY[i-1,1] = 1
if(i > 119)&(i < 185):
teY[i-1,2] = 1
if(i > 184)&(i < 261):
teY[i-1,3] = 1
if(i > 260)&(i < 326):
teY[i-1,4] = 1
if(i > 325)&(i < 383):
teY[i-1,5] = 1
print "matrices generated"
###
x = T.fmatrix()
y = T.fmatrix()
step = 0.1
m1 = 5
m2 = 5
w1 = init_weights((m1, numOfPixels))
w2 = init_weights((m2, m1))
wo = init_weights((numOfLabels, m2))
temp = model(x, w1, w2, wo)
predictions = T.argmax(temp, axis= 1)
cost = T.mean(T.nnet.categorical_crossentropy(temp, y))
params = [w1, w2, wo]
gradient = T.grad(cost=cost, wrt = params)
update = appendGD(params, gradient, step)
train = theano.function(inputs = [x,y], outputs = cost, updates = update, allow_input_downcast=True)
predict = theano.function(inputs=[x],outputs=[predictions],allow_input_downcast=True)
for i in range(10000):
for start, end in zip(range(0,len(trX),241),range(241,len(trX),241)):
cost = train(trX[start:end], trY[start:end])
print cost
对于我在此代码中加载的trX,trY,我的成本函数始终在增加。
但是,当我运行相同的代码,但trX和trY来自MNIST数据集时,它可以正常工作,成本函数正在降低。
我不明白为什么会这样以及如何解决。
一个提示可能是,当我说说mnist数据集的trX的第一行(第一张图片)时,它是一个非常稀疏的矩阵,并且非零元素都在0到1之间。
[ 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.01176471
0.07058824 0.07058824 0.07058824 0.49411765 0.53333333 0.68627451
0.10196078 0.65098039 1. 0.96862745 0.49803922 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0.11764706 0.14117647 0.36862745
0.60392157 0.66666667 0.99215686 0.99215686 0.99215686 0.99215686
0.99215686 0.88235294 0.6745098 0.99215686 0.94901961 0.76470588
0.25098039 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.19215686
0.93333333 0.99215686 0.99215686 0.99215686 0.99215686 0.99215686
0.99215686 0.99215686 0.99215686 0.98431373 0.36470588 0.32156863
0.32156863 0.21960784 0.15294118 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0.07058824 0.85882353 0.99215686 0.99215686 0.99215686
0.99215686 0.99215686 0.77647059 0.71372549 0.96862745 0.94509804
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0.31372549 0.61176471
0.41960784 0.99215686 0.99215686 0.80392157 0.04313725 0.
0.16862745 0.60392157 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0.05490196 0.00392157 0.60392157 0.99215686 0.35294118 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0.54509804 0.99215686 0.74509804 0.00784314
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0.04313725 0.74509804 0.99215686
0.2745098 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.1372549
0.94509804 0.88235294 0.62745098 0.42352941 0.00392157 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0.31764706 0.94117647 0.99215686 0.99215686 0.46666667 0.09803922
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0.17647059 0.72941176 0.99215686 0.99215686
0.58823529 0.10588235 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0.0627451 0.36470588
0.98823529 0.99215686 0.73333333 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0.97647059 0.99215686 0.97647059 0.25098039 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0.18039216 0.50980392
0.71764706 0.99215686 0.99215686 0.81176471 0.00784314 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0.15294118 0.58039216 0.89803922
0.99215686 0.99215686 0.99215686 0.98039216 0.71372549 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0.09411765 0.44705882 0.86666667 0.99215686
0.99215686 0.99215686 0.99215686 0.78823529 0.30588235 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0.09019608 0.25882353 0.83529412 0.99215686 0.99215686
0.99215686 0.99215686 0.77647059 0.31764706 0.00784314 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0.07058824 0.67058824 0.85882353 0.99215686 0.99215686 0.99215686
0.99215686 0.76470588 0.31372549 0.03529412 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0.21568627 0.6745098 0.88627451 0.99215686 0.99215686 0.99215686
0.99215686 0.95686275 0.52156863 0.04313725 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0.53333333 0.99215686 0.99215686 0.99215686 0.83137255
0.52941176 0.51764706 0.0627451 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. ]
但是,当我在代码中加载数据库im的情况下查看trX时,trX [0]大多为非零值,且元素介于0和255之间。
我只想在我的数据库上训练神经网络。它应该不太困难,并且该代码已被证明可以与MNIST一起使用。我只是不了解如何正确地实际加载我的数据集。
最佳答案
您必须将输入数据标准化为[0,1]或[-1,1]范围,因为您已经在MNIST数据集中完成了此操作。
没有标准化,训练神经网络就困难得多。
您可以轻松地做到这一点,方法是减去数据集的平均值,然后除以标准差,或者只进行最小-最大归一化,这将得到[0,1]范围。
对于每通道8位图像,您可以将每个像素除以255,以获得[0,1]范围,通常足以训练NN,但并非总是如此。