"""network3.py
~~~~~~~~~~~~~~
ATheano-basedprogramfortrainingandrunningsimpleneural
networks.
Supportsseverallayertypes(fullyconnected,convolutional,max
pooling,softmax),andactivationfunctions(sigmoid,tanh,and
rectifiedlinearunits,withmoreeasilyadded).
WhenrunonaCPU,thisprogramismuchfasterthannetwork.pyand
network2.py.However,unlikenetwork.pyandnetwork2.pyitcanalso
berunonaGPU,whichmakesitfasterstill.
BecausethecodeisbasedonTheano,thecodeisdifferentinmany
waysfromnetwork.pyandnetwork2.py.However,wherepossibleIhave
triedtomaintainconsistencywiththeearlierprograms.In
particular,theAPIissimilartonetwork2.py.NotethatIhave
focusedonmakingthecodesimple,easilyreadable,andeasily
modifiable.Itisnotoptimized,andomitsmanydesirablefeatures.
ThisprogramincorporatesideasfromtheTheanodocumentationon
convolutionalneuralnets(notably,
),fromMishaDenil's
implementationofdropout(),and
fromChrisOlah(http:
//colah.github.io).
"""
#### Libraries
# Standard library
import cPickle
import gzip

# Third-party libraries
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import softmax
from theano.tensor import shared_randomstreams
from theano.tensor.signal import downsample

# Activation functions for neurons
def linear(z): return z
def ReLU(z): return T.maximum(0.0, z)
from theano.tensor.nnet import sigmoid
from theano.tensor import tanh
#### Constants
GPU = False
if GPU:
    print "Trying to run under a GPU.  If this is not desired, then modify "+\
        "network3.py\nto set the GPU flag to False."
    try: theano.config.device = 'gpu'
    except: pass # it's already set
    theano.config.floatX = 'float32'
else:
    print "Running with a CPU.  If this is not desired, then modify "+\
        "network3.py to set\nthe GPU flag to True."
#### Load the MNIST data
def load_data_shared(filename="../data/mnist.pkl.gz"):
    f = gzip.open(filename, 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    def shared(data):
        """Place the data into shared variables.  This allows Theano to copy
        the data to the GPU, if one is available.

        """
        shared_x = theano.shared(
            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        shared_y = theano.shared(
            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        return shared_x, T.cast(shared_y, "int32")
    return [shared(training_data), shared(validation_data), shared(test_data)]
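
# For illustration (not in the original file): each element of the returned
# list is a (shared_x, shared_y) pair of Theano shared variables, e.g.
#
#     >>> training_data, _, _ = load_data_shared()
#     >>> training_x, training_y = training_data
#     >>> training_x.get_value(borrow=True).shape   # (50000, 784) for MNIST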
#### Main class used to construct and train networks
class Network(object):

    def __init__(self, layers, mini_batch_size):
        """Takes a list of `layers`, describing the network architecture, and
        a value for the `mini_batch_size` to be used during training
        by stochastic gradient descent.

        """
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        self.params = [param for layer in self.layers for param in layer.params]
        self.x = T.matrix("x")
        self.y = T.ivector("y")
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        for j in xrange(1, len(self.layers)):
            prev_layer, layer = self.layers[j-1], self.layers[j]
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout
    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):
        """Train the network using mini-batch stochastic gradient descent."""
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data

        # compute number of minibatches for training, validation and testing
        num_training_batches = size(training_data)/mini_batch_size
        num_validation_batches = size(validation_data)/mini_batch_size
        num_test_batches = size(test_data)/mini_batch_size

        # define the (regularized) cost function, symbolic gradients, and updates
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.layers[-1].cost(self)+\
               0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)
        updates = [(param, param-eta*grad)
                   for param, grad in zip(self.params, grads)]

        # define functions to train a mini-batch, and to compute the
        # accuracy in validation and test mini-batches.
        i = T.lscalar() # mini-batch index
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={
                self.x:
                training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        # Do the actual training
        best_validation_accuracy = 0.0
        for epoch in xrange(epochs):
            for minibatch_index in xrange(num_training_batches):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                cost_ij = train_mb(minibatch_index)
                if (iteration+1) % num_training_batches == 0:
                    validation_accuracy = np.mean(
                        [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%}".format(
                        epoch, validation_accuracy))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            test_accuracy = np.mean(
                                [test_mb_accuracy(j) for j in xrange(num_test_batches)])
                            print('The corresponding test accuracy is {0:.2%}'.format(
                                test_accuracy))
        print("Finished training network.")
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
            best_validation_accuracy, best_iteration))
        print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))
#### Define layer types

class ConvPoolLayer(object):
    """Used to create a combination of a convolutional and a max-pooling
    layer.  A more sophisticated implementation would separate the
    two, but for our purposes we'll always use them together, and it
    simplifies the code, so it makes sense to combine them.

    """

    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=sigmoid):
        """`filter_shape` is a tuple of length 4, whose entries are the number
        of filters, the number of input feature maps, the filter height, and the
        filter width.

        `image_shape` is a tuple of length 4, whose entries are the
        mini-batch size, the number of input feature maps, the image
        height, and the image width.

        `poolsize` is a tuple of length 2, whose entries are the y and
        x pooling sizes.

        """
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.poolsize = poolsize
        self.activation_fn = activation_fn
        # initialize weights and biases
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        self.w = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)
        conv_out = conv.conv2d(
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled_out = downsample.max_pool_2d(
            input=conv_out, ds=self.poolsize, ignore_border=True)
        self.output = self.activation_fn(
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output_dropout = self.output # no dropout in the convolutional layers
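
# Example (an illustrative sketch, not in the original file): for 28x28 MNIST
# images in mini-batches of 10, twenty 5x5 filters followed by 2x2 max-pooling
# yield 20 output feature maps of size 12x12, since (28-5+1)/2 = 12:
#
#     >>> layer = ConvPoolLayer(image_shape=(10, 1, 28, 28),
#     ...                       filter_shape=(20, 1, 5, 5),
#     ...                       poolsize=(2, 2))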
class FullyConnectedLayer(object):

    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        # Initialize weights and biases
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def accuracy(self, y):
        "Return the accuracy for the mini-batch."
        return T.mean(T.eq(y, self.y_out))
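
# Example (not in the original file): a sigmoid layer mapping 784 inputs to
# 100 outputs, dropping half the hidden units during training.  Note that
# `self.output` scales the weighted input by (1 - p_dropout), the standard
# way of compensating at evaluation time for units dropped during training:
#
#     >>> hidden = FullyConnectedLayer(n_in=784, n_out=100, p_dropout=0.5)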
class SoftmaxLayer(object):

    def __init__(self, n_in, n_out, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        # Initialize weights and biases
        self.w = theano.shared(
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        "Return the log-likelihood cost."
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])

    def accuracy(self, y):
        "Return the accuracy for the mini-batch."
        return T.mean(T.eq(y, self.y_out))


#### Miscellanea
def size(data):
    "Return the size of the dataset `data`."
    return data[0].get_value(borrow=True).shape[0]

def dropout_layer(layer, p_dropout):
    srng = shared_randomstreams.RandomStreams(
        np.random.RandomState(0).randint(999999))
    mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
    return layer*T.cast(mask, theano.config.floatX)
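
#### Demonstration script (an illustrative sketch, not part of the original
#### file).  It assumes the MNIST pickle sits at the default path used by
#### load_data_shared; the architecture and hyper-parameters are one
#### reasonable choice, not the only one.
if __name__ == "__main__":
    training_data, validation_data, test_data = load_data_shared()
    mini_batch_size = 10
    net = Network([
        ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                      filter_shape=(20, 1, 5, 5),
                      poolsize=(2, 2)),
        # the 20 pooled 12x12 feature maps feed the fully connected layer
        FullyConnectedLayer(n_in=20*12*12, n_out=100),
        SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
    # 60 epochs of mini-batch SGD with learning rate 0.1
    net.SGD(training_data, 60, mini_batch_size, 0.1,
            validation_data, test_data)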