SAE Code
function test_example_SAE
load mnist_uint8;

train_x = double(train_x)/255;
test_x  = double(test_x)/255;
train_y = double(train_y);
test_y  = double(test_y);   % convert the data to double and normalize it at the start

%%  ex1 train a 100 hidden unit SDAE and use it to initialize a FFNN
%  Setup and train a stacked denoising autoencoder (SDAE)
rand('state',0)
sae = saesetup([784 100]);
Here the program steps into the saesetup function; as the code shows, it returns the sae structure:
function sae = saesetup(size)
    for u = 2 : numel(size)   % numel(size) = 2 here
        sae.ae{u-1} = nnsetup([size(u-1) size(u) size(u-1)]);   % size(1)=784, size(2)=100, size(3)=784
    end
end
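As a concrete illustration of how this loop stacks autoencoders (a hypothetical call, not part of the example above), a deeper architecture would create one autoencoder per hidden layer, each reconstructing the layer below it:

% Hypothetical: saesetup([784 200 100]) builds two autoencoders
sae2 = saesetup([784 200 100]);
disp(sae2.ae{1}.size)   % 784 200 784: reconstructs the raw input
disp(sae2.ae{2}.size)   % 200 100 200: reconstructs ae{1}'s hidden code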
saesetup in turn calls the nnsetup function, which also returns a network structure nn; each nn it builds is stored into sae.ae{u-1}, so after setup sae is simply a collection of these nn structures.
function nn = nnsetup(architecture)
%NNSETUP creates a Feedforward Backpropagate Neural Network
% nn = nnsetup(architecture) returns a neural network structure with n=numel(architecture)
% layers, architecture being a n x 1 vector of layer sizes e.g. [784 100 10]

    nn.size = architecture;   % architecture gives the number of neurons in each layer, and hence the number of layers (3 here)
    nn.n    = numel(nn.size); % number of layers, 3

    nn.activation_function = 'tanh_opt';   % Activation functions of hidden layers: 'sigm' (sigmoid) or 'tanh_opt' (optimal tanh).
    nn.learningRate = 2;                   % learning rate Note: typically needs to be lower when using 'sigm' activation function and non-normalized inputs.
    nn.momentum = 0.5;                     % Momentum
    nn.scaling_learningRate = 1;           % Scaling factor for the learning rate (each epoch)
    nn.weightPenaltyL2 = 0;                % L2 regularization
    nn.nonSparsityPenalty = 0;             % Non sparsity penalty
    nn.sparsityTarget = 0.05;              % Sparsity target
    nn.inputZeroMaskedFraction = 0;        % Used for Denoising AutoEncoders
    nn.dropoutFraction = 0;                % Dropout level (http://www.cs.toronto.edu/~hinton/absps/dropout.pdf)
    nn.testing = 0;                        % Internal variable. nntest sets this to one.
    nn.output = 'sigm';                    % output unit 'sigm' (=logistic), 'softmax' and 'linear'

    % Initialize each layer's parameters: W, vW and p. W is the main parameter;
    % vW is a temporary variable used when updating the weights; p is the sparsity term.
    for i = 2 : nn.n   % create the two weight matrices and p{i}
        % weights and weight momentum
        nn.W{i - 1} = (rand(nn.size(i), nn.size(i - 1)+1) - 0.5) * 2 * 4 * sqrt(6 / (nn.size(i) + nn.size(i - 1)));
        % draw the weights uniformly from the symmetric interval
        % [-4*sqrt(6/(nn.size(i)+nn.size(i-1))), +4*sqrt(6/(nn.size(i)+nn.size(i-1)))]
        nn.vW{i - 1} = zeros(size(nn.W{i - 1}));
        % vW has the same shape as W but starts as an all-zero matrix

        % average activations (for use with sparsity)
        nn.p{i} = zeros(1, nn.size(i));   % p{i} holds the average activation of each hidden unit j (see the UFLDL tutorial)
    end
end
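Written out, the initialization above draws each weight from a symmetric uniform interval whose half-width depends on the fan-in and fan-out of the layer (the sigmoid-scaled variant of the Glorot/Bengio heuristic):

W^{(l)}_{ij} \sim \mathrm{Uniform}(-r,\; r), \qquad r = 4\sqrt{\frac{6}{n_l + n_{l-1}}}

where n_l = nn.size(i) and n_{l-1} = nn.size(i-1); in the code, (rand - 0.5) * 2 produces the uniform draw on [-1, 1] and the remaining factor is r.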
The program then returns to this part of test_example_SAE:
sae.ae{1}.activation_function     = 'sigm';
sae.ae{1}.learningRate            = 1;
sae.ae{1}.inputZeroMaskedFraction = 0.5;
% override some of the parameters inside sae
opts.numepochs = 1;
opts.batchsize = 100;
sae = saetrain(sae, train_x, opts);
Here some of the nn defaults stored inside sae are overridden (sigmoid activation, learning rate 1, and a 50% input-masking fraction, which is what makes this a denoising autoencoder), and then the program jumps to the saetrain function:
function sae = saetrain(sae, x, opts)
    for i = 1 : numel(sae.ae)
        disp(['Training AE ' num2str(i) '/' num2str(numel(sae.ae))]);   % report which autoencoder is being trained
        sae.ae{i} = nntrain(sae.ae{i}, x, x, opts);
        t = nnff(sae.ae{i}, x, x);
        x = t.a{2};
        % remove bias term
        x = x(:, 2:end);   % drop the first column (the bias)
    end
end
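To make the stacking step concrete (a sketch assuming the MNIST dimensions used above), the hidden activations of one trained autoencoder, minus the bias column, become the training input for the next:

% train_x is 60000 x 784 and sae.ae{1} was set up as [784 100 784]
t = nnff(sae.ae{1}, train_x, train_x);
h = t.a{2};        % 60000 x 101: hidden activations with a leading bias column of ones
h = h(:, 2:end);   % 60000 x 100: bias removed; this is what a second autoencoder would train on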
This takes us to the nntrain function; skipping the assert checks at the top, it begins:
loss.train.e      = [];
loss.train.e_frac = [];
loss.val.e        = [];
loss.val.e_frac   = [];
opts.validation = 0;
if nargin == 6
    opts.validation = 1;
end

fhandle = [];
if isfield(opts,'plot') && opts.plot == 1   % isfield returns logical 1 if the struct opts contains a field named 'plot'
    fhandle = figure();
end

m = size(train_x, 1);
% m is the number of training samples
% note that opts was set when calling; batchsize is the mini-batch size used for batch gradient descent
batchsize = opts.batchsize;
numepochs = opts.numepochs;   % number of passes over the data
numbatches = m / batchsize;

assert(rem(numbatches, 1) == 0, 'numbatches must be a integer');
L = zeros(numepochs*numbatches,1);
n = 1;
for i = 1 : numepochs
    tic;

    kk = randperm(m);   % a random permutation of the integers 1..m
    for l = 1 : numbatches
        batch_x = train_x(kk((l - 1) * batchsize + 1 : l * batchsize), :);   % train one mini-batch at a time, each of size batchsize (100 here, giving 600 batches)

        % Add noise to input (for use in denoising autoencoder)
        if(nn.inputZeroMaskedFraction ~= 0)   % see "Extracting and Composing Robust Features with Denoising Autoencoders"
            batch_x = batch_x.*(rand(size(batch_x))>nn.inputZeroMaskedFraction);   % noise is added by forcing some of the training inputs to 0; inputZeroMaskedFraction gives the fraction zeroed
        end

        batch_y = train_y(kk((l - 1) * batchsize + 1 : l * batchsize), :);   % likewise take y one mini-batch at a time, matching batch_x

        nn = nnff(nn, batch_x, batch_y);
        nn = nnbp(nn);
        nn = nnapplygrads(nn);

        L(n) = nn.L;   % loss on this mini-batch

        n = n + 1;
    end

    t = toc;   % elapsed time for this epoch, in seconds

    if opts.validation == 1
        loss = nneval(nn, loss, train_x, train_y, val_x, val_y);
        str_perf = sprintf('; Full-batch train mse = %f, val mse = %f', loss.train.e(end), loss.val.e(end));
    else
        loss = nneval(nn, loss, train_x, train_y);
        str_perf = sprintf('; Full-batch train err = %f', loss.train.e(end));
    end
    if ishandle(fhandle)
        nnupdatefigures(nn, fhandle, loss, opts, i);
    end

    disp(['epoch ' num2str(i) '/' num2str(opts.numepochs) '. Took ' num2str(t) ' seconds' '. Mini-batch mean squared error on training set is ' num2str(mean(L((n-numbatches):(n-1)))) str_perf]);
    nn.learningRate = nn.learningRate * nn.scaling_learningRate;   % rescale the learning rate each epoch (scaling_learningRate is 1 here, so it stays constant)
end
end
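As a standalone toy illustration of the masking noise above (not part of nntrain itself): each entry survives only when an independent uniform draw exceeds inputZeroMaskedFraction, so on average that fraction of entries is zeroed:

x = rand(3, 4);                        % a toy 3 x 4 batch
zeroFraction = 0.5;                    % plays the role of nn.inputZeroMaskedFraction
mask = rand(size(x)) > zeroFraction;   % each entry survives with probability 1 - zeroFraction
noisy_x = x .* mask;                   % about half of the entries are forced to 0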
Next the nnff function is called, which performs the feedforward pass:
function nn = nnff(nn, x, y)
%NNFF performs a feedforward pass
% nn = nnff(nn, x, y) returns a neural network structure with updated
% layer activations, error and loss (nn.a, nn.e and nn.L)

    n = nn.n;
    m = size(x, 1);

    x = [ones(m,1) x];   % prepend the bias column
    nn.a{1} = x;

    % feedforward pass
    for i = 2 : n-1
        % forward propagation depends on the chosen activation function;
        % see the activation_function parameter in nnsetup
        % ('sigm' is the sigmoid)
        switch nn.activation_function
            case 'sigm'
                % Calculate the unit's outputs (including the bias term)
                nn.a{i} = sigm(nn.a{i - 1} * nn.W{i - 1}');
            case 'tanh_opt'
                nn.a{i} = tanh_opt(nn.a{i - 1} * nn.W{i - 1}');
        end

        % dropout; dropoutFraction is one of the parameters set in nnsetup
        if(nn.dropoutFraction > 0)   % only runs when > 0: randomly zeroes a fraction of the hidden units during training
            if(nn.testing)
                nn.a{i} = nn.a{i}.*(1 - nn.dropoutFraction);   % at test time, scale activations instead of dropping units
            else
                nn.dropOutMask{i} = (rand(size(nn.a{i}))>nn.dropoutFraction);
                nn.a{i} = nn.a{i}.*nn.dropOutMask{i};
            end
        end
        % sparsity bookkeeping; nonSparsityPenalty is the penalty coefficient for units that miss sparsityTarget
        % calculate running exponential activations for use with sparsity
        if(nn.nonSparsityPenalty>0)   % only runs when > 0
            nn.p{i} = 0.99 * nn.p{i} + 0.01 * mean(nn.a{i}, 1);
        end

        % Add the bias term
        nn.a{i} = [ones(m,1) nn.a{i}];
    end
    switch nn.output   % output-layer activations
        case 'sigm'
            nn.a{n} = sigm(nn.a{n - 1} * nn.W{n - 1}');
        case 'linear'
            nn.a{n} = nn.a{n - 1} * nn.W{n - 1}';
        case 'softmax'
            nn.a{n} = nn.a{n - 1} * nn.W{n - 1}';
            nn.a{n} = exp(bsxfun(@minus, nn.a{n}, max(nn.a{n},[],2)));   % subtract the row max before exp for numerical stability
            nn.a{n} = bsxfun(@rdivide, nn.a{n}, sum(nn.a{n}, 2));
    end

    % error and loss
    % compute the output-layer error
    nn.e = y - nn.a{n};   % y - h_{W,b}(x)

    switch nn.output
        case {'sigm', 'linear'}
            nn.L = 1/2 * sum(sum(nn.e .^ 2)) / m;   % mean squared error; see the UFLDL notes
        case 'softmax'
            nn.L = -sum(sum(y .* log(nn.a{n}))) / m;   % cross-entropy loss
    end
end
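In UFLDL-style notation (adapted here to the toolbox's row-major layout), the quantities computed above are, per batch of m samples:

a^{(1)} = x, \qquad a^{(l)} = f\!\left(a^{(l-1)} (W^{(l-1)})^{\top}\right), \qquad e = y - a^{(n)},

L_{\mathrm{mse}} = \frac{1}{2m}\sum_{k=1}^{m}\lVert e_k \rVert^2, \qquad L_{\mathrm{softmax}} = -\frac{1}{m}\sum_{k=1}^{m}\sum_{j} y_{kj}\,\log a^{(n)}_{kj},

and the sparsity estimate is the exponential moving average \hat{\rho}^{(i)} \leftarrow 0.99\,\hat{\rho}^{(i)} + 0.01\cdot\frac{1}{m}\sum_{k} a^{(i)}_{k}.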
Next the program jumps to the nnbp function:
function nn = nnbp(nn)
%NNBP performs backpropagation
% nn = nnbp(nn) returns a neural network structure with updated weights

    n = nn.n;
    sparsityError = 0;
    switch nn.output
        case 'sigm'
            d{n} = - nn.e .* (nn.a{n} .* (1 - nn.a{n}));   % see formula (2) of the UFLDL backpropagation notes
        case {'softmax','linear'}
            d{n} = - nn.e;
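The output-layer deltas above correspond to the UFLDL backpropagation formulas: for a sigmoid output layer with squared-error loss, and for softmax or linear outputs with their matching losses,

\delta^{(n)} = -\left(y - a^{(n)}\right)\odot a^{(n)}\odot\left(1 - a^{(n)}\right) \quad\text{(sigm)}, \qquad \delta^{(n)} = -\left(y - a^{(n)}\right) \quad\text{(softmax, linear)}.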