Deep Learning Script Precision Errors

Question

0 投票

Hi,

I am new to matlab. Below is my deep learning script which was originally prepared for a 5*5 matrix input and 5*1 matrix output. I updated the weight matrices and input and outputs based on my data. This trains well for a few samples than due to high number of precisions NaN values occur.

I tried to used vpa on multiple places to eliminate this issue. This helped to decrease the number of NaN's in the matrixes, however there are still problems. I also tried to use round ( to digits) with no success.

Probably my question has a very simple solution which I don't know. Specially, I wonder how do deep learning script writers solve the Inf and Nan problems during training of large matrixes.

I am pasting part of my code. If you request I may upload other parts of the project.

function [w1, w2, w3, w4] = DeepLearning(w1, w2, w3, w4, inputMatrix, correct_Output)
 
digits(150);
alpha = 0.01; %to control the learning rate
nRows = size(inputMatrix, 1); 
 
for k = 1:nRows
 
%reshaped_input_Image = reshape(input_Image(:,:,k), 25, 1);
subMatrix = inputMatrix(k:k, :);
%disp("subMatrix");
%disp(subMatrix);
disp("W1");
disp(w1);
input_of_hidden_layer1 = vpa(w1*transpose(subMatrix), 150);
 
output_of_hidden_layer1 = vpa(ReLU(input_of_hidden_layer1), 150);
disp("W2");
disp(w2);
 
input_of_hidden_layer2 = vpa(w2* output_of_hidden_layer1, 150);
 
output_of_hidden_layer2 = vpa(ReLU(input_of_hidden_layer2), 150);
disp("W3");
disp(w3);
input_of_hidden_layer3 = vpa(w3* output_of_hidden_layer2, 150);
 
output_of_hidden_layer3 = vpa(ReLU(input_of_hidden_layer3), 150);
disp("W4");
disp(w4);
   
disp("output_of_hidden_layer3");
disp(output_of_hidden_layer3);
 
input_of_output_node = vpa(w4* output_of_hidden_layer3, 150);
 
disp("input_of_output_node");
disp(input_of_output_node);
final_output = vpa(Softmax(input_of_output_node), 150);
disp("final_output");
disp(final_output);
correct_Output_transpose = vpa(correct_Output(k:k, :)', 150);
disp("correct_Output_transpose");
disp(correct_Output_transpose);
error = vpa(correct_Output_transpose - final_output, 150);
disp("error");
disp(error);
delta = error;
disp("delta");
disp(delta);
 
error_of_hidden_layer3 = vpa(w4'*delta, 150);
 
delta3 = vpa((input_of_hidden_layer3>0).*error_of_hidden_layer3, 150);
disp("input_of_hidden_layer3");
disp(input_of_hidden_layer3);
disp("input_of_hidden_layer3>0");
disp(input_of_hidden_layer3>0);
disp("delta3");
disp(delta3);
error_of_hidden_layer2 = vpa(w3' * delta3, 150);
disp("error_of_hidden_layer2");
disp(error_of_hidden_layer2);
disp("input_of_hidden_layer2>0");
disp(input_of_hidden_layer2>0);
 
delta2 = vpa(round(((input_of_hidden_layer2>0).*error_of_hidden_layer2),150), 150);
disp("delta2");
disp(delta2);
 
error_of_hidden_layer1 = vpa(w2'*delta2, 150);
 
delta1 = vpa((input_of_hidden_layer1>0).*error_of_hidden_layer1, 150);
 
adjustment_of_w4 = vpa(alpha*delta*output_of_hidden_layer3', 150);
 
adjustment_of_w3 = vpa(alpha*delta3*output_of_hidden_layer2', 150);
adjustment_of_w2 = vpa(alpha*delta2*output_of_hidden_layer1', 150);
 
adjustment_of_w1 = vpa( alpha*delta1*transpose(subMatrix)', 150);
w1 = vpa(w1+ adjustment_of_w1, 150);
 
w2 = vpa(w2+ adjustment_of_w2, 150);
 
w3 = vpa(w3+ adjustment_of_w3, 150);
 
w4 = vpa(w4+ adjustment_of_w4, 150);
NrNaNW1 = sum(isnan(w1(:)));
NrNaNW2 = sum(isnan(w2(:)));
NrNaNW3 = sum(isnan(w3(:)));
NrNaNW4 = sum(isnan(w4(:)));
if(NrNaNW1 >0 || NrNaNW2 >0 || NrNaNW3 >0 || NrNaNW4 >0 )
    disp("NAN olustu k=" + k);    
end
 disp(k + ". sample tamamland?");  
 
end
 
end