%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Matlab script to perform cross validation on numeric data from agriculture
% purpose: data mining with ANNs/MLPs
% see http://blog.georgruss.de/?p=53
% -----
% script to generate plots and determine optimal network
% for agriculture data
% -----
% Georg Ru\ss
% russ@iws.cs.uni-magdeburg.de
% 2007-11-30
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Overview of what this script does:
%   1. reads a column-oriented data file via readColData,
%   2. derives three attribute subsets (set_1..set_3) by deleting columns,
%   3. for every hidden-layer topology [i j] with i,j = 2..32 creates and
%      trains one two-hidden-layer network per subset and stores the last
%      entry of the training record's tperf field,
%   4. plots the three resulting performance surfaces and computes their
%      pairwise differences.

%% Preparation steps, workspace
% clean workspace
clear all;

% set clock (used at the very end to report the total run time)
clock_start = clock;

% seed random for reproducible results
rand('seed',1);
%pause;

% change paths to wherever your data is located and readable to matlab
% uses script readColData from
% http://web.cecs.pdx.edu/~gerry/MATLAB/plotting/loadingPlotData.html#colHeadings
[label,id_column,data]=readColData('sorted_all',10,10,1);

%% data specific stuff
% generate three data sets for the nnet to play with
% one:   N1, Yield2003, EM38                         -- target: Yield2004
% two:   N1, Yield2003, EM38, N2, REIP32             -- target: Yield2004
% three: N1, Yield2003, EM38, N2, REIP32, N3, REIP49 -- target: Yield2004
% works by eliminating the respective columns from the 'data' matrix above
% (all unwanted columns are removed in a single indexed deletion, which is
% equivalent to deleting them one at a time from the highest index down)
set_1 = data;
set_1(:,[2 3 4 5 7]) = [];

set_2 = data;
set_2(:,[3 5 7]) = [];

set_3 = data;
set_3(:,7) = [];

%% store dimensions of data for possible later loops
%[number_of_examples_cv,number_of_attributes_kv] = size(crossv);
size_set_1 = size(set_1);
size_set_2 = size(set_2);
size_set_3 = size(set_3);

%% Prepare the data for the neural net
% put input attributes and target values into suitable matrices
% requires selecting and transposing the raw input data:
% every column but the last holds an input attribute, the last column
% holds the target; after transposing, each column is one example.
set_1_examples = set_1(:,1:end-1).';
set_1_targets  = set_1(:,end).';

set_2_examples = set_2(:,1:end-1).';
set_2_targets  = set_2(:,end).';

set_3_examples = set_3(:,1:end-1).';
set_3_targets  = set_3(:,end).';

% data division stuff
%[set_1_train,set_1_val,set_1_test,set_1_trainInd,set_1_valInd,set_1_testInd] = dividerand(set_1_examples)
%pause

% max size of first hidden layer
max_size_first_layer = 32;
% max size of second hidden layer
max_size_second_layer = 32;

% One performance value per topology (row = size of first hidden layer,
% column = size of second hidden layer).  The loops below start at 2, so
% row 1 and column 1 are never trained; they are pre-filled with NaN rather
% than 0 so that they neither show up as a "perfect" zero error in the
% plots nor win a later search for the minimum.
net_1_perf_collection = NaN(max_size_first_layer,max_size_second_layer);
net_2_perf_collection = NaN(max_size_first_layer,max_size_second_layer);
net_3_perf_collection = NaN(max_size_first_layer,max_size_second_layer);

%% create and train the networks for every topology
for i = 2:max_size_first_layer
    for j = 2:max_size_second_layer

        % %%%%%%%%%%%%%
        % network for first data set
        net_1 = newff(set_1_examples,set_1_targets,[i j]);
        net_1.trainParam.min_grad = 0.001;
        net_1.trainParam.epochs = 100000;
        net_1.trainParam.lr = 0.5;
        net_1.trainParam.show = NaN;
        % training step
        [net_1_trained,net_1_tr] = train(net_1,set_1_examples,set_1_targets);
        % determine mse on test data (from network training):
        % last entry of the tperf field of the training record
        net_1_perf = net_1_tr.tperf(end);
        net_1_perf_collection(i,j) = net_1_perf;

        % %%%%%%%%%%%%%
        % network for second data set
        net_2 = newff(set_2_examples,set_2_targets,[i j]);
        net_2.trainParam.min_grad = 0.001;
        net_2.trainParam.epochs = 100000;
        net_2.trainParam.lr = 0.5;
        net_2.trainParam.show = NaN;
        % training step
        [net_2_trained,net_2_tr] = train(net_2,set_2_examples,set_2_targets);
        % determine mse on test data (from network training)
        net_2_perf = net_2_tr.tperf(end);
        net_2_perf_collection(i,j) = net_2_perf;

        % %%%%%%%%%%%%%
        % network for third data set
        net_3 = newff(set_3_examples,set_3_targets,[i j]);
        net_3.trainParam.min_grad = 0.001;
        net_3.trainParam.epochs = 100000;
        net_3.trainParam.lr = 0.5;
        net_3.trainParam.show = NaN;
        % network training
        [net_3_trained,net_3_tr] = train(net_3,set_3_examples,set_3_targets);
        % determine mse on test data (from network training)
        net_3_perf = net_3_tr.tperf(end);
        net_3_perf_collection(i,j) = net_3_perf;

        % progress indicator: echo the topology that was just finished
        i
        j
    end
end

%% Plots
% all three performance surfaces in one axes; untrained (NaN) entries are
% left out of the surfaces
surf(net_1_perf_collection);
hold on;
surf(net_2_perf_collection);
mesh(net_3_perf_collection);
% release the axes so that later plots do not pile onto this figure
hold off;

% generate difference plots manually from the following data
% (row 1 and column 1 are NaN because those topologies were not trained)
diff_net_1_net_2 = net_1_perf_collection - net_2_perf_collection;
diff_net_2_net_3 = net_2_perf_collection - net_3_perf_collection;
diff_net_1_net_3 = net_1_perf_collection - net_3_perf_collection;

% total run time in seconds (deliberately echoed to the command window)
duration = etime(clock, clock_start)