%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Matlab script to perform cross validation on numeric data with neural networks
% purpose: data mining with ANNs/MLPs on sports science data especially from olympic swimming
% see http://blog.georgruss.de/?p=17
% -----
% NN tries to learn from sports training data and to predict competition results
% -----
% Georg Ruß
% russ@iws.cs.uni-magdeburg.de
% 2007-09-28
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Overview of what the script does:
%   1. loads a pretraining set and a cross-validation set (one example per
%      row, target value in the last column)
%   2. normalizes inputs and targets of both sets with mapstd
%   3. for every example of the cross-validation set (leave-one-out):
%        - trains the freshly created network on the pretraining set
%        - continues training on all cross-validation examples but one
%        - simulates the held-out example and stores prediction, target
%          and absolute error in results_cv
%   4. sums the columns of results_cv, stores the summed absolute error in
%      list_results and plots the results

%% Preparation steps, workspace
% clean workspace
clear all;

% change paths to wherever your data is located and readable to matlab
% training data
load data23_unnormalisiert.csv;
% cross validation data (could be the same as above)
load data19_unnormalisiert.csv;

% to keep results reproducible: seed random
rand('state',0)

pretraining = data23_unnormalisiert;
crossv = data19_unnormalisiert;

% store dimensions of data for later loops
[number_of_examples,number_of_attributes] = size(pretraining);
[number_of_examples_cv,number_of_attributes_kv] = size(crossv);

% the network's input layer is sized from the pretraining data only, so both
% data sets must have the same number of columns; fail early with a clear
% message instead of deep inside train()
if number_of_attributes ~= number_of_attributes_kv
    error('Pretraining data has %d columns but cross-validation data has %d.', ...
          number_of_attributes, number_of_attributes_kv);
end

%% Prepare the data for the neural net
% put input attributes and target values into suitable matrices
% requires transposing the raw input data (one example per column afterwards)
train_ex = transpose(pretraining(:,1:number_of_attributes-1));
train_ex_kv = transpose(crossv(:,1:number_of_attributes_kv-1));

% target values are located in the last column of the data
targets = transpose(pretraining(:,number_of_attributes));
targets_cv = transpose(crossv(:,number_of_attributes_kv));

% apply matlab's normalization
% yields normalized data with mean of 0 and stddev of 1
% each data set is normalized on its own; the returned settings
% (psl, psz, pslkv, pszkv) are needed to reverse the mapping later
[examples_norm,psl] = mapstd(train_ex);
[targets_norm,psz] = mapstd(targets);
[examples_cv_norm,pslkv] = mapstd(train_ex_kv);
[targets_cv_norm,pszkv] = mapstd(targets_cv);

% the neural network requires max and min values for each input attribute
minmaxranges = minmax(examples_norm);

% generate empty matrix to store results of cross-validation in
results_cv = zeros(number_of_examples_cv,3);

% create and store neural network
% each of the cross-validation steps is started with the same network that is generated here
% dimension of input layer is calculated from minmaxranges
% hidden layer: two neurons, tansig
% output layer: one neuron, purelin
% training algorithm: trainlm
net = newff(minmaxranges,[2 1], {'tansig', 'purelin'}, 'trainlm');

% set some training parameters for pretraining, can be adjusted easily
% (set once here: "net" itself is never modified inside the loop, so
% re-assigning the same values on every pass is unnecessary)
net.trainParam.min_grad = 0.00001;
net.trainParam.epochs = 2000;
net.trainParam.lr = 0.1;
net.trainParam.show = NaN;

% pretraining data is identical for every cross-validation step
data_pretraining = examples_norm;
data_pretraining_targets = targets_norm;

%% loop for cross validation (leave-one-out over the cross-validation set)
for i = 1:number_of_examples_cv
    % main training data: all cross-validation examples except example i
    kvtraining = examples_cv_norm;
    kvtraining(:,i) = [];
    kvzielwerte = targets_cv_norm;
    kvzielwerte(:,i) = [];

    % this takes the one example as validation data
    valid = examples_cv_norm(:,i);
    valid_z = targets_cv_norm(:,i);

    data_maintraining = kvtraining;
    data_maintraining_targets = kvzielwerte;

    % the initialized network "net" is pretrained
    % NOTE(review): inputs to this call do not change between iterations;
    % it is kept inside the loop to leave the sequence of operations (and
    % any random-number consumption inside train) as in the original
    net_pretrained = train(net,data_pretraining,data_pretraining_targets);

    % set some training parameters for main training
    net_pretrained.trainParam.min_grad = 0.00001;
    net_pretrained.trainParam.epochs = 4000;
    net_pretrained.trainParam.lr = 0.1;
    net_pretrained.trainParam.show = NaN;

    % the pretrained network is fed the main training data
    net_fertigtrainiert = train(net_pretrained,data_maintraining,data_maintraining_targets);

    % the cross validation is carried out (the net's output with the cv input is calculated)
    valid_sim = sim(net_fertigtrainiert,valid);

    % the output is mapped back to the original dimensions
    % result matrix
    % first column: simulated result
    % second column: original result
    % third column: absolute difference between simulation and original
    results_cv(i,1) = mapstd('reverse',valid_sim,pszkv);
    results_cv(i,2) = mapstd('reverse',valid_z,pszkv);
    results_cv(i,3) = abs(results_cv(i,1)-results_cv(i,2));
end

%% show and plot results
%stddev_kv = std(results_cv,0,1)
%mean_kv = mean(results_cv,1)
%results_cv

% column sums of the result matrix; the third entry is the summed absolute error
sum_len = sum(results_cv,1);
sum_errors_len = sum_len(1,3);

% NOTE(review): j_v and j_h were used here without ever being defined in this
% script (and "clear all" removes anything left in the workspace), so the
% script stopped with an "undefined variable" error at this point. The index
% expression (j_v-1)*10+j_h looks like the row index of a 10-wide parameter
% sweep whose outer loops are not part of this file -- confirm. Both indices
% are fixed to 1 so that the script runs standalone and produces one row.
j_v = 1;
j_h = 1;
list_results((j_v-1)*10+j_h,1) = j_v*100;
list_results((j_v-1)*10+j_h,2) = j_h*100;
% the original assigned the undefined name "sum_results_len" here; the value
% computed above is sum_errors_len
list_results((j_v-1)*10+j_h,3) = sum_errors_len;

% show raw numeric results
list_results

%% optionally plot results
plot(results_cv(:,1),'-ok')
hold on;
plot(results_cv(:,2),'-sk')
plot(results_cv(:,3),'-dk')
title('LEN');
% 'Location','NorthEast' is the upper right corner, i.e. the placement the
% obsolete numeric position argument 1 used to select
h = legend('net prediction','target','error','Location','NorthEast');
set(h,'Interpreter','none')
plot(list_results(:,3),'-ok')

%% optionally write output -- requires changing the path below
%file_id = fopen('/home/russ/matlab_kv','wt');
%dlmwrite('/home/russ/matlab_kv',results_cv,'\t');
%fprintf(file_id,'%3.1f %3.1f %3.1f \n',results_cv);
%fclose(file_id);