%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Matlab script to perform cross validation on models generated for
% numeric data from agriculture;
% yield prediction task = multi-dimensional regression
%
% purpose: data mining with SVMs/MLPs/RBFs/RegTree
%
% compares:
%   - support vector regression
%   - multi-layer perceptron
%   - radial basis function network
%   - regression tree
%
% conference:  ICDM 2009 http://www.data-mining-forum.de/icdm2009.php
% description: http://blog.georgruss.de/?p=145
%
% requires:
%   - statistics toolbox for cvpartition (from Matlab2008a)
%   - neural network toolbox
%   - SVMTorch (compiled version, of course)
%   - data structure used for the results (cv_results)
%   - savefig script from matlab central
%     (mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=10889)
%   - a local (user's) .matlab directory
%   - SOM toolbox for data access:
%     (http://www.cis.hut.fi/projects/somtoolbox/)
%   - modelcomparison.m script (link must be in blog post or nearby)
%
% new:
%   - comparison of SVM result vs. NNet result vs. RBF result vs.
%     regtree result
%   - included normalization
%   - figures are stored as eps and jpg automatically
%   - workspace/model results are stored for later usage
%
% -----
% Georg Ru{\ss}
% russ@iws.cs.uni-magdeburg.de
% 2009-01-26
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Preparation steps, workspace

% Start from an empty workspace.
clear all;

% Remember the wall-clock start time; the total runtime is reported at the
% very end of the script.
clock_start = clock;

% Optional: seed the random number generator for reproducible results.
%rand('seed',1);

%% Load the data sets
% Adjust the file names/paths to wherever your data is located and
% readable to Matlab.
%
% Legacy loader (kept for reference), uses script readColData from
% http://web.cecs.pdx.edu/~gerry/MATLAB/plotting/loadingPlotData.html#colHeadings
%[label,id_column,data]=readColData('sorted_all',10,10,1);
% convert labels to strings instead of character arrays; transpose
% label = transpose(cellstr(label));

% Each data set is read into a struct via the SOM toolbox; the fields used
% below are .data and .comp_names.
sD330    = som_read_data('330_2006-alle-labels-som.data');
sD131    = som_read_data('131_var_all_labels_som.data');
sD131net = som_read_data('131_var_netz_som.data');
sDall    = som_read_data('sorted_all-som.data');

%% Data-specific stuff
% Notes from the earlier, matrix-based version of this script:
%   generate three data sets for the nnet to play with
%   one:   N1, Yield2003, EM38                          -- target: Yield2004
%   two:   N1, Yield2003, EM38, N2, REIP32              -- target: Yield2004
%   three: N1, Yield2003, EM38, N2, REIP32, N3, REIP49  -- target: Yield2004
%   works by eliminating the respective columns from the 'data' matrix
%set_2(:,3) = [];
% only set_3 is actually used here
%set_3 = data;
%set_3(:,7) = [];
%label(:,1) = [];   % delete "ID" label
%label(:,7) = [];
%Size_set_3 = size(set_3);

% Remove components 1 and 8 from the 'sorted_all' set, keeping the name
% list and the data matrix in sync. Deleting both indices in a single
% statement removes the same original entries as deleting 8 first, then 1.
% NOTE(review): component 1 is presumably the ID column (cf. the legacy
% code above) -- confirm against the data file before changing indices.
sDall.comp_names([1 8],:) = [];
sDall.data(:,[1 8])       = [];

%% Run the model comparison on every data set
% Arguments: data matrix, a short tag for the data set, the constant 20,
% and the component names.
% NOTE(review): the meaning of the third argument (20) is defined in
% modelcomparison.m, which is not part of this file -- confirm there.
modelcomparison(sD330.data,    'F330',    20, sD330.comp_names);
modelcomparison(sD131.data,    'F131',    20, sD131.comp_names);
modelcomparison(sD131net.data, 'F131net', 20, sD131net.comp_names);
modelcomparison(sDall.data,    'F04',     20, sDall.comp_names);

%% Get script stats
% Elapsed time in seconds since clock_start; the missing semicolon is
% intentional so that the value is printed to the command window.
duration = etime(clock, clock_start)