library(Ecdat)
library(evclass)

# Question 1 ----
# Load the Mode data set, drop the 'carpool' observations and build the class
# labels and the standardized attribute matrix.
data(Mode)
keep <- Mode$choice != "carpool"
# Column 1 holds the choice; converting it to numeric and then re-coding
# through a factor yields consecutive integer labels for the remaining classes.
y <- as.numeric(Mode[keep, 1])
y <- as.numeric(as.factor(y))
# NOTE(review): columns 2, 4, 5, 6, 8, 9 are presumably the cost/time
# attributes of the non-carpool alternatives -- confirm against the data set.
x <- scale(Mode[keep, c(2, 4, 5, 6, 8, 9)])

# Question 2 ----
# Random split: two thirds of the observations for training, the rest for test.
n <- length(y)
train <- sample(seq_len(n), round(2 * n / 3))
x.train <- x[train, ]
x.test <- x[-train, ]
y.train <- y[train]
y.test <- y[-train]
ntrain <- length(y.train)

# Question 3 ----
# Evidential K-nearest-neighbour classifier with K = 5: fit on the training
# set, then report the test error and the confusion matrix.
K <- 5
fit <- EkNNfit(x.train, y.train, K)
val <- EkNNval(x.train, y.train, x.test, K, y.test, fit$param)
val$err
table(as.numeric(y.test), val$ypred)

# Question 4 ----
# Error reported by EkNNfit on the training set for K = 1, ..., 15.
err <- rep(0, 15)
for (K in 1:15) {
  fit <- EkNNfit(
    x.train, y.train, K,
    options = list(maxiter = 100, eta = 0.1, gain_min = 1e-5, disp = FALSE)
  )
  err[K] <- fit$err
}
plot(1:15, err, type = "b", xlab = 'K', ylab = 'LOO error rate')

# Question 5 ----
# Evidential neural network (prototype-based) classifier with 7 prototypes.
param0 <- proDSinit(x.train, y.train, nproto = 7, nprotoPerClass = FALSE,
                    crisp = FALSE)
fit <- proDSfit(x.train, y.train, param = param0)
val <- proDSval(x.test, fit$param, y.test)
## Confusion matrix
table(y.test, val$ypred)
val$err

# Question 6 ----
# K-fold cross-validation on the training set to compare the values of the
# regularization coefficient mu passed to proDSfit.
K <- 5
folds <- sample(seq_len(K), ntrain, replace = TRUE)
MU <- c(1e-9, 1e-6, 1e-5, 1e-4, 0.001, 0.01)
N <- length(MU)
CV <- rep(0, N)
nproto <- 15
param0 <- proDSinit(x.train, y.train, nproto = nproto, nprotoPerClass = FALSE,
                    crisp = FALSE)
options <- list(maxiter = 500, eta = 0.1, gain_min = 1e-04, disp = 0)
for (i in seq_len(N)) {
  for (k in seq_len(K)) {
    print(c(i, k))  # progress indicator: (index of mu, index of fold)
    in_fold <- folds == k
    fit <- proDSfit(x.train[!in_fold, ], y.train[!in_fold], param = param0,
                    mu = MU[i], options = options)
    val <- proDSval(x.train[in_fold, ], fit$param, y.train[in_fold])
    # Weight each fold's error rate by the fold size (number of errors).
    CV[i] <- CV[i] + sum(in_fold) * val$err
  }
  # The folds partition the training set only, so the total number of
  # validated observations is ntrain (not n, the size of the full data set).
  CV[i] <- CV[i] / ntrain
}
plot(MU, CV, type = 'b', xlab = expression(mu), ylab = 'CV error rate',
     log = "x")