#------------------------------ # Exercise 1 #------------------------------ #------------------------------ # Question a pi<-0.90 mu<-2 sig<- 1 a=10; c<-1/(2*a) n<- 100 w<-vector("numeric",n) z<-w for(i in 1:n){ z[i]=sample(c(1,0),size=1,prob=c(pi,1-pi)) if(z[i]==1){ w[i]<- rnorm(1,mean=mu,sd=sig) } else{ w[i]<-runif(1,min=-a,max=a) } } boxplot(w) #------------------------------ # Question b # Observed data Loglikelihood loglik<- function(theta,w){ phi <- sapply(w,dnorm,mean=theta[1],sd=theta[2]) logL <- sum(log(theta[3]*phi+(1-theta[3])*c)) return(logL) } # EM algoritm em_outlier <- function(w,theta0,a,epsi){ go_on<-TRUE logL0<- loglik(theta0,w) t<-0 c<-1/(2*a) n<-length(w) print(c(t,logL0)) while(go_on){ t<-t+1 # E-step phi <- sapply(w,dnorm,mean=theta0[1],sd=theta0[2]) z<- phi*theta0[3]/(phi*theta0[3]+c*(1-theta0[3])) # M-step S<- sum(z) pi<-S/n mu<- sum(w*z)/S sig<-sqrt(sum(z*(w-mu)^2)/S) theta<-c(mu,sig,pi) logL<-loglik(theta,w) if (logL-logL0 < epsi){ go_on <- FALSE } logL0 <- logL theta0<-theta print(c(t,logL)) } return(list(loglik=logL,theta=theta,z=z)) } #------------------------------ # Question c mu0<-mean(w) # +rnorm(1,mean=0,sd=0.5) sig0<-sd(w) pi0<-0.5 epsi<-1e-6 theta0<-c(mu0,sig0,pi0) estim<-em_outlier(w,theta0,a,epsi) plot(w,estim$z) #------------------------------ # Question d opt<-optim(theta0, loglik, gr = NULL,method = "BFGS", hessian = FALSE,control=list(fnscale=-1),w=w) print(opt$par) # estimates computed with optim print(estim$theta) # estimates computed with EM