# STA 410, ASSIGNMENT 3, SPRING 2004. # FIND VALUES OF NUMBERS IN A VECTOR MODULO 2PI. mod2pi <- function (x) { f <- x/(2*pi) f <- f-floor(f) f*(2*pi) } # FIND THE PROBABILITY DENSITY OF POINTS UNDER THE WRAPPED NORMAL DISTRIBUTION. # The arguments are the the vector of points, the mean of the wrapped normal, # and the standard deviation of the wrapped normal. The points and the mean # are moved to the interval [0,2pi) if necessary. # # The density is computed by adding the density of the point under the normal # distribution with means of mu+2*pi*i for i an integer from -t to t, with t # chosen so that 2*pi*i will span a range of at least 10 standard deviations. dwrpnorm <- function (x, mu, sigma) { x <- mod2pi(x) mu <- mod2pi(mu) t <- ceiling(10*sigma/(2*pi)) d <- rep(0,length(x)) for (i in (-t):t) { d <- d + dnorm (x, mu+2*pi*i, sigma) } d } # LOG LIKELIHOOD FUNCTION FOR WRAPPED NORMAL MODEL. Arguments are the vector # of data points, and the mean and standard deviation of the distribution. lglik <- function (x, mu, sigma) { sum (log (dwrpnorm (x, mu, sigma))) } # FIND THE MAXIMUM LIKELIHOOD ESTIMATES FOR MU AND SIGMA USING EM. The # arguments are the vector of data points, the initial values for mu and # sigma, the number of iterations of EM to do, and a debug flag, which # if TRUE causes the parameters and log likelihood to be printed for # each iteration. The value of this function is a list with elements # "mu" and "sigma", containing the final maximum likelihood estimates. em <- function (x, mu, sigma, iters=30, debug=FALSE) { # Make sure data and mean arguments are in [0,2pi). x <- mod2pi(x) mu <- mod2pi(mu) if (debug) { cat(0,mu,sigma,lglik(x,mu,sigma),"\n") } for (k in 1:iters) { # The E step. First, sets "i" to the wrap indices that span the # relevant range. Then allocates space for "y" and "d" matrices. The # "y" matrix is set to the data values offset by the multiples of 2pi # corresponding to these indices. (These are possible unobserved data # values before wrapping.) The "d" matrix is first set to the probability # density of the "y" values under a normal distribution with mean mu # and standard deviation sigma. The "d" values are then normalized to # sum to one for each data point, so that they can be regarded as # probabilities for the different "y" values. t <- max(1,ceiling(10*sigma/(2*pi))) i <- (-t):t y <- matrix(0,length(x),length(i)) d <- matrix(0,length(x),length(i)) for (j in 1:length(i)) { y[,j] <- x+2*pi*i[j] d[,j] <- dnorm (y[,j], mu, sigma) } for (j in 1:length(x)) { d[j,] <- d[j,] / sum(d[j,]) } # M step. The mean is re-estimated based on the weighted average of # the "y" values. The standard deviation is then re-estimated based # on the weighted average of squared deviations from mu. mu <- sum(d*y)/length(x) sigma <- sqrt(sum(d*(y-mu)^2)/length(x)) # Move mu into the range [0,2p). Done only AFTER re-estimating sigma. mu <- mod2pi(mu) if (debug) { cat(k,mu,sigma,lglik(x,mu,sigma),"\n"); } } list (mu=mu, sigma=sigma) }