################################################################################### # # Cross validation test by using Simple or Ordinary Kriging # # Last review: 25/06/2018 # email: ja@fct.unl.pt # ################################################################################### ################################################################################### ktype <- "KN" varcolXY <- c(1,2) varlistindex <- 7 maxpercent <- 50 nclasses <- 10 nodata <- -9 maxsamples <- 10 medialocal <- mean(gtd[,varlistindex]) fileOutTVC="C:/temp/Cd_TVC.OUT" # # Models are inputed by proportion of variance (sum of Nugget effect + Sills = 1) # nugget_effect <- 0.0 nstructures <- 1 model_type <- c("Sph","Sph") sill <- c(1.0,1.0) range <- c(1600,1) anisotropy <- c(2,1) maindir <- c(60.0,0.0) ################################################################################### library("ggplot2") ptm <- proc.time() maintitle <- ifelse(ktype == "KS", "KS", "KN") # # Sill calculation # maindir <- ifelse (maindir < 0, maindir+180, maindir) patamar=nugget_effect if (nstructures >= 1) patamar <- patamar+sum(sill[1:nstructures]) # # Duplicate gtd -> gtdnew # gtdnew <- data.frame(gtd[,c(varcolXY,varlistindex)],0,0,0) nams=dim(gtdnew)[1] varlistname=names(gtdnew)[3] dist <- matrix(rep(0, 2*nams), nrow=nams, ncol=2) # # Compute ellipses # rrr <- range/anisotropy theta <- seq(0,359) xx <- rep(NaN, times=360) yy <- rep(NaN, times=360) xxx <- array(rep(0, 2*360), c(2,360)) yyy <- array(rep(0, 2*360), c(2,360)) rangecorr <- array(rep(0, 2*2*360), c(2,2,360)) if (nstructures > 0) { for(k in 1:nstructures){ aroda <- maindir[k]*pi/180 yy[1:90] <- sqrt(1/((tan(theta[1:90]*pi/180)^2/rrr[k]^2)+(1/range[k]^2))) yy[91:180] <- -sqrt(1/((tan(theta[91:180]*pi/180)^2/rrr[k]^2)+(1/range[k]^2))) yy[181:270] <- -sqrt(1/((tan(theta[181:270]*pi/180)^2/rrr[k]^2)+(1/range[k]^2))) yy[271:360] <- sqrt(1/((tan(theta[271:360]*pi/180)^2/rrr[k]^2)+(1/range[k]^2))) xx[1:180] <- sqrt((1-(yy[1:180]^2/range[k]^2))*rrr[k]^2) xx[181:360] <- -sqrt((1-(yy[181:360]^2/range[k]^2))*rrr[k]^2) xxx[k,] <- xx*cos(aroda)+yy*sin(aroda) yyy[k,] <- -xx*sin(aroda)+yy*cos(aroda) if (maindir[k] == 0) { rangecorr[k,1,] <- c(0:359) } else { rangecorr[k,1,] <- c(maindir[k]:359,0:(maindir[k]-1)) } rangecorr[k,2,] <- sqrt(xxx[k,]^2+yyy[k,]^2) rangecorr[k,,] <- rangecorr[k,,order(rangecorr[k,1,])] } } # # Kriging estimation # if (ktype == "KS") { neq=maxsamples } else if (ktype == "KN") { neq=maxsamples+1 } covaesq <- matrix(rep(0, neq*neq), nrow=neq, ncol=neq) covadir <- matrix(rep(0, neq), nrow=neq, ncol=1) for (n in 1:nams) { dist[,1] <- c(1:nams) dist[,2] <- (gtdnew[n,1]-gtdnew[,1])^2+(gtdnew[n,2]-gtdnew[,2])^2 dist <- dist[order(dist[,2]),] # # Build kriging system # covaesq[row(covaesq) == col(covaesq) ] <- patamar if (ktype == "KN") { covaesq[row(covaesq) == neq] <- 1 covaesq[col(covaesq) == neq] <- 1 covaesq[neq,neq] <- 0 covadir[neq] <- 1 } for (j in 1:(maxsamples-1)) { for (i in (j+1):maxsamples) { dx <- gtdnew[dist[i+1,1],1]-gtdnew[dist[j+1,1],1] dy <- gtdnew[dist[i+1,1],2]-gtdnew[dist[j+1,1],2] azim = atan2(dx,dy)*180/pi azim = as.integer(ifelse(azim < 0, azim+360, azim))+1 ddd <- sqrt(dx^2+dy^2) # Modelo de variograma covaesq[i,j]=patamar-nugget_effect if (nstructures > 0) { for(k in 1:nstructures){ if (model_type[k] == "Sph") { covaesq[i,j] = covaesq[i,j] - ifelse (ddd <= rangecorr[k,2,azim], sill[k]*(((3*ddd)/(2*rangecorr[k,2,azim]))-0.5*(ddd/rangecorr[k,2,azim])^3), sill[k]) } else if (model_type[k] == "Exp") { covaesq[i,j] = covaesq[i,j] - sill[k]*(1-exp(-3*ddd/rangecorr[k,2,azim])) } else if (model_type[k] == "Gauss") { covaesq[i,j] = covaesq[i,j] - sill[k]*(1-exp(-3*ddd^2/rangecorr[k,2,azim]^2)) } else if (model_type[k] == "Power") { covaesq[i,j] = covaesq[i,j] - sill[k]*ddd^rangecorr[k,2,azim] } covaesq[j,i] <- covaesq[i,j] } } } } for (j in 1:maxsamples) { dx=gtdnew[dist[j+1,1],1]-gtdnew[n,1] dy=gtdnew[dist[j+1,1],2]-gtdnew[n,2] azim = atan2(dx,dy)*180/pi azim = as.integer(ifelse(azim < 0, azim+360, azim))+1 ddd <- sqrt(dx^2+dy^2) covadir[j]=patamar-nugget_effect if (nstructures > 0) { for(k in 1:nstructures){ if (model_type[k] == "Sph") { covadir[j] <- covadir[j] - ifelse (ddd <= rangecorr[k,2,azim], sill[k]*(((3*ddd)/(2*rangecorr[k,2,azim]))-0.5*(ddd/rangecorr[k,2,azim])^3), sill[k]) } else if (model_type[k] == "Exp") { covadir[j] <- covadir[j] - sill[k]*(1-exp(-3*ddd/rangecorr[k,2,azim])) } else if (model_type[k] == "Gauss") { covadir[j] <- covadir[j] - sill[k]*(1-exp(-3*ddd^2/rangecorr[k,2,azim]^2)) } else if (model_type[k] == "Power") { covadir[j] <- covadir[j] - sill[k]*ddd^rangecorr[k,2,azim] } } } } lambdas <- solve (covaesq) %*% covadir if (ktype == "KS") { gtdnew[n,4] <- medialocal for (j in 1:maxsamples) { gtdnew[n,4] <- gtdnew[n,4] + ((gtdnew[dist[j+1,1],3]-medialocal)*lambdas[j]) } gtdnew[n,5] <- patamar - t(lambdas) %*% covadir } else if (ktype == "KN") { lambdas <- solve (covaesq) %*% covadir gtdnew[n,4] <- 0 for (j in 1:maxsamples) { gtdnew[n,4] <- gtdnew[n,4]+(gtdnew[dist[j+1,1],3]*lambdas[j]) } gtdnew[n,5] <- patamar - lambdas[maxsamples+1] - t(lambdas) %*% covadir } } gtdnew[,5]=gtdnew[,3]-gtdnew[,4] gtdnew[,6]=(gtdnew[,3]-gtdnew[,4])^2 print ("Erro médio") mean(gtdnew[,5]) print ("Erro médio relativo") mean(gtdnew[,5])/mean(gtdnew[,4]) print ("Erro quadrático médio") mean(gtdnew[,6]) print ("Erro quadrático médio relativo") mean(gtdnew[,6])/var(gtdnew[,3]) # # Write output file # con1=file(fileOutTVC,open="w") for (n in 1:nams) { writeLines(paste(gtdnew[,1],gtdnew[,2],gtdnew[,3],gtdnew[,4],gtdnew[,5],gtdnew[,6]), con=con1, sep = "\n") } close(con1) # # View image of estimated values (KS) # par(mfrow=c(2,2),mar=c(4,4,2.5,2.5), mgp=c(2.4, 0.8, 0)) minvar <- min(gtdnew[,3]) maxvar <- max(gtdnew[,3]) minX <- minvar-0.25*(maxvar-minvar) maxX <- maxvar+0.25*(maxvar-minvar) classint <- c(seq(minvar,maxvar,by=(maxvar-minvar)/nclasses)) hist (gtdnew[,3], breaks=classint, las=1, xlim=c(minX,maxX), main="Função densidade de probabilidade", col=rgb(0,0,1,0.5), xlab=varlistname, ylab="f(x)", freq = FALSE) vmin=min(gtdnew[,3],gtdnew[,4]) vmax=max(gtdnew[,3],gtdnew[,4]) plot (gtdnew[,3],gtdnew[,4],xlim=c(vmin,vmax),ylim=c(vmin,vmax),xlab=paste("Valores verdadeiros ", varlistname),ylab=paste("Valores estimados ", varlistname),asp=1, col=ifelse(gtdnew[,5]>=0,ifelse(gtdnew[,5]/mean(gtdnew[,3])>maxpercent/100,rgb(0,0,0,1),rgb(1,0,0,0.5)),ifelse(gtdnew[,5]/mean(gtdnew[,3])< -maxpercent/100,rgb(0,0,0,1),rgb(0,1,0,0.5))), pch=ifelse(gtdnew[,5]>=0,"-","+"), cex=2, main=paste("Valores verdadeiros vs valores estimados",maintitle)) lines (c(vmin,vmax),c(vmin,vmax),col = "red", lwd = 3) points (mean(gtdnew[,3]),mean(gtdnew[,4]), col="black", cex=2, pch=18) px=vmax-0.2*(vmax-vmin) py=vmin+0.10*(vmax-vmin) text(x = px, y = py, adj=0, cex = 1, col = "black", paste("EM: ",format(mean(gtdnew[,5]),digits=3),"\n", "EMR: ",format(mean(gtdnew[,5])/mean(gtdnew[,4]),digits=3),"\n", "EQM: ",format(mean(gtdnew[,6]),digits=3),"\n", "EQMR: ",format(mean(gtdnew[,6])/var(gtdnew[,3]),digits=3),"\n")) plot (gtdnew[,1],gtdnew[,2],xlab="X",ylab="Y",asp=1, col=ifelse(gtdnew[,5]>=0,ifelse(gtdnew[,5]/mean(gtdnew[,3])>maxpercent/100,rgb(0,0,0,1),rgb(1,0,0,0.5)),ifelse(gtdnew[,5]/mean(gtdnew[,3])< -maxpercent/100,rgb(0,0,0,1),rgb(0,1,0,0.5))), pch=ifelse(gtdnew[,5]>=0,"-","+"), cex=2, main="Localização das observações sub e sobre estimadas") qqnorm(gtdnew[,5], main="Verificação informal da normalidade dos resíduos",xlab="Escala distribuição normal", ylab="Resíduos padronizados") qqline(gtdnew[,5], col = "red", lwd = 3) proc.time() - ptm