R code - Predict temperature with a KNN
Use KNN to fit the temperature variation in China.
Features: longitude, latitude, elevation.
Tuned with the caret package; parallel execution is supported.
# Fits one weighted k-nearest-neighbour model (kknn, tuned through caret) per
# year on the station sample, plots held-out predictions against observations,
# predicts the temperature for every row of the county file, and writes the
# per-year predictions to "CountyTemp.dta".
#
# NOTE: the former `rm(list = ls())` was dropped on purpose; a script should
# not wipe the workspace of whoever sources it.
library(haven)
library(kknn)
library(caret)
library(randomForest)
library(ranger)
library(doParallel)

set.seed(233)

# Parallel backend picked up by caret while resampling.
n_workers <- 12
cl <- makeCluster(n_workers)
registerDoParallel(cl)

station_data <- read_stata("station_train_sample.dta")
PredSample <- read_stata("countyElevLoc.dta")

years <- 1990:2014
year_labels <- paste0("year_", years)

# One slot per year, allocated up front instead of growing inside the loop.
predictions <- vector("list", length(years))
names(predictions) <- year_labels
MSE_content <- matrix(
  NA_real_,
  nrow = length(years),
  ncol = 1,
  dimnames = list(year_labels, "mse")
)

# Resampling scheme and tuning grid do not depend on the year, so they are
# built once. `allowParallel` is a trainControl() option; when handed to
# train() it is merely forwarded to the underlying model-fitting routine.
cv_control <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 3,
  allowParallel = TRUE
)
knn_grid <- expand.grid(
  kmax = 1:10,
  distance = c(0.5, 0.8, 1, 1.2, 1.5, 1.8, 2, 2.5, 3, 3.5, 5),
  kernel = "optimal"
)

# Writes the predicted-vs-observed scatter plot for one year to
# "year_<yr>.png". The graphics device is closed even if plotting fails.
save_fit_plot <- function(predicted, observed, yr) {
  png(filename = paste0("year_", yr, ".png"))
  on.exit(dev.off(), add = TRUE)
  plot(
    predicted, observed,
    xlim = c(-10, 30), ylim = c(-10, 30),
    xlab = "Predicted value", ylab = "True Value", main = yr
  )
  # 45-degree reference line: points on it are predicted exactly.
  lines(-10:30, -10:30, col = "red")
  invisible(NULL)
}

# `finally` guarantees the worker processes are shut down whether or not the
# loop completes; errors are still propagated to the caller.
tryCatch(
  {
    for (i in seq_along(years)) {
      yr <- years[i]

      # which() drops rows whose year is NA instead of indexing with NA.
      # Columns 2 to 5 must contain `temp` (used below); the remaining
      # columns are presumably the predictors -- TODO confirm against the
      # .dta layout.
      yr_rows <- which(station_data$year == yr)
      yr_sample <- station_data[yr_rows, 2:5]

      # 85/15 split; the one-column index matrix returned by
      # createDataPartition() is flattened to a plain index vector.
      train_idx <- as.vector(
        createDataPartition(yr_sample$temp, p = 0.85, list = FALSE)
      )
      train_set <- yr_sample[train_idx, ]
      test_set <- yr_sample[-train_idx, ]

      fit <- train(
        temp ~ .,
        data = train_set,
        method = "kknn",
        trControl = cv_control,
        preProcess = c("center", "scale"),
        tuneGrid = knn_grid
      )

      # Held-out evaluation: diagnostic plot plus mean squared error.
      test_pred <- predict(fit, newdata = test_set)
      save_fit_plot(test_pred, test_set$temp, yr)
      MSE_content[i, 1] <- mean((test_pred - test_set$temp)^2)

      # Predictions for the county file for this year.
      predictions[[i]] <- predict(fit, newdata = PredSample)
    }
  },
  finally = stopCluster(cl)
)

# One prediction column per year ("year_1990", ..., "year_2014") appended to
# the county data.
content <- do.call(cbind, predictions)
out <- cbind(PredSample, content)
write_dta(data = out, path = "CountyTemp.dta")