# 8.14 Data Template
library(janitor) # Cleanup: clean_names().
library(rattle) # Dataset: weatherAUS.
library(magrittr) # Pipelines: %<>%.
library(randomForest) # Imputation: na.roughfix().
library(dplyr) # Wrangling: rename_all().
# Name the dataset and fetch the object of that name. Keeping the name in
# its own variable lets the remaining steps work on the generic `ds`.
dsname <- "weatherAUS"
ds <- get(dsname)

# Number of observations in the dataset.
nobs <- nrow(ds)

# Report the dimensions (rows, columns) of the dataset.
dim(ds)
# Remember the variable names as supplied, before they are normalised.
vnames <- names(ds)

# Normalise the variable names of the dataset in place.
ds %<>% clean_names(numerals = "right")

# Label each original name with its normalised counterpart so the original
# can be looked up by the new name.
names(vnames) <- names(ds)

# Display the normalised names.
names(ds)
# Start with every variable in the dataset.
vars <- names(ds)

# Name of the variable to be predicted.
target <- "rain_tomorrow"

# Put the target first, drop its second occurrence, then reverse the
# vector so that the target ends up as the last element.
vars <- c(target, vars) %>% unique() %>% rev()

# Normalise the levels of every factor column. vapply() always returns a
# logical vector, so which() is safe even for a dataset with no columns.
for (v in which(vapply(ds, is.factor, logical(1)))) {
  levels(ds[[v]]) %<>% normVarNames()
}
# Risk variable and identifier variables.
risk <- "risk_mm"
id <- c("date", "location")

# Variables to leave out of `vars`: the risk variable and the identifiers.
ignore <- c(risk, id)
vars <- setdiff(vars, ignore)

# The inputs are the remaining variables other than the target.
inputs <- setdiff(vars, target)
# Formula with the target on the left and all other variables on the right.
form <- formula(paste(target, "~ ."))

# Optional imputation of missing values, left disabled.
# ds[vars] %<>% na.roughfix()
# Proportions of the observations given to each of the three partitions
# (presumably training / tuning / testing, going by the tr/tu/te names).
SPLIT <- c(0.7, 0.15, 0.15)

# First partition: a random sample of row indices.
tr <- sample(nobs, SPLIT[1] * nobs)

# Second partition: sampled from the rows not already in `tr`.
tu <- nobs %>% seq_len() %>% setdiff(tr) %>% sample(SPLIT[2] * nobs)

# Third partition: every row that is in neither `tr` nor `tu`.
te <- nobs %>% seq_len() %>% setdiff(tr) %>% setdiff(tu)
# Values of the target variable for each of the three partitions.
target.tr <- ds %>% slice(tr) %>% pull(target)
target.tu <- ds %>% slice(tu) %>% pull(target)
target.te <- ds %>% slice(te) %>% pull(target)
# Values of the risk variable for each partition, extracted only when a
# risk variable has been set.
if (!is.null(risk)) {
  risk.tr <- ds %>% slice(tr) %>% pull(risk)
  risk.tu <- ds %>% slice(tu) %>% pull(risk)
  risk.te <- ds %>% slice(te) %>% pull(risk)
}
# Your donation will support ongoing availability and give you access to the PDF version of this book. Desktop Survival Guides include Data Science, GNU/Linux, and MLHub. Books available on Amazon include Data Mining with Rattle and Essentials of Data Science. Popular open source software includes rattle, wajig, and mlhub. Hosted by Togaware, a pioneer of free and open source software since 1984. Copyright © 1995-2022 Graham.Williams@togaware.com Creative Commons Attribution-ShareAlike 4.0
