Data Science Desktop Survival Guide by Graham Williams Desktop Survival Project Home Preface Data Science Introducing R R Constructs R Tasks R Strings R Read, Write, and Create Data Template Data Exploration Data Wrangling Data Visualisation Statistics ML Template ML Scenarios ML Activities ML Applications ML Algorithms Cluster Analysis Decision Trees Computer Vision Graph Data Privacy Literate Data Science Coding with Style Resources Bibliography Index

## Modelling Roles

# Risk variable: a measure of how severe the outcome is.

risk <- "risk_mm"

# Identifier variables.

id <- c("date", "location")

# Variables to ignore: the risk variable followed by the identifiers.

ignore <- c(risk, id)

# Drop the ignored variables from the list of candidate variables.

vars <- setdiff(vars, ignore)

# The modelling inputs are the remaining variables other than the target.
# Show them once they have been recorded.

inputs <- setdiff(vars, target)
print(inputs)
```
## [1] "rain_today" "temp_3pm" "temp_9am" "cloud_3pm" ...
## [5] "cloud_9am" "pressure_3pm" "pressure_9am" "humidity_3pm...
## [9] "humidity_9am" "wind_speed_3pm" "wind_speed_9am" "wind_dir_3pm...
## [13] "wind_dir_9am" "wind_gust_speed" "wind_gust_dir" "sunshine" ...
## [17] "evaporation" "rainfall" "max_temp" "min_temp"
```

# Also record them by indices.

# Record the position of each input variable within names(ds).
#
# match() is vectorised and always yields an integer vector with exactly one
# element per input (NA when a name is not a column of ds). The earlier
# sapply() over which() produced the same integers when every input was a
# unique column, but fell back to a list when a name was missing or
# duplicated in names(ds).

inputi <-
  match(inputs, names(ds)) %T>%
  print()
```
## [1] 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3
```

 # Build the model formula from the columns of ds named in vars, taken in
 # reverse order.
 form <- stats::formula(ds[rev(vars)])