## obtain data
library(MLDataR)
## data analysis
library(tidyverse)
## Load the thyroid_disease data set that ships with MLDataR.
## data() expects the data set name and the package as separate arguments;
## a `pkg::name` expression is treated as a literal (non-existent) data set
## name and only produces a "data set not found" warning.
data("thyroid_disease", package = "MLDataR")
## Preview the column names, types and first few values
glimpse(thyroid_disease)
Rows: 3,772
Columns: 28
$ ThryroidClass <chr> "negative", "negative", "negative", "ne…
$ patient_age <int> 41, 23, 46, 70, 70, 18, 59, 80, 66, 68,…
$ patient_gender <dbl> 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, …
$ presc_thyroxine <dbl> 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, …
$ queried_why_on_thyroxine <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ presc_anthyroid_meds <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ sick <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
$ pregnant <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ thyroid_surgery <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ radioactive_iodine_therapyI131 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ query_hypothyroid <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ query_hyperthyroid <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
$ lithium <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ goitre <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tumor <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, …
$ hypopituitarism <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ psych_condition <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ TSH_measured <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, …
$ TSH_reading <dbl> 1.30, 4.10, 0.98, 0.16, 0.72, 0.03, NA,…
$ T3_measured <dbl> 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, …
$ T3_reading <dbl> 2.5, 2.0, NA, 1.9, 1.2, NA, NA, 0.6, 2.…
$ T4_measured <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ T4_reading <dbl> 125, 102, 109, 175, 61, 183, 72, 80, 12…
$ thyrox_util_rate_T4U_measured <dbl> 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ thyrox_util_rate_T4U_reading <dbl> 1.14, NA, 0.91, NA, 0.87, 1.30, 0.92, 0…
$ FTI_measured <dbl> 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ FTI_reading <dbl> 109, NA, 120, NA, 70, 141, 78, 115, 132…
$ ref_src <chr> "SVHC", "other", "other", "other", "SVI…
## tag each row with its row number so the split can later be keyed on `id`
df <- mutate(thyroid_disease, id = row_number())
## fix the RNG seed so the random partition is reproducible
set.seed(123)
## randomly draw 80% of the rows as the training set
train <- sample_frac(df, size = 0.80)
dim(train)
[1] 3018 29
## the test set is every row of df whose id was not drawn into train
test <- df %>%
  anti_join(train, by = "id")
dim(test)
[1] 754 29
Develop a model to predict ThryroidClass.