R packages

## obtain data
library(MLDataR)
## data analysis
library(tidyverse)

Data set: Thyroid Disease (from the MLDataR package)

## Load the thyroid_disease data set that ships with the MLDataR package.
## data() expects data set *names* (it does not evaluate its arguments), so a
## `pkg::name` expression is looked up literally and reported as not found;
## the package has to be given through the `package` argument instead.
data("thyroid_disease", package = "MLDataR")

Get a glimpse of your data

## Print a compact, one-line-per-column overview (name, type, first values).
thyroid_disease %>% glimpse()
Rows: 3,772
Columns: 28
$ ThryroidClass                  <chr> "negative", "negative", "negative", "ne…
$ patient_age                    <int> 41, 23, 46, 70, 70, 18, 59, 80, 66, 68,…
$ patient_gender                 <dbl> 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, …
$ presc_thyroxine                <dbl> 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, …
$ queried_why_on_thyroxine       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ presc_anthyroid_meds           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ sick                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
$ pregnant                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ thyroid_surgery                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ radioactive_iodine_therapyI131 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ query_hypothyroid              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ query_hyperthyroid             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
$ lithium                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ goitre                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ tumor                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, …
$ hypopituitarism                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ psych_condition                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ TSH_measured                   <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, …
$ TSH_reading                    <dbl> 1.30, 4.10, 0.98, 0.16, 0.72, 0.03, NA,…
$ T3_measured                    <dbl> 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, …
$ T3_reading                     <dbl> 2.5, 2.0, NA, 1.9, 1.2, NA, NA, 0.6, 2.…
$ T4_measured                    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ T4_reading                     <dbl> 125, 102, 109, 175, 61, 183, 72, 80, 12…
$ thyrox_util_rate_T4U_measured  <dbl> 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ thyrox_util_rate_T4U_reading   <dbl> 1.14, NA, 0.91, NA, 0.87, 1.30, 0.92, 0…
$ FTI_measured                   <dbl> 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ FTI_reading                    <dbl> 109, NA, 120, NA, 70, 141, 78, 115, 132…
$ ref_src                        <chr> "SVHC", "other", "other", "other", "SVI…

Split the data into training and test sets

## Tag every row with a unique id so the rows that were not sampled can be
## identified afterwards.
df <- mutate(thyroid_disease, id = row_number())
## Fix the RNG seed so the random partition is reproducible.
set.seed(123)
## Draw a random 80% of the rows as the training set.
train <- sample_frac(df, size = 0.80)
dim(train)
[1] 3018   29
## The test set is every row of df whose id does not appear in train.
test <- df %>% anti_join(train, by = "id")
dim(test)
[1] 754  29

Your turn

Develop a model to predict ThryroidClass.