Packages

library(tidymodels)
library(tidyverse)
library(palmerpenguins)
library(rpart)
library(skimr)
library(rpart.plot)

Data

# Load the penguins data set, naming its package explicitly so the source of
# the data is unambiguous, then print a per-column summary.
data("penguins", package = "palmerpenguins")
skim(penguins)
Data summary
Name penguins
Number of rows 344
Number of columns 8
_______________________
Column type frequency:
factor 3
numeric 5
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
species 0 1.00 FALSE 3 Ade: 152, Gen: 124, Chi: 68
island 0 1.00 FALSE 3 Bis: 168, Dre: 124, Tor: 52
sex 11 0.97 FALSE 2 mal: 168, fem: 165

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
bill_length_mm 2 0.99 43.92 5.46 32.1 39.23 44.45 48.5 59.6 ▃▇▇▆▁
bill_depth_mm 2 0.99 17.15 1.97 13.1 15.60 17.30 18.7 21.5 ▅▅▇▇▂
flipper_length_mm 2 0.99 200.92 14.06 172.0 190.00 197.00 213.0 231.0 ▂▇▃▅▂
body_mass_g 2 0.99 4201.75 801.95 2700.0 3550.00 4050.00 4750.0 6300.0 ▃▇▆▃▂
year 0 1.00 2008.03 0.82 2007.0 2007.00 2008.00 2009.0 2009.0 ▇▁▇▁▇

Split data

# Fix the RNG seed so the random train/test partition is reproducible.
set.seed(seed = 123)

# Hold out a quarter of the rows for testing (3/4 is the initial_split()
# default, written out here), then extract the training portion.
penguin_split <- initial_split(data = penguins, prop = 3 / 4)
penguin_train <- training(x = penguin_split)

# Rows and columns of the training set.
dim(penguin_train)
## [1] 258   8
# Preview the first six training rows.
head(penguin_train, n = 6L)
## # A tibble: 6 × 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <fct>   <fct>           <dbl>         <dbl>            <int>       <int> <fct>
## 1 Gentoo  Biscoe           44.5          14.3              216        4100 <NA> 
## 2 Adelie  Torge…           38.6          21.2              191        3800 male 
## 3 Gentoo  Biscoe           45.3          13.7              210        4300 fema…
## 4 Chinst… Dream            52.8          20                205        4550 male 
## 5 Adelie  Torge…           37.3          20.5              199        3775 male 
## 6 Chinst… Dream            43.2          16.6              187        2900 fema…
## # … with 1 more variable: year <int>
# The rows not used for training form the held-out test set.
penguin_test <- testing(x = penguin_split)

# Rows and columns of the test set.
c(nrow(penguin_test), ncol(penguin_test))
## [1] 86  8

Build decision tree

# Classification tree for species using every other column as a predictor,
# fitted with complexity parameter cp = 0.1.
tree1 <- rpart(
  formula = species ~ .,
  data = penguin_train,
  cp = 0.1
)
rpart.plot(
  tree1,
  box.palette = "RdBu",
  shadow.col = "gray",
  nn = TRUE
)

# Same model with a much larger complexity parameter (cp = 0.5), for
# comparison with tree1.
tree2 <- rpart(
  formula = species ~ .,
  data = penguin_train,
  cp = 0.5
)
rpart.plot(
  tree2,
  box.palette = "RdBu",
  shadow.col = "gray",
  nn = TRUE
)

Predict

# Per-class probabilities from tree1 for every test penguin ("prob" is the
# default prediction type for a classification tree, written out here).
predict(tree1, newdata = penguin_test, type = "prob")
##        Adelie  Chinstrap     Gentoo
## 1  0.95726496 0.04273504 0.00000000
## 2  0.95726496 0.04273504 0.00000000
## 3  0.95726496 0.04273504 0.00000000
## 4  0.95726496 0.04273504 0.00000000
## 5  0.95726496 0.04273504 0.00000000
## 6  0.95726496 0.04273504 0.00000000
## 7  0.95726496 0.04273504 0.00000000
## 8  0.95726496 0.04273504 0.00000000
## 9  0.95726496 0.04273504 0.00000000
## 10 0.95726496 0.04273504 0.00000000
## 11 0.95726496 0.04273504 0.00000000
## 12 0.95726496 0.04273504 0.00000000
## 13 0.95726496 0.04273504 0.00000000
## 14 0.95726496 0.04273504 0.00000000
## 15 0.95726496 0.04273504 0.00000000
## 16 0.95726496 0.04273504 0.00000000
## 17 0.95726496 0.04273504 0.00000000
## 18 0.95726496 0.04273504 0.00000000
## 19 0.95726496 0.04273504 0.00000000
## 20 0.95726496 0.04273504 0.00000000
## 21 0.95726496 0.04273504 0.00000000
## 22 0.04545455 0.93181818 0.02272727
## 23 0.95726496 0.04273504 0.00000000
## 24 0.95726496 0.04273504 0.00000000
## 25 0.95726496 0.04273504 0.00000000
## 26 0.95726496 0.04273504 0.00000000
## 27 0.95726496 0.04273504 0.00000000
## 28 0.95726496 0.04273504 0.00000000
## 29 0.01030928 0.04123711 0.94845361
## 30 0.95726496 0.04273504 0.00000000
## 31 0.95726496 0.04273504 0.00000000
## 32 0.95726496 0.04273504 0.00000000
## 33 0.95726496 0.04273504 0.00000000
## 34 0.95726496 0.04273504 0.00000000
## 35 0.95726496 0.04273504 0.00000000
## 36 0.95726496 0.04273504 0.00000000
## 37 0.95726496 0.04273504 0.00000000
## 38 0.01030928 0.04123711 0.94845361
## 39 0.01030928 0.04123711 0.94845361
## 40 0.01030928 0.04123711 0.94845361
## 41 0.01030928 0.04123711 0.94845361
## 42 0.01030928 0.04123711 0.94845361
## 43 0.01030928 0.04123711 0.94845361
## 44 0.01030928 0.04123711 0.94845361
## 45 0.01030928 0.04123711 0.94845361
## 46 0.01030928 0.04123711 0.94845361
## 47 0.01030928 0.04123711 0.94845361
## 48 0.01030928 0.04123711 0.94845361
## 49 0.01030928 0.04123711 0.94845361
## 50 0.01030928 0.04123711 0.94845361
## 51 0.01030928 0.04123711 0.94845361
## 52 0.01030928 0.04123711 0.94845361
## 53 0.01030928 0.04123711 0.94845361
## 54 0.01030928 0.04123711 0.94845361
## 55 0.01030928 0.04123711 0.94845361
## 56 0.01030928 0.04123711 0.94845361
## 57 0.01030928 0.04123711 0.94845361
## 58 0.01030928 0.04123711 0.94845361
## 59 0.01030928 0.04123711 0.94845361
## 60 0.01030928 0.04123711 0.94845361
## 61 0.01030928 0.04123711 0.94845361
## 62 0.01030928 0.04123711 0.94845361
## 63 0.01030928 0.04123711 0.94845361
## 64 0.01030928 0.04123711 0.94845361
## 65 0.01030928 0.04123711 0.94845361
## 66 0.01030928 0.04123711 0.94845361
## 67 0.01030928 0.04123711 0.94845361
## 68 0.01030928 0.04123711 0.94845361
## 69 0.04545455 0.93181818 0.02272727
## 70 0.04545455 0.93181818 0.02272727
## 71 0.04545455 0.93181818 0.02272727
## 72 0.04545455 0.93181818 0.02272727
## 73 0.04545455 0.93181818 0.02272727
## 74 0.04545455 0.93181818 0.02272727
## 75 0.04545455 0.93181818 0.02272727
## 76 0.04545455 0.93181818 0.02272727
## 77 0.04545455 0.93181818 0.02272727
## 78 0.04545455 0.93181818 0.02272727
## 79 0.04545455 0.93181818 0.02272727
## 80 0.04545455 0.93181818 0.02272727
## 81 0.04545455 0.93181818 0.02272727
## 82 0.04545455 0.93181818 0.02272727
## 83 0.04545455 0.93181818 0.02272727
## 84 0.01030928 0.04123711 0.94845361
## 85 0.95726496 0.04273504 0.00000000
## 86 0.04545455 0.93181818 0.02272727
# Predicted species label from tree1 for every test penguin, kept for the
# comparison against the true species below.
t_pred <- predict(
  tree1,
  newdata = penguin_test,
  type = "class"
)
t_pred
##         1         2         3         4         5         6         7         8 
##    Adelie    Adelie    Adelie    Adelie    Adelie    Adelie    Adelie    Adelie 
##         9        10        11        12        13        14        15        16 
##    Adelie    Adelie    Adelie    Adelie    Adelie    Adelie    Adelie    Adelie 
##        17        18        19        20        21        22        23        24 
##    Adelie    Adelie    Adelie    Adelie    Adelie Chinstrap    Adelie    Adelie 
##        25        26        27        28        29        30        31        32 
##    Adelie    Adelie    Adelie    Adelie    Gentoo    Adelie    Adelie    Adelie 
##        33        34        35        36        37        38        39        40 
##    Adelie    Adelie    Adelie    Adelie    Adelie    Gentoo    Gentoo    Gentoo 
##        41        42        43        44        45        46        47        48 
##    Gentoo    Gentoo    Gentoo    Gentoo    Gentoo    Gentoo    Gentoo    Gentoo 
##        49        50        51        52        53        54        55        56 
##    Gentoo    Gentoo    Gentoo    Gentoo    Gentoo    Gentoo    Gentoo    Gentoo 
##        57        58        59        60        61        62        63        64 
##    Gentoo    Gentoo    Gentoo    Gentoo    Gentoo    Gentoo    Gentoo    Gentoo 
##        65        66        67        68        69        70        71        72 
##    Gentoo    Gentoo    Gentoo    Gentoo Chinstrap Chinstrap Chinstrap Chinstrap 
##        73        74        75        76        77        78        79        80 
## Chinstrap Chinstrap Chinstrap Chinstrap Chinstrap Chinstrap Chinstrap Chinstrap 
##        81        82        83        84        85        86 
## Chinstrap Chinstrap Chinstrap    Gentoo    Adelie Chinstrap 
## Levels: Adelie Chinstrap Gentoo

Accuracy

# Cross-tabulate the true species (rows) against tree1's predicted classes
# (columns); correctly classified penguins lie on the diagonal.
confMat <- table(penguin_test$species, t_pred)
confMat
##            t_pred
##             Adelie Chinstrap Gentoo
##   Adelie        35         1      1
##   Chinstrap      1        16      1
##   Gentoo         0         0     31
# Overall accuracy: correctly classified test penguins (the diagonal of the
# confusion matrix) divided by all test penguins. Rows and columns share the
# same species order, so the diagonal pairs each species with itself.
accuracy <- sum(diag(confMat)) / sum(confMat)
accuracy
## [1] 0.9534884