Packages
library(tidymodels)
library(tidyverse)
library(palmerpenguins)
library(rpart)
library(skimr)
library(rpart.plot)
Data
# Load the Palmer penguins data explicitly from palmerpenguins.
# tidymodels attaches modeldata, which also ships a dataset named `penguins`
# (without the `year` column), so an unqualified data(penguins) depends on the
# order in which the packages were attached.
data("penguins", package = "palmerpenguins")
# Per-column overview: types, missing values, and summary statistics.
skim(penguins)
Data summary

| | |
|:--|:--|
| Name | penguins |
| Number of rows | 344 |
| Number of columns | 8 |
| Column type frequency: | |
| factor | 3 |
| numeric | 5 |
| Group variables | None |
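Variable type: factor

| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|:--|--:|--:|:--|--:|:--|
| species | 0 | 1.00 | FALSE | 3 | Ade: 152, Gen: 124, Chi: 68 |
| island | 0 | 1.00 | FALSE | 3 | Bis: 168, Dre: 124, Tor: 52 |
| sex | 11 | 0.97 | FALSE | 2 | mal: 168, fem: 165 |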
Variable type: numeric

| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|:--|--:|--:|--:|--:|--:|--:|--:|--:|--:|:--|
| bill_length_mm | 2 | 0.99 | 43.92 | 5.46 | 32.1 | 39.23 | 44.45 | 48.5 | 59.6 | ▃▇▇▆▁ |
| bill_depth_mm | 2 | 0.99 | 17.15 | 1.97 | 13.1 | 15.60 | 17.30 | 18.7 | 21.5 | ▅▅▇▇▂ |
| flipper_length_mm | 2 | 0.99 | 200.92 | 14.06 | 172.0 | 190.00 | 197.00 | 213.0 | 231.0 | ▂▇▃▅▂ |
| body_mass_g | 2 | 0.99 | 4201.75 | 801.95 | 2700.0 | 3550.00 | 4050.00 | 4750.0 | 6300.0 | ▃▇▆▃▂ |
| year | 0 | 1.00 | 2008.03 | 0.82 | 2007.0 | 2007.00 | 2008.00 | 2009.0 | 2009.0 | ▇▁▇▁▇ |
Split data
# Seed the RNG so the random train/test partition is reproducible.
set.seed(123)
# Randomly partition the rows; prop = 3/4 is initial_split()'s default,
# spelled out here so the training share is visible.
penguin_split <- initial_split(penguins, prop = 3 / 4)

# Training portion of the split.
penguin_train <- penguin_split %>% training()
dim(penguin_train)
## [1] 258 8
head(penguin_train)
## # A tibble: 6 × 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <fct> <fct> <dbl> <dbl> <int> <int> <fct>
## 1 Gentoo Biscoe 44.5 14.3 216 4100 <NA>
## 2 Adelie Torge… 38.6 21.2 191 3800 male
## 3 Gentoo Biscoe 45.3 13.7 210 4300 fema…
## 4 Chinst… Dream 52.8 20 205 4550 male
## 5 Adelie Torge… 37.3 20.5 199 3775 male
## 6 Chinst… Dream 43.2 16.6 187 2900 fema…
## # … with 1 more variable: year <int>

# Held-out portion of the split, used only for evaluation below.
penguin_test <- penguin_split %>% testing()
dim(penguin_test)
## [1] 86 8
Build decision tree
# Classification trees for `species` using every other column as a predictor.
# `cp` is rpart's complexity parameter: a split is only attempted if it
# improves the overall fit by at least that factor, so a larger value yields
# a smaller tree.
tree1 <- rpart(
  formula = species ~ .,
  data = penguin_train,
  cp = 0.1
)
rpart.plot(tree1, box.palette = "RdBu", shadow.col = "gray", nn = TRUE)

# Same model with a much stricter complexity threshold, for comparison.
tree2 <- rpart(
  formula = species ~ .,
  data = penguin_train,
  cp = 0.5
)
rpart.plot(tree2, box.palette = "RdBu", shadow.col = "gray", nn = TRUE)
Predict
# Class-membership probabilities for each test row (one column per species).
# "prob" is what predict() returns by default for a classification tree.
predict(tree1, newdata = penguin_test, type = "prob")
## Adelie Chinstrap Gentoo
## 1 0.95726496 0.04273504 0.00000000
## 2 0.95726496 0.04273504 0.00000000
## 3 0.95726496 0.04273504 0.00000000
## 4 0.95726496 0.04273504 0.00000000
## 5 0.95726496 0.04273504 0.00000000
## 6 0.95726496 0.04273504 0.00000000
## 7 0.95726496 0.04273504 0.00000000
## 8 0.95726496 0.04273504 0.00000000
## 9 0.95726496 0.04273504 0.00000000
## 10 0.95726496 0.04273504 0.00000000
## 11 0.95726496 0.04273504 0.00000000
## 12 0.95726496 0.04273504 0.00000000
## 13 0.95726496 0.04273504 0.00000000
## 14 0.95726496 0.04273504 0.00000000
## 15 0.95726496 0.04273504 0.00000000
## 16 0.95726496 0.04273504 0.00000000
## 17 0.95726496 0.04273504 0.00000000
## 18 0.95726496 0.04273504 0.00000000
## 19 0.95726496 0.04273504 0.00000000
## 20 0.95726496 0.04273504 0.00000000
## 21 0.95726496 0.04273504 0.00000000
## 22 0.04545455 0.93181818 0.02272727
## 23 0.95726496 0.04273504 0.00000000
## 24 0.95726496 0.04273504 0.00000000
## 25 0.95726496 0.04273504 0.00000000
## 26 0.95726496 0.04273504 0.00000000
## 27 0.95726496 0.04273504 0.00000000
## 28 0.95726496 0.04273504 0.00000000
## 29 0.01030928 0.04123711 0.94845361
## 30 0.95726496 0.04273504 0.00000000
## 31 0.95726496 0.04273504 0.00000000
## 32 0.95726496 0.04273504 0.00000000
## 33 0.95726496 0.04273504 0.00000000
## 34 0.95726496 0.04273504 0.00000000
## 35 0.95726496 0.04273504 0.00000000
## 36 0.95726496 0.04273504 0.00000000
## 37 0.95726496 0.04273504 0.00000000
## 38 0.01030928 0.04123711 0.94845361
## 39 0.01030928 0.04123711 0.94845361
## 40 0.01030928 0.04123711 0.94845361
## 41 0.01030928 0.04123711 0.94845361
## 42 0.01030928 0.04123711 0.94845361
## 43 0.01030928 0.04123711 0.94845361
## 44 0.01030928 0.04123711 0.94845361
## 45 0.01030928 0.04123711 0.94845361
## 46 0.01030928 0.04123711 0.94845361
## 47 0.01030928 0.04123711 0.94845361
## 48 0.01030928 0.04123711 0.94845361
## 49 0.01030928 0.04123711 0.94845361
## 50 0.01030928 0.04123711 0.94845361
## 51 0.01030928 0.04123711 0.94845361
## 52 0.01030928 0.04123711 0.94845361
## 53 0.01030928 0.04123711 0.94845361
## 54 0.01030928 0.04123711 0.94845361
## 55 0.01030928 0.04123711 0.94845361
## 56 0.01030928 0.04123711 0.94845361
## 57 0.01030928 0.04123711 0.94845361
## 58 0.01030928 0.04123711 0.94845361
## 59 0.01030928 0.04123711 0.94845361
## 60 0.01030928 0.04123711 0.94845361
## 61 0.01030928 0.04123711 0.94845361
## 62 0.01030928 0.04123711 0.94845361
## 63 0.01030928 0.04123711 0.94845361
## 64 0.01030928 0.04123711 0.94845361
## 65 0.01030928 0.04123711 0.94845361
## 66 0.01030928 0.04123711 0.94845361
## 67 0.01030928 0.04123711 0.94845361
## 68 0.01030928 0.04123711 0.94845361
## 69 0.04545455 0.93181818 0.02272727
## 70 0.04545455 0.93181818 0.02272727
## 71 0.04545455 0.93181818 0.02272727
## 72 0.04545455 0.93181818 0.02272727
## 73 0.04545455 0.93181818 0.02272727
## 74 0.04545455 0.93181818 0.02272727
## 75 0.04545455 0.93181818 0.02272727
## 76 0.04545455 0.93181818 0.02272727
## 77 0.04545455 0.93181818 0.02272727
## 78 0.04545455 0.93181818 0.02272727
## 79 0.04545455 0.93181818 0.02272727
## 80 0.04545455 0.93181818 0.02272727
## 81 0.04545455 0.93181818 0.02272727
## 82 0.04545455 0.93181818 0.02272727
## 83 0.04545455 0.93181818 0.02272727
## 84 0.01030928 0.04123711 0.94845361
## 85 0.95726496 0.04273504 0.00000000
## 86 0.04545455 0.93181818 0.02272727
# Predicted species label for each test row, returned as a factor.
t_pred <- predict(tree1, newdata = penguin_test, type = "class")
print(t_pred)
## 1 2 3 4 5 6 7 8
## Adelie Adelie Adelie Adelie Adelie Adelie Adelie Adelie
## 9 10 11 12 13 14 15 16
## Adelie Adelie Adelie Adelie Adelie Adelie Adelie Adelie
## 17 18 19 20 21 22 23 24
## Adelie Adelie Adelie Adelie Adelie Chinstrap Adelie Adelie
## 25 26 27 28 29 30 31 32
## Adelie Adelie Adelie Adelie Gentoo Adelie Adelie Adelie
## 33 34 35 36 37 38 39 40
## Adelie Adelie Adelie Adelie Adelie Gentoo Gentoo Gentoo
## 41 42 43 44 45 46 47 48
## Gentoo Gentoo Gentoo Gentoo Gentoo Gentoo Gentoo Gentoo
## 49 50 51 52 53 54 55 56
## Gentoo Gentoo Gentoo Gentoo Gentoo Gentoo Gentoo Gentoo
## 57 58 59 60 61 62 63 64
## Gentoo Gentoo Gentoo Gentoo Gentoo Gentoo Gentoo Gentoo
## 65 66 67 68 69 70 71 72
## Gentoo Gentoo Gentoo Gentoo Chinstrap Chinstrap Chinstrap Chinstrap
## 73 74 75 76 77 78 79 80
## Chinstrap Chinstrap Chinstrap Chinstrap Chinstrap Chinstrap Chinstrap Chinstrap
## 81 82 83 84 85 86
## Chinstrap Chinstrap Chinstrap Gentoo Adelie Chinstrap
## Levels: Adelie Chinstrap Gentoo
Accuracy
# Confusion matrix on the held-out test set:
# rows are the true species, columns are the labels in t_pred.
confMat <- table(penguin_test$species, t_pred)
confMat
## t_pred
## Adelie Chinstrap Gentoo
## Adelie 35 1 1
## Chinstrap 1 16 1
## Gentoo 0 0 31
# Overall accuracy: correctly classified rows (the diagonal) over all rows.
accuracy <- sum(diag(confMat)) / sum(confMat)
accuracy
## [1] 0.9534884