👩 Cara Thompson
👩‍💻 Love for patterns in music & language, and a fascination with the human brain %>%
Psychology PhD %>%
Analysis of postgraduate medical examinations %>%
Data Visualisation Consultant
💙 Helping others maximise the impact of their expertise
Find out more: cararthompson.com/about
Change the theme
Change the colours
# Scatter plot of flipper length against bill length, coloured by species.
# The manual palette is an unnamed vector of three colours.
ggplot(data = palmerpenguins::penguins) +
  geom_point(
    mapping = aes(
      x = bill_length_mm,
      y = flipper_length_mm,
      colour = species
    ),
    size = 5,
    alpha = 0.9
  ) +
  labs(
    title = "Flipper lengths are proportional to bill lengths within each species",
    x = "Bill length (mm)",
    y = "Flipper length (mm)"
  ) +
  theme_minimal() +
  scale_colour_manual(values = c("pink", "orange", "darkgreen"))
Change the colours - wait a minute…
# Same plot as before, restricted to the first 200 rows of the data.
# The palette is still an unnamed vector of three colours.
ggplot(data = head(palmerpenguins::penguins, 200)) +
  geom_point(
    mapping = aes(
      x = bill_length_mm,
      y = flipper_length_mm,
      colour = species
    ),
    size = 5,
    alpha = 0.9
  ) +
  labs(
    title = "Flipper lengths are proportional to bill lengths within each species",
    x = "Bill length (mm)",
    y = "Flipper length (mm)"
  ) +
  theme_minimal() +
  scale_colour_manual(values = c("pink", "orange", "darkgreen"))
Mini tip #1: Named vectors for colours!
# Named palette: each colour is tied to a species name rather than a position.
penguin_colours <- c(
  Adelie = "pink",
  Chinstrap = "orange",
  Gentoo = "darkgreen"
)

# Flipper length against bill length for all penguins, using the named palette.
ggplot(data = palmerpenguins::penguins) +
  geom_point(
    mapping = aes(
      x = bill_length_mm,
      y = flipper_length_mm,
      colour = species
    ),
    size = 5,
    alpha = 0.9
  ) +
  labs(
    title = "Flipper lengths are proportional to bill lengths within each species",
    x = "Bill length (mm)",
    y = "Flipper length (mm)"
  ) +
  theme_minimal() +
  scale_colour_manual(values = penguin_colours)
Mini tip #1: Named vectors for colours!
# Named palette: each colour is tied to a species name rather than a position.
penguin_colours <- c(
  Adelie = "pink",
  Chinstrap = "orange",
  Gentoo = "darkgreen"
)

# Flipper length against bill length for the first 200 rows only, using the
# named palette.
ggplot(data = head(palmerpenguins::penguins, 200)) +
  geom_point(
    mapping = aes(
      x = bill_length_mm,
      y = flipper_length_mm,
      colour = species
    ),
    size = 5,
    alpha = 0.9
  ) +
  labs(
    title = "Flipper lengths are proportional to bill lengths within each species",
    x = "Bill length (mm)",
    y = "Flipper length (mm)"
  ) +
  theme_minimal() +
  scale_colour_manual(values = penguin_colours)
Mini tip #2: Invest in your own custom theme
# Named palette: each colour is tied to a species name.
penguin_colours <- c(
  Adelie = "pink",
  Chinstrap = "orange",
  Gentoo = "darkgreen"
)

# Same scatter plot, now styled with the custom theme_dt_demo() theme and an
# empty title for the colour legend.
ggplot(data = palmerpenguins::penguins) +
  geom_point(
    mapping = aes(
      x = bill_length_mm,
      y = flipper_length_mm,
      colour = species
    ),
    size = 5,
    alpha = 0.9
  ) +
  labs(
    title = "Flipper lengths are proportional to bill lengths within each species",
    x = "Bill length (mm)",
    y = "Flipper length (mm)",
    colour = ""
  ) +
  theme_dt_demo() +
  scale_colour_manual(values = penguin_colours)
Mini tip #2: Invest in your own custom theme with relative text sizes
# Named palette: each colour is tied to a species name.
penguin_colours <- c(
  Adelie = "pink",
  Chinstrap = "orange",
  Gentoo = "darkgreen"
)

# Same scatter plot with the custom theme, this time passing base_size = 16.
ggplot(data = palmerpenguins::penguins) +
  geom_point(
    mapping = aes(
      x = bill_length_mm,
      y = flipper_length_mm,
      colour = species
    ),
    size = 5,
    alpha = 0.9
  ) +
  labs(
    title = "Flipper lengths are proportional to bill lengths within each species",
    x = "Bill length (mm)",
    y = "Flipper length (mm)",
    colour = ""
  ) +
  theme_dt_demo(base_size = 16) +
  scale_colour_manual(values = penguin_colours)
I ❤️ {ggtext}
# Per-species mean bill and flipper length, used to position one label per
# species.
mean_x_y <- palmerpenguins::penguins |>
  group_by(species) |>
  summarise(
    mean_x = mean(bill_length_mm, na.rm = TRUE),
    mean_y = mean(flipper_length_mm, na.rm = TRUE)
  )

# Scatter plot with a text box naming each species at its mean position.
ggplot(data = palmerpenguins::penguins) +
  geom_point(
    mapping = aes(
      x = bill_length_mm,
      y = flipper_length_mm,
      colour = species
    ),
    size = 5,
    alpha = 0.9
  ) +
  ggtext::geom_textbox(
    data = mean_x_y,
    mapping = aes(x = mean_x, y = mean_y, label = species),
    size = 7
  ) +
  labs(
    title = "Flipper lengths are proportional to bill lengths within each species",
    x = "Bill length (mm)",
    y = "Flipper length (mm)",
    colour = ""
  ) +
  theme_dt_demo(16) +
  scale_colour_manual(values = penguin_colours)
I ❤️ {ggtext}
# Per-species label position (mean bill and flipper length) and group size.
mean_x_y <- palmerpenguins::penguins |>
  group_by(species) |>
  summarise(
    mean_x = mean(bill_length_mm, na.rm = TRUE),
    mean_y = mean(flipper_length_mm, na.rm = TRUE),
    count = length(species)
  )

# Scatter plot without a legend; each species is labelled in place with its
# name in bold (markdown) and its group size on a second line.
ggplot(data = palmerpenguins::penguins) +
  geom_point(
    mapping = aes(
      x = bill_length_mm,
      y = flipper_length_mm,
      colour = species
    ),
    size = 5,
    alpha = 0.9,
    show.legend = FALSE
  ) +
  ggtext::geom_textbox(
    data = mean_x_y,
    mapping = aes(
      x = mean_x,
      y = mean_y,
      label = paste0("**", species, "**", "<br>N = ", count)
    ),
    size = 7
  ) +
  labs(
    title = "Flipper lengths are proportional to bill lengths within each species",
    x = "Bill length (mm)",
    y = "Flipper length (mm)",
    colour = ""
  ) +
  theme_dt_demo(16) +
  scale_colour_manual(values = penguin_colours)
I ❤️ {ggtext}
# Per-species label position, mean body mass (in grams) and group size.
mean_x_y <- palmerpenguins::penguins |>
  group_by(species) |>
  summarise(
    mean_x = mean(bill_length_mm, na.rm = TRUE),
    mean_y = mean(flipper_length_mm, na.rm = TRUE),
    mean_weight = mean(body_mass_g, na.rm = TRUE),
    count = length(species)
  )

# Scatter plot without a legend; each label shows the species in bold, the
# group size, and the mean weight converted from g to kg (2 decimal places).
ggplot(data = palmerpenguins::penguins) +
  geom_point(
    mapping = aes(
      x = bill_length_mm,
      y = flipper_length_mm,
      colour = species
    ),
    size = 5,
    alpha = 0.9,
    show.legend = FALSE
  ) +
  ggtext::geom_textbox(
    data = mean_x_y,
    mapping = aes(
      x = mean_x,
      y = mean_y,
      label = paste0(
        "**", species, "**",
        " (N = ", count, ")",
        "<br>Mean weight: ", janitor::round_half_up(mean_weight/1000, 2), "kg"
      )
    ),
    size = 7
  ) +
  labs(
    title = "Flipper lengths are proportional to bill lengths within each species",
    x = "Bill length (mm)",
    y = "Flipper length (mm)",
    colour = ""
  ) +
  theme_dt_demo(16) +
  scale_colour_manual(values = penguin_colours)
I ❤️ {ggtext}
# Per-species label position, mean body mass (in grams) and group size.
mean_x_y <- palmerpenguins::penguins |>
  group_by(species) |>
  summarise(
    mean_x = mean(bill_length_mm, na.rm = TRUE),
    mean_y = mean(flipper_length_mm, na.rm = TRUE),
    mean_weight = mean(body_mass_g, na.rm = TRUE),
    count = n()
  )

# Scatter plot without a legend, with one styled text box per species.
palmerpenguins::penguins |>
  ggplot() +
  # Point size is the constant 5. The earlier `size = body_mass_g` mapping was
  # removed because a constant set outside aes() overrides the mapping.
  geom_point(
    aes(
      x = bill_length_mm,
      y = flipper_length_mm,
      colour = species
    ),
    size = 5,
    alpha = 0.9,
    show.legend = FALSE
  ) +
  # Label: species in bold (markdown), group size, mean weight in kg.
  ggtext::geom_textbox(
    data = mean_x_y,
    aes(
      x = mean_x,
      y = mean_y,
      label = paste0(
        "**", species, "**",
        " (N = ", count, ")",
        "<br>Mean weight: ", janitor::round_half_up(mean_weight / 1000, 2), "kg"
      )
    ),
    size = 7,
    family = "Work Sans",
    width = unit(20, "lines"),
    fill = "#FEFDFA",
    box.colour = NA,
    alpha = 0.9,
    halign = 0.5
  ) +
  labs(
    title = "Flipper lengths are proportional to bill lengths within each species",
    x = "Bill length (mm)",
    y = "Flipper length (mm)",
    colour = ""
  ) +
  theme_dt_demo() +
  scale_colour_manual(values = penguin_colours)
“Oh, I’m sorry, I actually just wanted the first 200 penguins…”
# Per-species label position, mean body mass (in grams) and group size,
# computed on the first 200 rows only.
mean_x_y <- head(palmerpenguins::penguins, 200) |>
  group_by(species) |>
  summarise(
    mean_x = mean(bill_length_mm, na.rm = TRUE),
    mean_y = mean(flipper_length_mm, na.rm = TRUE),
    mean_weight = mean(body_mass_g, na.rm = TRUE),
    count = n()
  )

# Same labelled scatter plot, drawn from the same 200-row subset.
head(palmerpenguins::penguins, 200) |>
  ggplot() +
  # Point size is the constant 5. The earlier `size = body_mass_g` mapping was
  # removed because a constant set outside aes() overrides the mapping.
  geom_point(
    aes(
      x = bill_length_mm,
      y = flipper_length_mm,
      colour = species
    ),
    size = 5,
    alpha = 0.9,
    show.legend = FALSE
  ) +
  # Label: species in bold (markdown), group size, mean weight in kg.
  ggtext::geom_textbox(
    data = mean_x_y,
    aes(
      x = mean_x,
      y = mean_y,
      label = paste0(
        "**", species, "**",
        " (N = ", count, ")",
        "<br>Mean weight: ", janitor::round_half_up(mean_weight / 1000, 2), "kg"
      )
    ),
    size = 7,
    family = "Work Sans",
    width = unit(20, "lines"),
    fill = "#FEFDFA",
    box.colour = NA,
    alpha = 0.9,
    halign = 0.5
  ) +
  labs(
    title = "Flipper lengths are proportional to bill lengths within each species",
    x = "Bill length (mm)",
    y = "Flipper length (mm)",
    colour = ""
  ) +
  theme_dt_demo(16) +
  scale_colour_manual(values = penguin_colours)
“Hmm, wait, could do both, for comparison?”
“… and could we check with 300 and 400 also?”
“I realise the meeting is in 5 minutes, but I really don’t like those fonts and colours… Could you change them?”
Our starting point
# Per-species label position, mean body mass (in grams) and group size.
mean_x_y <- palmerpenguins::penguins |>
  group_by(species) |>
  summarise(
    mean_x = mean(bill_length_mm, na.rm = TRUE),
    mean_y = mean(flipper_length_mm, na.rm = TRUE),
    mean_weight = mean(body_mass_g, na.rm = TRUE),
    count = n()
  )

# Labelled scatter plot for the full data set.
palmerpenguins::penguins |>
  ggplot() +
  # Point size is the constant 5. The earlier `size = body_mass_g` mapping was
  # removed because a constant set outside aes() overrides the mapping.
  geom_point(
    aes(
      x = bill_length_mm,
      y = flipper_length_mm,
      colour = species
    ),
    size = 5,
    alpha = 0.9,
    show.legend = FALSE
  ) +
  # Label: species in bold (markdown), group size, mean weight in kg.
  ggtext::geom_textbox(
    data = mean_x_y,
    aes(
      x = mean_x,
      y = mean_y,
      label = paste0(
        "**", species, "**",
        " (N = ", count, ")",
        "<br>Mean weight: ", janitor::round_half_up(mean_weight / 1000, 2), "kg"
      )
    ),
    size = 7,
    family = "Work Sans",
    width = unit(20, "lines"),
    fill = "#FEFDFA",
    box.colour = NA,
    alpha = 0.9,
    halign = 0.5
  ) +
  labs(
    title = "Flipper lengths are proportional to bill lengths within each species",
    x = "Bill length (mm)",
    y = "Flipper length (mm)",
    colour = ""
  ) +
  theme_dt_demo(16) +
  scale_colour_manual(values = penguin_colours)
Mini tips #3 and #4: Defaults and environments
#' Plot flipper length against bill length with one summary label per species
#'
#' @param df Data frame containing `species`, `bill_length_mm`,
#'   `flipper_length_mm` and `body_mass_g`. Defaults to
#'   `palmerpenguins::penguins`.
#' @param colours Colour values handed to `scale_colour_manual()`. The default
#'   `penguin_colours` is not defined inside this function; it is only looked
#'   up when the function is called, so it must exist by then.
#' @return A ggplot object.
make_penguin_plot <- function(df = palmerpenguins::penguins,
                              colours = penguin_colours) {
  # One row per species: label position (mean bill / flipper length), mean
  # body mass in grams and group size.
  mean_x_y <- df |>
    group_by(species) |>
    summarise(
      mean_x = mean(bill_length_mm, na.rm = TRUE),
      mean_y = mean(flipper_length_mm, na.rm = TRUE),
      mean_weight = mean(body_mass_g, na.rm = TRUE),
      count = n()
    )

  df |>
    ggplot() +
    # Point size is the constant 5. The earlier `size = body_mass_g` mapping
    # was removed because a constant set outside aes() overrides the mapping.
    geom_point(
      aes(
        x = bill_length_mm,
        y = flipper_length_mm,
        colour = species
      ),
      size = 5,
      alpha = 0.9,
      show.legend = FALSE
    ) +
    # Label: species in bold (markdown), group size, mean weight in kg.
    ggtext::geom_textbox(
      data = mean_x_y,
      aes(
        x = mean_x,
        y = mean_y,
        label = paste0(
          "**", species, "**",
          " (N = ", count, ")",
          "<br>Mean weight: ", janitor::round_half_up(mean_weight / 1000, 2), "kg"
        )
      ),
      size = 7,
      family = "Work Sans",
      width = unit(20, "lines"),
      fill = "#FEFDFA",
      box.colour = NA,
      alpha = 0.9,
      halign = 0.5
    ) +
    labs(
      title = "Flipper lengths are proportional to bill lengths within each species",
      x = "Bill length (mm)",
      y = "Flipper length (mm)",
      colour = ""
    ) +
    theme_dt_demo(16) +
    scale_colour_manual(values = colours)
}
Let’s see the original again…
And can we do it with just the first 300 and 400 penguins?
Actually, let’s do it by island…
Rather than grouping by species, can we group by island?
#' Plot flipper length against bill length with one summary label per species
#'
#' Grouping is hard-coded to `species` in this version.
#'
#' @param df Data frame containing `species`, `bill_length_mm`,
#'   `flipper_length_mm` and `body_mass_g`. Defaults to
#'   `palmerpenguins::penguins`.
#' @param colours Colour values handed to `scale_colour_manual()`. The default
#'   `penguin_colours` is not defined inside this function; it is only looked
#'   up when the function is called, so it must exist by then.
#' @return A ggplot object.
make_penguin_plot <- function(df = palmerpenguins::penguins,
                              colours = penguin_colours) {
  # One row per species: label position (mean bill / flipper length), mean
  # body mass in grams and group size.
  mean_x_y <- df |>
    group_by(species) |>
    summarise(
      mean_x = mean(bill_length_mm, na.rm = TRUE),
      mean_y = mean(flipper_length_mm, na.rm = TRUE),
      mean_weight = mean(body_mass_g, na.rm = TRUE),
      count = n()
    )

  df |>
    ggplot() +
    # Point size is the constant 5. The earlier `size = body_mass_g` mapping
    # was removed because a constant set outside aes() overrides the mapping.
    geom_point(
      aes(
        x = bill_length_mm,
        y = flipper_length_mm,
        colour = species
      ),
      size = 5,
      alpha = 0.9,
      show.legend = FALSE
    ) +
    # Label: species in bold (markdown), group size, mean weight in kg.
    ggtext::geom_textbox(
      data = mean_x_y,
      aes(
        x = mean_x,
        y = mean_y,
        label = paste0(
          "**", species, "**",
          " (N = ", count, ")",
          "<br>Mean weight: ", janitor::round_half_up(mean_weight / 1000, 2), "kg"
        )
      ),
      size = 7,
      family = "Work Sans",
      width = unit(20, "lines"),
      fill = "#FEFDFA",
      box.colour = NA,
      alpha = 0.9,
      halign = 0.5
    ) +
    labs(
      title = "Flipper lengths are proportional to bill lengths within each species",
      x = "Bill length (mm)",
      y = "Flipper length (mm)",
      colour = ""
    ) +
    theme_dt_demo(16) +
    scale_colour_manual(values = colours)
}
This doesn’t work…
# Teaching example: a first attempt at making the grouping column configurable.
# As the slide title says, this version does not work. `grouping_variable` is
# handed to group_by(), aes() and paste0() as-is, and its default is the bare
# symbol `species` rather than a string.
# NOTE(review): kept unchanged on purpose; a later version of this function
# takes the column name as a string instead.
make_penguin_plot <- function(df = palmerpenguins::penguins,
colours = penguin_colours,
grouping_variable = species) {
# Intended: one summary row per level of the grouping column.
mean_x_y <- df |>
group_by(grouping_variable) |>
summarise(mean_x = mean(bill_length_mm, na.rm = TRUE),
mean_y = mean(flipper_length_mm, na.rm = TRUE),
mean_weight = mean(body_mass_g, na.rm = TRUE),
count = length(grouping_variable))
df |>
ggplot() +
geom_point(aes(x = bill_length_mm,
y = flipper_length_mm,
colour = grouping_variable),
size = 5,
alpha = 0.9,
show.legend = FALSE) +
# Intended: one text box per group, placed at the group means.
ggtext::geom_textbox(data = mean_x_y,
aes(x = mean_x,
y = mean_y,
label = paste0("**", grouping_variable, "**",
" (N = ", count, ")",
"<br>Mean weight: ", janitor::round_half_up(mean_weight/1000, 2), "kg")),
size = 7,
family = "Work Sans",
width = unit(20, "lines"),
fill = "#FEFDFA",
box.colour = NA,
alpha = 0.9,
halign = 0.5) +
# The title is built from grouping_variable as well.
labs(title = paste0("Flipper lengths are proportional to bill lengths within each ",
grouping_variable),
x = "Bill length (mm)",
y = "Flipper length (mm)",
colour = "") +
theme_dt_demo(16) +
scale_colour_manual(values = colours)
}
We need get()!
# Teaching example: the grouping column is now passed as a string and looked
# up with get() wherever the column is needed.
# NOTE(review): this version is still incomplete on purpose. The slides that
# follow show geom_textbox() failing with "object 'species' not found" and
# point at the first column name of mean_x_y as the cause.
make_penguin_plot <- function(df = palmerpenguins::penguins,
colours = penguin_colours,
grouping_variable = "species") {
# One summary row per level of the grouping column.
mean_x_y <- df |>
group_by(get(grouping_variable)) |>
summarise(mean_x = mean(bill_length_mm, na.rm = TRUE),
mean_y = mean(flipper_length_mm, na.rm = TRUE),
mean_weight = mean(body_mass_g, na.rm = TRUE),
count = length(get(grouping_variable)))
df |>
ggplot() +
# size is given both as a mapping (body_mass_g) and as the constant 5.
geom_point(aes(x = bill_length_mm,
y = flipper_length_mm,
colour = get(grouping_variable),
size = body_mass_g),
size = 5,
alpha = 0.9,
show.legend = FALSE) +
# This layer uses mean_x_y as its data; the get() call in the label is the
# one reported in the error shown on the following slide.
ggtext::geom_textbox(data = mean_x_y,
aes(x = mean_x,
y = mean_y,
label = paste0("**", get(grouping_variable), "**",
" (N = ", count, ")",
"<br>Mean weight: ", janitor::round_half_up(mean_weight/1000, 2), "kg")),
size = 7,
family = "Work Sans",
width = unit(20, "lines"),
fill = "#FEFDFA",
box.colour = NA,
alpha = 0.9,
halign = 0.5) +
# Here grouping_variable is used as plain text in the title.
labs(title = paste0("Flipper lengths are proportional to bill lengths within each ",
grouping_variable),
x = "Bill length (mm)",
y = "Flipper length (mm)",
colour = "") +
theme_dt_demo(16) +
scale_colour_manual(values = colours)
}
get()? Interprets a string as an object name and returns its value
get()? Interprets a string as an object name and returns its value
Still one thing to fix…
Error in `ggtext::geom_textbox()`:
! Problem while computing aesthetics.
i Error occurred in the 2nd layer.
Caused by error in `get()`:
! object 'species' not found
# Flag make_penguin_plot() for debugging so the next call can be stepped
# through; the flag stays set until undebug(make_penguin_plot) is called.
debug(make_penguin_plot)
👀 the first column name…
#' Plot flipper length against bill length, labelled by a chosen grouping column
#'
#' @param df Data frame containing `bill_length_mm`, `flipper_length_mm`,
#'   `body_mass_g` and the column named by `grouping_variable`. Defaults to
#'   `palmerpenguins::penguins`.
#' @param colours Colour values handed to `scale_colour_manual()`. The default
#'   `penguin_colours` is not defined inside this function; it is only looked
#'   up when the function is called, so it must exist by then.
#' @param grouping_variable Name of the grouping column, as a single string.
#' @return A ggplot object.
make_penguin_plot <- function(df = palmerpenguins::penguins,
                              colours = penguin_colours,
                              grouping_variable = "species") {
  stopifnot(
    is.character(grouping_variable),
    length(grouping_variable) == 1L,
    grouping_variable %in% names(df)
  )

  # One row per group. across(all_of()) selects the column by its string name
  # and keeps that name in the result, so later layers can refer to it with
  # .data[[grouping_variable]] instead of a backticked `get(...)` column.
  mean_x_y <- df |>
    group_by(across(all_of(grouping_variable))) |>
    summarise(
      mean_x = mean(bill_length_mm, na.rm = TRUE),
      mean_y = mean(flipper_length_mm, na.rm = TRUE),
      mean_weight = mean(body_mass_g, na.rm = TRUE),
      count = n()
    )

  df |>
    ggplot() +
    # Point size is the constant 5. The earlier `size = body_mass_g` mapping
    # was removed because a constant set outside aes() overrides the mapping.
    geom_point(
      aes(
        x = bill_length_mm,
        y = flipper_length_mm,
        colour = .data[[grouping_variable]]
      ),
      size = 5,
      alpha = 0.9,
      show.legend = FALSE
    ) +
    # Label: group name in bold (markdown), group size, mean weight in kg.
    ggtext::geom_textbox(
      data = mean_x_y,
      aes(
        x = mean_x,
        y = mean_y,
        label = paste0(
          "**", .data[[grouping_variable]], "**",
          " (N = ", count, ")",
          "<br>Mean weight: ", janitor::round_half_up(mean_weight / 1000, 2), "kg"
        )
      ),
      size = 7,
      family = "Work Sans",
      width = unit(20, "lines"),
      fill = "#FEFDFA",
      box.colour = NA,
      alpha = 0.9,
      halign = 0.5
    ) +
    labs(
      title = paste0(
        "Flipper lengths are proportional to bill lengths within each ",
        grouping_variable
      ),
      x = "Bill length (mm)",
      y = "Flipper length (mm)",
      colour = ""
    ) +
    theme_dt_demo(16) +
    scale_colour_manual(values = colours)
}
Rather than grouping by species, can we group by island?
We need more colours!
Actually, can we just look at a few different grouping options?
Let’s make the labels optional!
#' Plot flipper length against bill length, with optional per-group labels
#'
#' @param df Data frame containing `bill_length_mm`, `flipper_length_mm`,
#'   `body_mass_g` and the column named by `grouping_variable`. Defaults to
#'   `palmerpenguins::penguins`.
#' @param colours Colour values handed to `scale_colour_manual()`. The default
#'   `penguin_colours` is not defined inside this function; it is only looked
#'   up when the function is called, so it must exist by then.
#' @param grouping_variable Name of the grouping column, as a single string.
#' @param add_labels If `TRUE`, draw one text box per group and hide the
#'   legend; otherwise return the plot with its colour legend.
#' @return A ggplot object.
make_penguin_plot <- function(df = palmerpenguins::penguins,
                              colours = penguin_colours,
                              grouping_variable = "species",
                              add_labels = TRUE) {
  stopifnot(
    is.character(grouping_variable),
    length(grouping_variable) == 1L,
    grouping_variable %in% names(df)
  )

  unlabelled_plot <- df |>
    ggplot() +
    # Point size is the constant 5. The earlier `size = body_mass_g` mapping
    # was removed because a constant set outside aes() overrides the mapping.
    # The legend is left on here; it is only hidden when labels are added.
    geom_point(
      aes(
        x = bill_length_mm,
        y = flipper_length_mm,
        colour = .data[[grouping_variable]]
      ),
      size = 5,
      alpha = 0.9
    ) +
    labs(
      title = paste0(
        "Flipper lengths are proportional to bill lengths within each ",
        grouping_variable
      ),
      x = "Bill length (mm)",
      y = "Flipper length (mm)",
      colour = ""
    ) +
    theme_dt_demo(16) +
    scale_colour_manual(values = colours)

  if (!isTRUE(add_labels)) {
    return(unlabelled_plot)
  }

  # The summary is only needed for the labels, so it is computed here.
  # across(all_of()) keeps the grouping column's own name in the result, so it
  # can be referred to with .data[[grouping_variable]] below.
  mean_x_y <- df |>
    group_by(across(all_of(grouping_variable))) |>
    summarise(
      mean_x = mean(bill_length_mm, na.rm = TRUE),
      mean_y = mean(flipper_length_mm, na.rm = TRUE),
      mean_weight = mean(body_mass_g, na.rm = TRUE),
      count = n()
    )

  unlabelled_plot +
    # Label: group name in bold (markdown), group size, mean weight in kg.
    ggtext::geom_textbox(
      data = mean_x_y,
      aes(
        x = mean_x,
        y = mean_y,
        label = paste0(
          "**", .data[[grouping_variable]], "**",
          " (N = ", count, ")",
          "<br>Mean weight: ", janitor::round_half_up(mean_weight / 1000, 2), "kg"
        )
      ),
      size = 7,
      family = "Work Sans",
      width = unit(20, "lines"),
      fill = "#FEFDFA",
      box.colour = NA,
      alpha = 0.9,
      halign = 0.5
    ) +
    theme(legend.position = "none")
}
Actually, can we just look at a few different grouping options?
Ok, now split by island within each sex.
Switching off the labels
Add an optional subtitle
#' Plot flipper length against bill length, with optional labels and subtitle
#'
#' @param df Data frame containing `bill_length_mm`, `flipper_length_mm`,
#'   `body_mass_g` and the column named by `grouping_variable`. Defaults to
#'   `palmerpenguins::penguins`.
#' @param colours Colour values handed to `scale_colour_manual()`. The default
#'   `penguin_colours` is not defined inside this function; it is only looked
#'   up when the function is called, so it must exist by then.
#' @param grouping_variable Name of the grouping column, as a single string.
#' @param subtitle Subtitle text, or `NA` (the default) for no subtitle.
#' @param add_labels If `TRUE`, draw one text box per group and hide the
#'   legend; otherwise keep the colour legend.
#' @return A ggplot object.
make_penguin_plot <- function(df = palmerpenguins::penguins,
                              colours = penguin_colours,
                              grouping_variable = "species",
                              subtitle = NA,
                              add_labels = TRUE) {
  stopifnot(
    is.character(grouping_variable),
    length(grouping_variable) == 1L,
    grouping_variable %in% names(df)
  )

  plot_to_export <- df |>
    ggplot() +
    # Point size is the constant 5. The earlier `size = body_mass_g` mapping
    # was removed because a constant set outside aes() overrides the mapping.
    # The legend is left on here; it is only hidden when labels are added.
    geom_point(
      aes(
        x = bill_length_mm,
        y = flipper_length_mm,
        colour = .data[[grouping_variable]]
      ),
      size = 5,
      alpha = 0.9
    ) +
    labs(
      title = paste0(
        "Flipper lengths are proportional to bill lengths within each ",
        grouping_variable
      ),
      x = "Bill length (mm)",
      y = "Flipper length (mm)",
      colour = ""
    ) +
    theme_dt_demo(16) +
    scale_colour_manual(values = colours)

  if (isTRUE(add_labels)) {
    # The summary is only needed for the labels. across(all_of()) keeps the
    # grouping column's own name in the result, so it can be referred to with
    # .data[[grouping_variable]] below.
    mean_x_y <- df |>
      group_by(across(all_of(grouping_variable))) |>
      summarise(
        mean_x = mean(bill_length_mm, na.rm = TRUE),
        mean_y = mean(flipper_length_mm, na.rm = TRUE),
        mean_weight = mean(body_mass_g, na.rm = TRUE),
        count = n()
      )

    plot_to_export <- plot_to_export +
      # Label: group name in bold (markdown), group size, mean weight in kg.
      ggtext::geom_textbox(
        data = mean_x_y,
        aes(
          x = mean_x,
          y = mean_y,
          label = paste0(
            "**", .data[[grouping_variable]], "**",
            " (N = ", count, ")",
            "<br>Mean weight: ", janitor::round_half_up(mean_weight / 1000, 2), "kg"
          )
        ),
        size = 7,
        family = "Work Sans",
        width = unit(20, "lines"),
        fill = "#FEFDFA",
        box.colour = NA,
        alpha = 0.9,
        halign = 0.5
      ) +
      theme(legend.position = "none")
  }

  # The length check keeps `if` from failing on NULL or on a longer vector.
  if (length(subtitle) == 1L && !is.na(subtitle)) {
    plot_to_export <- plot_to_export +
      labs(subtitle = subtitle)
  }

  plot_to_export
}
Add optional subtitle
The magic of ...
#' Plot flipper length against bill length, with optional labels, subtitle and
#' theme tweaks
#'
#' @param df Data frame containing `bill_length_mm`, `flipper_length_mm`,
#'   `body_mass_g` and the column named by `grouping_variable`. Defaults to
#'   `palmerpenguins::penguins`.
#' @param colours Colour values handed to `scale_colour_manual()`. The default
#'   `penguin_colours` is not defined inside this function; it is only looked
#'   up when the function is called, so it must exist by then.
#' @param grouping_variable Name of the grouping column, as a single string.
#' @param subtitle Subtitle text, or `NA` (the default) for no subtitle.
#' @param add_labels If `TRUE`, draw one text box per group and hide the
#'   legend; otherwise keep the colour legend.
#' @param ... Forwarded to `theme()`, applied after `theme_dt_demo(16)`. When
#'   `add_labels` is `TRUE`, `legend.position = "none"` is applied afterwards.
#' @return A ggplot object.
make_penguin_plot <- function(df = palmerpenguins::penguins,
                              colours = penguin_colours,
                              grouping_variable = "species",
                              subtitle = NA,
                              add_labels = TRUE,
                              ...) {
  stopifnot(
    is.character(grouping_variable),
    length(grouping_variable) == 1L,
    grouping_variable %in% names(df)
  )

  plot_to_export <- df |>
    ggplot() +
    # Point size is the constant 5. The earlier `size = body_mass_g` mapping
    # was removed because a constant set outside aes() overrides the mapping.
    # The legend is left on here; it is only hidden when labels are added.
    geom_point(
      aes(
        x = bill_length_mm,
        y = flipper_length_mm,
        colour = .data[[grouping_variable]]
      ),
      size = 5,
      alpha = 0.9
    ) +
    labs(
      title = paste0(
        "Flipper lengths are proportional to bill lengths within each ",
        grouping_variable
      ),
      x = "Bill length (mm)",
      y = "Flipper length (mm)",
      colour = ""
    ) +
    theme_dt_demo(16) +
    scale_colour_manual(values = colours) +
    # Caller-supplied theme overrides.
    theme(...)

  if (isTRUE(add_labels)) {
    # The summary is only needed for the labels. across(all_of()) keeps the
    # grouping column's own name in the result, so it can be referred to with
    # .data[[grouping_variable]] below.
    mean_x_y <- df |>
      group_by(across(all_of(grouping_variable))) |>
      summarise(
        mean_x = mean(bill_length_mm, na.rm = TRUE),
        mean_y = mean(flipper_length_mm, na.rm = TRUE),
        mean_weight = mean(body_mass_g, na.rm = TRUE),
        count = n()
      )

    plot_to_export <- plot_to_export +
      # Label: group name in bold (markdown), group size, mean weight in kg.
      ggtext::geom_textbox(
        data = mean_x_y,
        aes(
          x = mean_x,
          y = mean_y,
          label = paste0(
            "**", .data[[grouping_variable]], "**",
            " (N = ", count, ")",
            "<br>Mean weight: ", janitor::round_half_up(mean_weight / 1000, 2), "kg"
          )
        ),
        size = 7,
        family = "Work Sans",
        width = unit(20, "lines"),
        fill = "#FEFDFA",
        box.colour = NA,
        alpha = 0.9,
        halign = 0.5
      ) +
      theme(legend.position = "none")
  }

  # The length check keeps `if` from failing on NULL or on a longer vector.
  if (length(subtitle) == 1L && !is.na(subtitle)) {
    plot_to_export <- plot_to_export +
      labs(subtitle = subtitle)
  }

  plot_to_export
}
The magic of ...
A more sensible use case…