Tidy Data Rule #3: Every Cell is a Single Value
This lesson is called Tidy Data Rule #3: Every Cell is a Single Value, part of the Going Deeper with R course. This lesson is called Tidy Data Rule #3: Every Cell is a Single Value, part of the Going Deeper with R course.
Transcript
Click on the transcript to go to that point in the video. Please note that transcripts are auto generated and may contain minor inaccuracies.
Loading transcript...
View code shown in video
# Load Packages -----------------------------------------------------------
library(tidyverse)
library(janitor)
library(readxl)
library(fs)
# Create Directory --------------------------------------------------------
dir_create("data-raw")
# Download Data -----------------------------------------------------------
# download.file(url = "https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/pagr_schools_math_tot_raceethnicity_2122.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/TestResults2019/pagr_schools_math_tot_raceethnicity_1819.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2018/pagr_schools_math_raceethnicity_1718.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_raceethnicity_1718.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2017/pagr_schools_math_raceethnicity_1617.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_raceethnicity_1617.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2016/pagr_schools_math_raceethnicity_1516.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_raceethnicity_1516.xlsx")
# Import Data -------------------------------------------------------------
math_scores_2021_2022 <-
read_excel("data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx") |>
clean_names()
third_grade_math_proficiency_2021_2022 <-
math_scores_2021_2022 |>
filter(student_group == "Total Population (All Students)") |>
filter(grade_level == "Grade 3") |>
select(academic_year, school_id, contains("number_level_")) |>
pivot_longer(
cols = contains("number_level_"),
names_to = "proficiency_level",
values_to = "number_of_students"
)
third_grade_math_proficiency_2021_2022 |>
mutate(
proficiency_level = str_remove(proficiency_level, pattern = "number_level_")
)
third_grade_math_proficiency_2021_2022 |>
mutate(
proficiency_level = recode_values(
proficiency_level,
"number_level_4" ~ "4",
"number_level_3" ~ "3",
"number_level_2" ~ "2",
"number_level_1" ~ "1"
)
)
third_grade_math_proficiency_2021_2022 |>
mutate(
proficiency_level = if_else(
proficiency_level == "number_level_4",
true = "4",
false = proficiency_level
)
) |>
mutate(
proficiency_level = if_else(
proficiency_level == "number_level_3",
true = "3",
false = proficiency_level
)
) |>
mutate(
proficiency_level = if_else(
proficiency_level == "number_level_2",
true = "2",
false = proficiency_level
)
) |>
mutate(
proficiency_level = if_else(
proficiency_level == "number_level_1",
true = "1",
false = proficiency_level
)
)
third_grade_math_proficiency_2021_2022 |>
mutate(
proficiency_level = case_when(
proficiency_level == "number_level_4" ~ "4",
proficiency_level == "number_level_3" ~ "3",
proficiency_level == "number_level_2" ~ "2",
proficiency_level == "number_level_1" ~ "1"
)
)
third_grade_math_proficiency_2021_2022 |>
mutate(proficiency_level = parse_number(proficiency_level))
Your Turn
Remove the “x2022_23_” portion of the
race_ethnicityvariable usingstr_remove().Convert all instances of the
race_ethnicityvariable to more meaningful observations (e.g. turn “american_indian_alaska_native” into “American Indian/Alaska Native”) usingrecode_values(),if_else(), orcase_when().
Use the following starter code to help you if necessary.
# Load Packages -----------------------------------------------------------
library(tidyverse)
library(fs)
library(readxl)
library(janitor)
# Create Directories ------------------------------------------------------
dir_create("data-raw")
# Download Data -----------------------------------------------------------
# https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20222023.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20222023.xlsx")
#
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20212022.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20212022.xlsx")
#
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20202021.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20202021.xlsx")
#
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20192020.xlsx")
#
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20182019.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20182019.xlsx")
# Import Data -------------------------------------------------------------
enrollment_2022_2023 <- read_excel(path = "data-raw/fallmembershipreport_20222023.xlsx",
sheet = "School 2022-23") |>
clean_names()
# Tidy and Clean Data -----------------------------------------------------
enrollment_by_race_ethnicity_2022_2023 <-
enrollment_2022_2023 |>
select(district_institution_id, x2022_23_american_indian_alaska_native:x2022_23_percent_multi_racial) |>
select(-contains("percent")) |>
pivot_longer(cols = -district_institution_id,
names_to = "race_ethnicity",
values_to = "number_of_students")
Learn More
Bob Rudis has a semi-complex article on how he uses case_when() to work with data related to his work on internet security.
You might also find this video by Sharon Machlis on case_when() helpful.
And, in case you think I’m the only one who loves case_when() check out this love letter to the function by Matt Kerlogue.
Have any questions? Put them below and we will help you out!
Course Content
44 Lessons
You need to be signed-in to comment on this post. Login.
Alex Campbell • April 16, 2026
I'm having issues with the recode_values() function. When I try to execute the code using the recode_values function, positron says it can't find the function. But when I use case_when(), I have no issues. What's confusing is that both functions are part of the dplyr package, so I can't figure out why positron is able to find one and not the other.
Here's the code:
enrollment_2022_2023_raceethnic <- enrollment_2022_2023 |> select( district_institution_id, school_institution_id, contains("x2022_23") ) |> select(-contains("percent"), -contains("grade"), -contains("total"), -contains("kindergarten")) |> pivot_longer( cols = c(-district_institution_id, -school_institution_id), names_to = "race_ethnicity", values_to = "number_of_students" ) |> mutate(race_ethnicity = str_remove(race_ethnicity, pattern = "x2022_23_")) |> mutate( race_ethnicity = recode_values( race_ethnicity, "american_indian_alaska_native" ~ "American Indian/Alaska Native", "asian" ~ "Asian", "native_hawaiian_pacific_islander" ~ "Native Hawaiian/Pacific Islander", "black_african_american" ~ "Black", "hispanic_latino" ~ "Latine", "white" ~ "White", "multi_racial" ~ "Multi-Racial" ) )
The error I get is:
Error in 'mutate()' at misc/code/week4lessoncode.R:82:1: i In argument: 'race_ethnicity = recode_values(...)'. Caused by error in 'recode_values()': ! could not find function recode_values"
Gracielle Higino Coach • April 16, 2026
Hi Alex! The
recode_values()is a very new function released in the latest version of{dplyr}. Make sure you have the latest version of this package installed (1.2.1), and if not, try installing the package again in a new R session.