Tidy Data Rule #3: Every Cell is a Single Value

This lesson is called Tidy Data Rule #3: Every Cell is a Single Value, part of the Going Deeper with R course. This lesson is called Tidy Data Rule #3: Every Cell is a Single Value, part of the Going Deeper with R course.

Transcript

Click on the transcript to go to that point in the video. Please note that transcripts are auto generated and may contain minor inaccuracies.

View code shown in video

# Load Packages -----------------------------------------------------------

library(tidyverse)
library(janitor)
library(readxl)
library(fs)

# Create Directory --------------------------------------------------------

dir_create("data-raw")

# Download Data -----------------------------------------------------------

# download.file(url = "https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/pagr_schools_math_tot_raceethnicity_2122.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/TestResults2019/pagr_schools_math_tot_raceethnicity_1819.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2018/pagr_schools_math_raceethnicity_1718.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1718.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2017/pagr_schools_math_raceethnicity_1617.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1617.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2016/pagr_schools_math_raceethnicity_1516.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1516.xlsx")

# Import Data -------------------------------------------------------------

math_scores_2021_2022 <-
  read_excel("data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx") |>
  clean_names()

third_grade_math_proficiency_2021_2022 <-
  math_scores_2021_2022 |>
  filter(student_group == "Total Population (All Students)") |>
  filter(grade_level == "Grade 3") |>
  select(academic_year, school_id, contains("number_level_")) |>
  pivot_longer(
    cols = contains("number_level_"),
    names_to = "proficiency_level",
    values_to = "number_of_students"
  )

third_grade_math_proficiency_2021_2022 |>
  mutate(
    proficiency_level = str_remove(proficiency_level, pattern = "number_level_")
  )

third_grade_math_proficiency_2021_2022 |>
  mutate(
    proficiency_level = recode_values(
      proficiency_level,
      "number_level_4" ~ "4",
      "number_level_3" ~ "3",
      "number_level_2" ~ "2",
      "number_level_1" ~ "1"
    )
  )

third_grade_math_proficiency_2021_2022 |>
  mutate(
    proficiency_level = if_else(
      proficiency_level == "number_level_4",
      true = "4",
      false = proficiency_level
    )
  ) |>
  mutate(
    proficiency_level = if_else(
      proficiency_level == "number_level_3",
      true = "3",
      false = proficiency_level
    )
  ) |>
  mutate(
    proficiency_level = if_else(
      proficiency_level == "number_level_2",
      true = "2",
      false = proficiency_level
    )
  ) |>
  mutate(
    proficiency_level = if_else(
      proficiency_level == "number_level_1",
      true = "1",
      false = proficiency_level
    )
  )

third_grade_math_proficiency_2021_2022 |>
  mutate(
    proficiency_level = case_when(
      proficiency_level == "number_level_4" ~ "4",
      proficiency_level == "number_level_3" ~ "3",
      proficiency_level == "number_level_2" ~ "2",
      proficiency_level == "number_level_1" ~ "1"
    )
  )

third_grade_math_proficiency_2021_2022 |>
  mutate(proficiency_level = parse_number(proficiency_level))

Your Turn

Solution

# Load Packages ----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directory -------------------------------------------------------

dir_create("data-raw")

# Download Data ----------------------------------------------------------

# https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx

# download.file(
#   url = "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20222023.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20222023.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20212022.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20212022.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20202021.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20202021.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20192020.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20182019.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20182019.xlsx"
# )

# Import Data ------------------------------------------------------------

enrollment_2022_2023 <-
  read_excel(
    path = "data-raw/fallmembershipreport_20222023.xlsx",
    sheet = "School 2022-23"
  ) |>
  clean_names()

# Tidy and Clean Data ----------------------------------------------------

enrollment_by_race_ethnicity_2022_2023 <-
  enrollment_2022_2023 |>
  select(
    district_institution_id,
    school_institution_id,
    x2022_23_american_indian_alaska_native:x2022_23_multi_racial
  ) |>
  select(-contains("percent")) |>
  pivot_longer(
    cols = -c(district_institution_id, school_institution_id),
    names_to = "race_ethnicity",
    values_to = "number_of_students"
  )

enrollment_by_race_ethnicity_2022_2023 |>
  mutate(race_ethnicity = str_remove(race_ethnicity, "x2022_23_")) |>
  mutate(
    race_ethnicity = recode_values(
      race_ethnicity,
      "american_indian_alaska_native" ~ "American Indian Alaska Native",
      "asian" ~ "Asian",
      "black_african_american" ~ "Black/African American",
      "hispanic_latino" ~ "Hispanic/Latino",
      "multiracial" ~ "Multi-Racial",
      "native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander",
      "white" ~ "White",
      "multi_racial" ~ "Multiracial"
    )
  )

enrollment_by_race_ethnicity_2022_2023 |>
  mutate(race_ethnicity = str_remove(race_ethnicity, "x2022_23_")) |>
  mutate(
    race_ethnicity = case_when(
      race_ethnicity ==
        "american_indian_alaska_native" ~ "American Indian Alaska Native",
      race_ethnicity == "asian" ~ "Asian",
      race_ethnicity == "black_african_american" ~ "Black/African American",
      race_ethnicity == "hispanic_latino" ~ "Hispanic/Latino",
      race_ethnicity == "multiracial" ~ "Multi-Racial",
      race_ethnicity ==
        "native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander",
      race_ethnicity == "white" ~ "White",
      race_ethnicity == "multi_racial" ~ "Multiracial"
    )
  )

Remove the “x2022_23_” portion of the race_ethnicity variable using str_remove().
Convert all instances of the race_ethnicity variable to more meaningful observations (e.g. turn “american_indian_alaska_native” into “American Indian/Alaska Native”) using recode_values(), if_else(), or case_when().

Use the following starter code to help you if necessary.

# Load Packages -----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directories ------------------------------------------------------

dir_create("data-raw")

# Download Data -----------------------------------------------------------

# https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx

# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20222023.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20222023.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20212022.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20212022.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20202021.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20202021.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20192020.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20182019.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20182019.xlsx")

# Import Data -------------------------------------------------------------

enrollment_2022_2023 <- read_excel(path = "data-raw/fallmembershipreport_20222023.xlsx",
                                   sheet = "School 2022-23") |> 
  clean_names()

# Tidy and Clean Data -----------------------------------------------------

enrollment_by_race_ethnicity_2022_2023 <-
  enrollment_2022_2023 |> 
  select(district_institution_id, x2022_23_american_indian_alaska_native:x2022_23_percent_multi_racial) |> 
  select(-contains("percent")) |> 
  pivot_longer(cols = -district_institution_id,
               names_to = "race_ethnicity",
               values_to = "number_of_students")

Learn More

Bob Rudis has a semi-complex article on how he uses case_when() to work with data related to his work on internet security.

You might also find this video by Sharon Machlis on case_when() helpful.

And, in case you think I’m the only one who loves case_when() check out this love letter to the function by Matt Kerlogue.

Have any questions? Put them below and we will help you out!

You need to be signed-in to comment on this post. Login.

Alex Campbell • April 16, 2026

I'm having issues with the recode_values() function. When I try to execute the code using the recode_values function, positron says it can't find the function. But when I use case_when(), I have no issues. What's confusing is that both functions are part of the dplyr package, so I can't figure out why positron is able to find one and not the other.

Here's the code:

enrollment_2022_2023_raceethnic <- enrollment_2022_2023 |> select( district_institution_id, school_institution_id, contains("x2022_23") ) |> select(-contains("percent"), -contains("grade"), -contains("total"), -contains("kindergarten")) |> pivot_longer( cols = c(-district_institution_id, -school_institution_id), names_to = "race_ethnicity", values_to = "number_of_students" ) |> mutate(race_ethnicity = str_remove(race_ethnicity, pattern = "x2022_23_")) |> mutate( race_ethnicity = recode_values( race_ethnicity, "american_indian_alaska_native" ~ "American Indian/Alaska Native", "asian" ~ "Asian", "native_hawaiian_pacific_islander" ~ "Native Hawaiian/Pacific Islander", "black_african_american" ~ "Black", "hispanic_latino" ~ "Latine", "white" ~ "White", "multi_racial" ~ "Multi-Racial" ) )

The error I get is:

Error in 'mutate()' at misc/code/week4lessoncode.R:82:1: i In argument: 'race_ethnicity = recode_values(...)'. Caused by error in 'recode_values()': ! could not find function recode_values"

Gracielle Higino Coach • April 16, 2026

Hi Alex! The recode_values() is a very new function released in the latest version of {dplyr}. Make sure you have the latest version of this package installed (1.2.1), and if not, try installing the package again in a new R session.