Skip to content
Coming soon: Ally. Your guide to the world of AI and R. Learn More →
R for the Rest of Us Logo

Going Deeper with R

Functions

Transcript

Click on the transcript to go to that point in the video. Please note that transcripts are auto generated and may contain minor inaccuracies.

View code shown in video
# Load Packages -----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directories ------------------------------------------------------

dir_create("data-raw")

# Download Data -----------------------------------------------------------

# https://www.oregon.gov/ode/educator-resources/assessment/Pages/Assessment-Group-Reports.aspx

# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/pagr_schools_math_tot_raceethnicity_2122.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/TestResults2019/pagr_schools_math_tot_raceethnicity_1819.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2018/pagr_schools_math_raceethnicity_1718.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1718.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2017/pagr_schools_math_raceethnicity_1617.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1617.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2016/pagr_schools_math_raceethnicity_1516.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1516.xlsx")

# Import, Tidy, and Clean Data --------------------------------------------

# Load Packages -----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directories ------------------------------------------------------

dir_create("data-raw")

# Download Data -----------------------------------------------------------

# https://www.oregon.gov/ode/educator-resources/assessment/Pages/Assessment-Group-Reports.aspx

# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/pagr_schools_math_tot_raceethnicity_2122.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/TestResults2019/pagr_schools_math_tot_raceethnicity_1819.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2018/pagr_schools_math_raceethnicity_1718.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1718.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2017/pagr_schools_math_raceethnicity_1617.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1617.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2016/pagr_schools_math_raceethnicity_1516.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1516.xlsx")

# Import, Tidy, and Clean Data -------------------------------------------

clean_math_proficiency_data <- function(raw_data) {
  read_excel(
    path = raw_data
  ) |>
    clean_names() |>
    filter(student_group == "Total Population (All Students)") |>
    filter(grade_level == "Grade 3") |>
    select(academic_year, school_id, contains("number_level")) |>
    pivot_longer(
      cols = starts_with("number_level"),
      names_to = "proficiency_level",
      values_to = "number_of_students"
    ) |>
    mutate(proficiency_level = parse_number(proficiency_level)) |>
    mutate(number_of_students = parse_number(number_of_students)) |>
    mutate(
      pct = number_of_students / sum(number_of_students, na.rm = TRUE),
      .by = school_id
    )
}


third_grade_math_proficiency_2021_2022 <-
  clean_math_proficiency_data(
    raw_data = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx"
  )

third_grade_math_proficiency_2018_2019 <-
  clean_math_proficiency_data(
    raw_data = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx"
  )

third_grade_math_proficiency <-
  bind_rows(
    third_grade_math_proficiency_2021_2022,
    third_grade_math_proficiency_2018_2019
  )

Your Turn

  1. Create a function to clean each year of enrollment data.

  2. To check that your function works, create enrollment_by_race_ethnicity_2021_2022 and enrollment_by_race_ethnicity_2022_2023 data frames and then bind them together with bind_rows().

This exercise is challenging! Use the starter code below to help you if you need to.

# Load Packages ----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directory -------------------------------------------------------

dir_create("data-raw")

# Download Data ----------------------------------------------------------

# https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx

# download.file(
#   url = "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20222023.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20222023.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20212022.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20212022.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20202021.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20202021.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20192020.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20182019.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20182019.xlsx"
# )

# Tidy and Clean Function ------------------------------------------------

clean_enrollment_data <- function(excel_file, sheet_name) {
  read_excel(
    path = YOURCODEHERE,
    sheet = YOURCODEHERE
  ) |>
    clean_names() |>
    select(
      1,
      3,
      7:19
    ) |>
    select(-contains("percent")) |>
    set_names(
      "district_institution_id",
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
    ) |>
    pivot_longer(
      cols = -c(district_institution_id, school_institution_id),
      names_to = "race_ethnicity",
      values_to = "number_of_students"
    ) |>
    mutate(
      race_ethnicity = recode_values(
        race_ethnicity,
        "american_indian_alaska_native" ~ "American Indian Alaska Native",
        "asian" ~ "Asian",
        "black_african_american" ~ "Black/African American",
        "hispanic_latino" ~ "Hispanic/Latino",
        "multiracial" ~ "Multi-Racial",
        "native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander",
        "white" ~ "White",
        "multi_racial" ~ "Multiracial"
      )
    ) |>
    mutate(number_of_students = parse_number(number_of_students)) |>
    group_by(district_institution_id, race_ethnicity) |>
    summarize(number_of_students = sum(number_of_students, na.rm = TRUE)) |>
    ungroup() |>
    group_by(district_institution_id) |>
    mutate(pct = number_of_students / sum(number_of_students)) |>
    ungroup() |>
    mutate(year = sheet_name) |>
    mutate(year = str_remove(year, "School "))
}

# Tidy and Clean Data ----------------------------------------------------

enrollment_by_race_ethnicity_2022_2023 <-
  clean_enrollment_data(
    excel_file = YOURCODEHERE,
    sheet_name = YOURCODEHERE
  )

enrollment_by_race_ethnicity_2021_2022 <-
  clean_enrollment_data(
    excel_file = YOURCODEHERE,
    sheet_name = YOURCODEHERE
  )

bind_rows(
  YOURCODEHERE,
  YOURCODEHERE
)

Have any questions? Put them below and we will help you out!

You need to be signed-in to comment on this post. Login.

Alberto Cabrera

Alberto Cabrera • April 20, 2026

The starter code is in error. It assumes that the 7 columns reporting enrollment by race/ethnicity are in numeric order. After several trials, I realized that each enrollment column is followed by a column reporting enrollment by percentages. The correct select script should read: select(1, 3, 7, 9, 11, 13, 15, 17, 19) |> instead of select( 1, 3, 7:19 ) |>

Below, please find the function that worked:

clean_enrollment_data <- function(excel_file, sheet_name) { read_excel( path = excel_file, sheet = sheet_name ) |> clean_names() |> select(1, 3, 7, 9, 11, 13, 15, 17, 19) |> select(-contains("percent")) |> set_names( "district_institution_id", "school_institution_id", "american_indian_alaska_native", #(7) "asian", #(9) "native_hawaiian_pacific_islander", #(11) "black_african_american", #(13) "hispanic_latino", #(15) "white", #(17) "multiracial", #(19) ) |> pivot_longer( cols = -c(district_institution_id, school_institution_id), names_to = "race_ethnicity", values_to = "number_of_students" ) |> mutate( race_ethnicity = recode_values( race_ethnicity, "american_indian_alaska_native" ~ "American Indian Alaska Native", "asian" ~ "Asian", "black_african_american" ~ "Black/African American", "hispanic_latino" ~ "Hispanic/Latino", "multiracial" ~ "Multi-Racial", "native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander", "white" ~ "White", "multi_racial" ~ "Multiracial" ) ) |> mutate(number_of_students = parse_number(number_of_students)) |> group_by(district_institution_id, race_ethnicity) |> summarize(number_of_students = sum(number_of_students, na.rm = TRUE)) |> ungroup() |> group_by(district_institution_id) |> mutate(pct = number_of_students / sum(number_of_students, na.rm = TRUE)) |> ungroup() |> mutate(year = sheet_name) |> mutate(year = str_remove(year, "School ")) }

Alberto Cabrera

Alberto Cabrera • April 20, 2026

Correction to my previous note.

Watching the solution, I realized the select(1, 3, 7:19) |> option suggested works provided it is followed by select(-contains("percent")), which excludes all columns in between that report enrollment in percentages. I should have watched the solution. It would have made me realize I forgot to include select().

Sorry for the confusion.

Course Content

44 Lessons