Functions

This lesson is called Functions, part of the Going Deeper with R course. This lesson is called Functions, part of the Going Deeper with R course.

Transcript

Click on the transcript to go to that point in the video. Please note that transcripts are auto generated and may contain minor inaccuracies.

View code shown in video

# Load Packages -----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directories ------------------------------------------------------

dir_create("data-raw")

# Download Data -----------------------------------------------------------

# https://www.oregon.gov/ode/educator-resources/assessment/Pages/Assessment-Group-Reports.aspx

# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/pagr_schools_math_tot_raceethnicity_2122.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/TestResults2019/pagr_schools_math_tot_raceethnicity_1819.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2018/pagr_schools_math_raceethnicity_1718.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1718.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2017/pagr_schools_math_raceethnicity_1617.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1617.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2016/pagr_schools_math_raceethnicity_1516.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1516.xlsx")

# Import, Tidy, and Clean Data --------------------------------------------

# Load Packages -----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directories ------------------------------------------------------

dir_create("data-raw")

# Download Data -----------------------------------------------------------

# https://www.oregon.gov/ode/educator-resources/assessment/Pages/Assessment-Group-Reports.aspx

# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/pagr_schools_math_tot_raceethnicity_2122.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/TestResults2019/pagr_schools_math_tot_raceethnicity_1819.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2018/pagr_schools_math_raceethnicity_1718.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1718.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2017/pagr_schools_math_raceethnicity_1617.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1617.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2016/pagr_schools_math_raceethnicity_1516.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1516.xlsx")

# Import, Tidy, and Clean Data -------------------------------------------

clean_math_proficiency_data <- function(raw_data) {
  read_excel(
    path = raw_data
  ) |>
    clean_names() |>
    filter(student_group == "Total Population (All Students)") |>
    filter(grade_level == "Grade 3") |>
    select(academic_year, school_id, contains("number_level")) |>
    pivot_longer(
      cols = starts_with("number_level"),
      names_to = "proficiency_level",
      values_to = "number_of_students"
    ) |>
    mutate(proficiency_level = parse_number(proficiency_level)) |>
    mutate(number_of_students = parse_number(number_of_students)) |>
    mutate(
      pct = number_of_students / sum(number_of_students, na.rm = TRUE),
      .by = school_id
    )
}


third_grade_math_proficiency_2021_2022 <-
  clean_math_proficiency_data(
    raw_data = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx"
  )

third_grade_math_proficiency_2018_2019 <-
  clean_math_proficiency_data(
    raw_data = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx"
  )

third_grade_math_proficiency <-
  bind_rows(
    third_grade_math_proficiency_2021_2022,
    third_grade_math_proficiency_2018_2019
  )

Your Turn

Solution

# Load Packages ----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directory -------------------------------------------------------

dir_create("data-raw")

# Download Data ----------------------------------------------------------

# https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx

# download.file(
#   url = "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20222023.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20222023.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20212022.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20212022.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20202021.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20202021.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20192020.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20182019.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20182019.xlsx"
# )

# Tidy and Clean Function ------------------------------------------------

clean_enrollment_data <- function(excel_file, sheet_name) {
  read_excel(
    path = excel_file,
    sheet = sheet_name
  ) |>
    clean_names() |>
    select(
      1,
      3,
      7:19
    ) |>
    select(-contains("percent")) |>
    set_names(
      "district_institution_id",
      "school_institution_id",
      "american_indian_alaska_native",
      "asian",
      "native_hawaiian_pacific_islander",
      "black_african_american",
      "hispanic_latino",
      "white",
      "multi_racial"
    ) |>
    pivot_longer(
      cols = -c(district_institution_id, school_institution_id),
      names_to = "race_ethnicity",
      values_to = "number_of_students"
    ) |>
    mutate(
      race_ethnicity = recode_values(
        race_ethnicity,
        "american_indian_alaska_native" ~ "American Indian Alaska Native",
        "asian" ~ "Asian",
        "black_african_american" ~ "Black/African American",
        "hispanic_latino" ~ "Hispanic/Latino",
        "multiracial" ~ "Multi-Racial",
        "native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander",
        "white" ~ "White",
        "multi_racial" ~ "Multiracial"
      )
    ) |>
    mutate(number_of_students = parse_number(number_of_students)) |>
    group_by(district_institution_id, race_ethnicity) |>
    summarize(number_of_students = sum(number_of_students, na.rm = TRUE)) |>
    ungroup() |>
    group_by(district_institution_id) |>
    mutate(pct = number_of_students / sum(number_of_students)) |>
    ungroup() |>
    mutate(year = sheet_name) |>
    mutate(year = str_remove(year, "School "))
}

# Tidy and Clean Data ----------------------------------------------------

enrollment_by_race_ethnicity_2022_2023 <-
  clean_enrollment_data(
    excel_file = "data-raw/fallmembershipreport_20222023.xlsx",
    sheet_name = "School 2022-23"
  )

enrollment_by_race_ethnicity_2021_2022 <-
  clean_enrollment_data(
    excel_file = "data-raw/fallmembershipreport_20212022.xlsx",
    sheet_name = "School 2021-22"
  )

bind_rows(
  enrollment_by_race_ethnicity_2021_2022,
  enrollment_by_race_ethnicity_2022_2023
) |>
  view()

Create a function to clean each year of enrollment data.
To check that your function works, create enrollment_by_race_ethnicity_2021_2022 and enrollment_by_race_ethnicity_2022_2023 data frames and then bind them together with bind_rows().

This exercise is challenging! Use the starter code below to help you if you need to.

# Load Packages ----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directory -------------------------------------------------------

dir_create("data-raw")

# Download Data ----------------------------------------------------------

# https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx

# download.file(
#   url = "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20222023.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20222023.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20212022.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20212022.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20202021.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20202021.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20192020.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20182019.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20182019.xlsx"
# )

# Tidy and Clean Function ------------------------------------------------

clean_enrollment_data <- function(excel_file, sheet_name) {
  read_excel(
    path = YOURCODEHERE,
    sheet = YOURCODEHERE
  ) |>
    clean_names() |>
    select(
      1,
      3,
      7:19
    ) |>
    select(-contains("percent")) |>
    set_names(
      "district_institution_id",
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
    ) |>
    pivot_longer(
      cols = -c(district_institution_id, school_institution_id),
      names_to = "race_ethnicity",
      values_to = "number_of_students"
    ) |>
    mutate(
      race_ethnicity = recode_values(
        race_ethnicity,
        "american_indian_alaska_native" ~ "American Indian Alaska Native",
        "asian" ~ "Asian",
        "black_african_american" ~ "Black/African American",
        "hispanic_latino" ~ "Hispanic/Latino",
        "multiracial" ~ "Multi-Racial",
        "native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander",
        "white" ~ "White",
        "multi_racial" ~ "Multiracial"
      )
    ) |>
    mutate(number_of_students = parse_number(number_of_students)) |>
    group_by(district_institution_id, race_ethnicity) |>
    summarize(number_of_students = sum(number_of_students, na.rm = TRUE)) |>
    ungroup() |>
    group_by(district_institution_id) |>
    mutate(pct = number_of_students / sum(number_of_students)) |>
    ungroup() |>
    mutate(year = sheet_name) |>
    mutate(year = str_remove(year, "School "))
}

# Tidy and Clean Data ----------------------------------------------------

enrollment_by_race_ethnicity_2022_2023 <-
  clean_enrollment_data(
    excel_file = YOURCODEHERE,
    sheet_name = YOURCODEHERE
  )

enrollment_by_race_ethnicity_2021_2022 <-
  clean_enrollment_data(
    excel_file = YOURCODEHERE,
    sheet_name = YOURCODEHERE
  )

bind_rows(
  YOURCODEHERE,
  YOURCODEHERE
)

Learn More

Have any questions? Put them below and we will help you out!

You need to be signed-in to comment on this post. Login.

Alberto Cabrera • April 20, 2026

The starter code is in error. It assumes that the 7 columns reporting enrollment by race/ethnicity are in numeric order. After several trials, I realized that each enrollment column is followed by a column reporting enrollment by percentages. The correct select script should read: select(1, 3, 7, 9, 11, 13, 15, 17, 19) |> instead of select( 1, 3, 7:19 ) |>

Below, please find the function that worked:

clean_enrollment_data <- function(excel_file, sheet_name) { read_excel( path = excel_file, sheet = sheet_name ) |> clean_names() |> select(1, 3, 7, 9, 11, 13, 15, 17, 19) |> select(-contains("percent")) |> set_names( "district_institution_id", "school_institution_id", "american_indian_alaska_native", #(7) "asian", #(9) "native_hawaiian_pacific_islander", #(11) "black_african_american", #(13) "hispanic_latino", #(15) "white", #(17) "multiracial", #(19) ) |> pivot_longer( cols = -c(district_institution_id, school_institution_id), names_to = "race_ethnicity", values_to = "number_of_students" ) |> mutate( race_ethnicity = recode_values( race_ethnicity, "american_indian_alaska_native" ~ "American Indian Alaska Native", "asian" ~ "Asian", "black_african_american" ~ "Black/African American", "hispanic_latino" ~ "Hispanic/Latino", "multiracial" ~ "Multi-Racial", "native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander", "white" ~ "White", "multi_racial" ~ "Multiracial" ) ) |> mutate(number_of_students = parse_number(number_of_students)) |> group_by(district_institution_id, race_ethnicity) |> summarize(number_of_students = sum(number_of_students, na.rm = TRUE)) |> ungroup() |> group_by(district_institution_id) |> mutate(pct = number_of_students / sum(number_of_students, na.rm = TRUE)) |> ungroup() |> mutate(year = sheet_name) |> mutate(year = str_remove(year, "School ")) }

Alberto Cabrera • April 20, 2026

Correction to my previous note.

Watching the solution, I realized the select(1, 3, 7:19) |> option suggested works provided it is followed by select(-contains("percent")), which excludes all columns in between that report enrollment in percentages. I should have watched the solution. It would have made me realize I forgot to include select().

Sorry for the confusion.