Coder Social home page Coder Social logo

r-data-recipes's Introduction

r-data-recipes

A family cookbook of data Recipes.

Row totals

  • Calculate row totals using rowwise(), do(), and case_when()
# setup
library(tidyverse)
df <- data_frame(
  id = c(1,2,3,4),
  v1 = c(NA,1,1,NA),
  v2 = c(NA,1,NA,NA),
  v3 = c(NA,1,1,NA)
)
# mutate with if_else doesn't work
a <- df %>% mutate(v_sum =
    if_else(
        !(is.na(v1) & is.na(v2) & is.na(v3)),
        sum(v1, v2, v3, na.rm = TRUE), 
        if_else(
            is.na(v1) & is.na(v2) & is.na(v3),
            99,
            NA_real_)
    ))
# mutate with case_when doesn't work
b <- df %>% mutate(v_sum =
    case_when(
        !(is.na(v1) & is.na(v2) & is.na(v3)) ~ sum(v1, v2, v3, na.rm = TRUE),
        is.na(v1) & is.na(v2) & is.na(v3) ~ 99,
        TRUE ~ NA_real_)
)
# rowwise and do with case_when works, but requires binding the result back to the original data frame
c <- df %>% rowwise %>% do(v_sum =
case_when(
        !(is.na(.$v1) & is.na(.$v2) & is.na(.$v3)) ~ sum(.$v1, .$v2, .$v3, na.rm = TRUE),
        is.na(.$v1) & is.na(.$v2) & is.na(.$v3) ~ 99,
        TRUE ~ NA_real_)
) %>% unlist %>% as_tibble %>% bind_cols(df, .)

Partial string matching

  • %gin%: A reimagination of %in% using grepl() for partial string matching
# define %gin%
"%gin%" <- function(pattern, x) grepl(pattern, x)
# %in% evaluates to FALSE because it looks for full string matches
"a" %in% "apple"
# %gin% evaluates to TRUE
"a" %gin% "apple"

Keep distinct categories

  • Keep distinct instances of a category using if_else() and distinct()
# setup
library(tidyverse)
df <- data_frame(
  category = c("% proficient", "% proficient", "% proficient", "n proficient", "n proficient", "n proficient"),
  race = c("YES", "NO", "NO", "YES", "NO", "NO"),
  gender = c("NO", "NO", "YES", "NO", "NO", "NO"),
  frpl = c("NO", "NO", "NO", "NO", "YES", "NO"),
  race.x.gender = c("NO", "NO", "NO", "NO", "NO", "NO")
)
# answer 1
df %>%
  group_by(category) %>%
  mutate(
    race = TRUE & "YES" %in% race,
    gender = TRUE & "YES" %in% gender,
    frpl = TRUE & "YES" %in% frpl,
    race.x.gender = TRUE & "YES" %in% race.x.gender
  ) %>%
  distinct(.keep_all = TRUE)
# answer 2
df %>%
  group_by(category) %>%
  mutate(
    race = if_else("YES" %in% race, "YES", "NO"),
    gender = if_else("YES" %in% gender, "YES", "NO"),
    frpl = if_else("YES" %in% frpl, "YES", "NO"),
    race.x.gender = if_else("YES" %in% race.x.gender, "YES", "NO")
  ) %>%
  distinct(.keep_all = TRUE)

Reinstall packages after a major R update

  • Reinstall packages from your previous library after a major R update. This will work even if upgrading from R 3.x to 4.x. Note that RStudio may prompt you to restart R repeatedly; to keep the script going keep pressing "No" when this happens.
# setup
if (!require(tidyverse)) install.packages("tidyverse")
if (!require(fs)) install.packages("fs")
library(tidyverse)
library(fs)
# get all installed R versions
if (Sys.info()[["sysname"]] == "Darwin") { 
  r_dir <- tibble::tibble(path = fs::dir_ls(fs::path_dir(fs::path_dir(fs::path_dir(.libPaths()[[1]])))))
}
if (Sys.info()[["sysname"]] %in% c("Linux", "Windows")) {
  r_dir <- tibble::tibble(path = fs::dir_ls(fs::path_dir(.libPaths()[[1]])))
}
# cue music
r_dir <- r_dir %>%
  # drop current R version
  dplyr::filter(!(stringr::str_detect(path, "Current"))) %>%
  # extract the current and penultimate R versions as strings
  dplyr::rowwise() %>%
  dplyr::mutate(version = as.numeric(stringr::str_extract(path, "[0-9]\\.[0-9]"))) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(new_r = dplyr::nth(version, -1L), old_r = dplyr::nth(version, -2L)) %>%
  dplyr::mutate_at(vars("new_r", "old_r"), ~as.character(formatC(.x, digits = 1L, format = "f"))) %>%
  dplyr::filter(version == old_r)
# get new and old R library paths
new_libpath <- .libPaths()
old_libpath <- stringr::str_replace(new_libpath, r_dir$new_r, r_dir$old_r)
# get list of old installed R packages
pkg_list <- as.list(list.files(old_libpath))
# define install_all() function
install_all <- function(x) {
  print(x)
  install.packages(x, quiet = TRUE)
}
# install all R packages in pkg_list
purrr::quietly(purrr::walk(pkg_list, install_all))

Unnest all list-cols into columns

  • Unnest all list-cols in a data frame into columns for each unique element.
# setup
suppressPackageStartupMessages(library(dplyr))
library(purrr)
library(stats)
library(tibble)
library(tidyr)
# create `a`, a tbl with 1 list-col, note "four" is not lined up with "five" in row 3
a <- tibble::tribble(
  ~v1,     ~v2,
  "one",   c("four", "five", "six"),
  "two",   NA_character_,
  "three", "five"
)
# create `b`, an even more complicated tbl with 2 list-cols, separated by an atomic v3
b <- tibble::tribble(
  ~v1,     ~v2,                      ~v3,    ~v4,
  "one",   c("four", "five", "six"), "four", c("four", "five", "six"),
  "two",   NA_character_,            "five", "four",
  "three", "five",                   "six",  "six"
)
# implement `unnest_wide()`
unnest_wide <- function(.data) {
  stopifnot(is.data.frame(.data))
  .data <- tibble::rowid_to_column(.data)
  lst_index <- purrr::map_int(.data, is.list)
  lst_cols <- names(lst_index)[lst_index == 1L]
  lst_vals <- paste0(lst_cols, ".")
  unique_vals <- vector("list", length(lst_cols))
  tmp <- vector("list", length(lst_cols))
  for (i in seq_along(lst_cols)) {
    unique_vals[[i]] <- stats::na.omit(unique(unlist(.data[[lst_cols[i]]])))
    tmp[[i]] <- dplyr::select(.data, rowid, lst_cols[i])
    tmp[[i]] <- dplyr::mutate(tmp[[i]], !!lst_vals[i] := .data[[lst_cols[i]]])
    tmp[[i]] <- tidyr::unnest(tmp[[i]])
    tmp[[i]] <- dplyr::mutate(tmp[[i]], !!lst_cols[i] := match(tmp[[i]][[lst_cols[i]]], unique_vals[[i]]))
    tmp[[i]] <- tidyr::spread(tmp[[i]], !!lst_cols[i], !!lst_vals[i], convert = TRUE, sep = "_")
    tmp[[i]] <- dplyr::select_if(tmp[[i]], !grepl(paste0(lst_cols[i], "_NA"), colnames(tmp[[i]])))
    .data <- dplyr::select(.data, -(!!lst_cols[i]))
    .data <- dplyr::left_join(.data, tmp[[i]], by = "rowid")
  }
  .data <- dplyr::select(.data, -rowid)
  return(.data)
}
# run on `a` and `b`
(a_wide <- unnest_wide(a))
#> # A tibble: 3 x 4
#>   v1    v2_1  v2_2  v2_3 
#>   <chr> <chr> <chr> <chr>
#> 1 one   four  five  six  
#> 2 two   <NA>  <NA>  <NA> 
#> 3 three <NA>  five  <NA>
(b_wide <- unnest_wide(b))
#> # A tibble: 3 x 8
#>   v1    v3    v2_1  v2_2  v2_3  v4_1  v4_2  v4_3 
#>   <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 one   four  four  five  six   four  five  six  
#> 2 two   five  <NA>  <NA>  <NA>  four  <NA>  <NA> 
#> 3 three six   <NA>  five  <NA>  <NA>  <NA>  six

Created on 2018-07-25 by the reprex package (v0.2.0).

r-data-recipes's People

Contributors

gvelasq avatar ivelasq avatar

Watchers

James Cloos avatar

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    ๐Ÿ–– Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. ๐Ÿ“Š๐Ÿ“ˆ๐ŸŽ‰

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google โค๏ธ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.