Interactivity and Dashboards


DUMMY CONTENT


Data Wrangling

Edges from grouped data

As an example, let’s look at the Fair Use tidytuesday dataset (2023-08-29), which records U.S. copyright fair use court cases.

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.1     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.3     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Case-level table from the 2023-08-29 tidytuesday release.
fair_use_cases <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-08-29/fair_use_cases.csv"
)
Rows: 251 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): case, court, jurisdiction, categories, outcome
dbl (1): year
lgl (1): fair_use_found

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Companion findings table from the same tidytuesday release.
# NOTE(review): not referenced again in the visible part of this document.
fair_use_findings <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-08-29/fair_use_findings.csv"
)
Rows: 251 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (9): title, case_number, year, court, key_facts, issue, holding, tags, o...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# One row per (case, category) pair, restricted to categories that occur in
# at least 10 rows once the ";"-delimited category lists are split apart.
case_categories <- fair_use_cases %>%
  select(case, categories) %>%
  separate_longer_delim(categories, delim = ";") %>%
  # Normalise whitespace and capitalisation so equal categories compare equal.
  mutate(categories = str_to_sentence(str_trim(categories))) %>%
  # Attach the number of rows in each category without collapsing the data.
  add_count(categories, name = "n_category") %>%
  filter(n_category >= 10)

case_categories
# A tibble: 516 × 3
   case                                                    categories n_category
   <chr>                                                   <chr>           <int>
 1 De Fontbrune v. Wofsy, 39 F.4th 1214 (9th Cir. 2022)    Education…         56
 2 De Fontbrune v. Wofsy, 39 F.4th 1214 (9th Cir. 2022)    Photograph         56
 3 Sedlik v. Von Drachenberg, No. CV 21-1102 (C.D. Cal. M… Painting/…         31
 4 Sedlik v. Von Drachenberg, No. CV 21-1102 (C.D. Cal. M… Photograph         56
 5 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No… Film/audi…         59
 6 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No… Music              25
 7 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No… Parody/sa…         33
 8 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No… Review/co…         41
 9 Am. Soc'y for Testing & Materials v. Public.Resource.O… Education…         56
10 Am. Soc'y for Testing & Materials v. Public.Resource.O… Textual w…         93
# ℹ 506 more rows

I want to create a graph where the nodes are case categories and an edge indicates that two categories appear together in the same case. To start with, we need nodes with IDs:

# Node table: one row per category, numbered in alphabetical order.
nodes_categories <- case_categories %>%
  distinct(name = categories) %>%
  arrange(name) %>%
  mutate(id = row_number(), .before = name)

nodes_categories
# A tibble: 13 × 2
      id name                          
   <int> <chr>                         
 1     1 Computer program              
 2     2 Education/scholarship/research
 3     3 Film/audiovisual              
 4     4 Format shifting/space shifting
 5     5 Internet/digitization         
 6     6 Music                         
 7     7 News reporting                
 8     8 Painting/drawing/graphic      
 9     9 Parody/satire                 
10    10 Photograph                    
11    11 Review/commentary             
12    12 Textual work                  
13    13 Unpublished                   

To begin creating the edges let’s replace the category names with their node IDs:

# Replace each category name with the id of its node, leaving (case, id).
categories_to_cases <- case_categories %>%
  select(case, categories) %>%
  left_join(nodes_categories, by = join_by(categories == name)) %>%
  select(!categories)

categories_to_cases
# A tibble: 516 × 2
   case                                                                       id
   <chr>                                                                   <int>
 1 De Fontbrune v. Wofsy, 39 F.4th 1214 (9th Cir. 2022)                        2
 2 De Fontbrune v. Wofsy, 39 F.4th 1214 (9th Cir. 2022)                       10
 3 Sedlik v. Von Drachenberg, No. CV 21-1102 (C.D. Cal. May 31, 2022)          8
 4 Sedlik v. Von Drachenberg, No. CV 21-1102 (C.D. Cal. May 31, 2022)         10
 5 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-7470-LTS…     3
 6 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-7470-LTS…     6
 7 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-7470-LTS…     9
 8 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-7470-LTS…    11
 9 Am. Soc'y for Testing & Materials v. Public.Resource.Org, Inc., No. 13…     2
10 Am. Soc'y for Testing & Materials v. Public.Resource.Org, Inc., No. 13…    12
# ℹ 506 more rows

Now we can inner_join() this object with itself on case, which pairs every category of a case with every category of that same case. The filter(id.x != id.y) line removes all “loops” from the graph, i.e. categories connected with themselves.

# Edge list: self-join the (case, id) table on `case` so that every pair of
# categories sharing a case becomes one row, then drop self-loops.
edges_categories <- categories_to_cases %>%
  inner_join(
    categories_to_cases,
    by = "case",
    # A case has several categories on both sides of the self-join, so the
    # many-to-many fan-out is intended; declaring it silences dplyr's warning.
    relationship = "many-to-many"
  ) %>%
  filter(id.x != id.y) %>%
  select(
    from = id.x,
    to = id.y,
    case
  )
Warning in inner_join(., categories_to_cases, by = "case"): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
edges_categories
# A tibble: 818 × 3
    from    to case                                                             
   <int> <int> <chr>                                                            
 1     2    10 De Fontbrune v. Wofsy, 39 F.4th 1214 (9th Cir. 2022)             
 2    10     2 De Fontbrune v. Wofsy, 39 F.4th 1214 (9th Cir. 2022)             
 3     8    10 Sedlik v. Von Drachenberg, No. CV 21-1102 (C.D. Cal. May 31, 202…
 4    10     8 Sedlik v. Von Drachenberg, No. CV 21-1102 (C.D. Cal. May 31, 202…
 5     3     6 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-74…
 6     3     9 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-74…
 7     3    11 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-74…
 8     6     3 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-74…
 9     6     9 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-74…
10     6    11 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-74…
# ℹ 808 more rows

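This list of edges contains duplicates: every pair appears in both directions (e.g. 2 → 10 and 10 → 2), and again for each case in which the two categories occur together. That’s important to keep in mind when proceeding to create a graph.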

Creating a function

case_categories
# A tibble: 516 × 3
   case                                                    categories n_category
   <chr>                                                   <chr>           <int>
 1 De Fontbrune v. Wofsy, 39 F.4th 1214 (9th Cir. 2022)    Education…         56
 2 De Fontbrune v. Wofsy, 39 F.4th 1214 (9th Cir. 2022)    Photograph         56
 3 Sedlik v. Von Drachenberg, No. CV 21-1102 (C.D. Cal. M… Painting/…         31
 4 Sedlik v. Von Drachenberg, No. CV 21-1102 (C.D. Cal. M… Photograph         56
 5 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No… Film/audi…         59
 6 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No… Music              25
 7 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No… Parody/sa…         33
 8 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No… Review/co…         41
 9 Am. Soc'y for Testing & Materials v. Public.Resource.O… Education…         56
10 Am. Soc'y for Testing & Materials v. Public.Resource.O… Textual w…         93
# ℹ 506 more rows
#' Build a node table from the distinct values of one column
#'
#' @param data_by_group A data frame containing the node-name column.
#' @param node_name_col Unquoted name of the column whose distinct values
#'   become nodes.
#' @return A data frame with an integer `id` column (row number after sorting
#'   by node name) followed by a `name` column.
make_nodes_from_grouped <- function(data_by_group,
                                    node_name_col) {
  unique_nodes <- distinct(data_by_group, {{ node_name_col }})
  sorted_nodes <- arrange(unique_nodes, {{ node_name_col }})
  # Ids follow the sorted order, so they are stable for a given set of names.
  numbered_nodes <- mutate(sorted_nodes, id = row_number())

  select(numbered_nodes, id, name = {{ node_name_col }})
}

# Same node table as before, this time produced by the helper.
nodes_from_fn <- make_nodes_from_grouped(
  data_by_group = case_categories,
  node_name_col = categories
)
#' Build an edge list linking nodes that share a group
#'
#' Two nodes are connected whenever they occur in the same group. Each pair is
#' returned in both directions and once per group in which it occurs;
#' self-loops are removed.
#'
#' @param data_by_group A data frame with one row per (group, node name) pair.
#' @param data_nodes A node table with columns `id` and `name`, e.g. the
#'   result of `make_nodes_from_grouped()`.
#' @param grouping_col Unquoted name of the column in `data_by_group` that
#'   identifies the group.
#' @param node_name_col Unquoted name of the column in `data_by_group` holding
#'   node names; matched against `data_nodes$name`.
#' @return A data frame with columns `from`, `to` and the grouping column.
make_edges_from_grouped <- function(data_by_group,
                                    data_nodes,
                                    grouping_col,
                                    node_name_col) {
  # The join `by` specifications need the column names as strings.
  group_name <- rlang::as_name(rlang::enquo(grouping_col))
  node_name <- rlang::as_name(rlang::enquo(node_name_col))

  # Swap node names for node ids and keep only what the self-join needs, so
  # unrelated columns are not duplicated with .x/.y suffixes.
  grouped_by_nodes <- data_by_group %>%
    left_join(data_nodes, by = setNames("name", node_name)) %>%
    select({{ grouping_col }}, id)

  # Return the computed edges (not a same-named object from the calling
  # environment). The self-join fans out by design, hence the explicit
  # many-to-many relationship.
  grouped_by_nodes %>%
    inner_join(
      grouped_by_nodes,
      by = group_name,
      relationship = "many-to-many"
    ) %>%
    filter(id.x != id.y) %>%
    select(
      from = id.x,
      to = id.y,
      {{ grouping_col }}
    )
}

# Build the edge list with the helper, naming every argument explicitly.
make_edges_from_grouped(
  data_by_group = case_categories,
  data_nodes = nodes_from_fn,
  grouping_col = case,
  node_name_col = categories
)
Warning in inner_join(., grouped_by_nodes, by = rlang::as_name(rlang::enquo(grouping_col))): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
# A tibble: 818 × 3
    from    to case                                                             
   <int> <int> <chr>                                                            
 1     2    10 De Fontbrune v. Wofsy, 39 F.4th 1214 (9th Cir. 2022)             
 2    10     2 De Fontbrune v. Wofsy, 39 F.4th 1214 (9th Cir. 2022)             
 3     8    10 Sedlik v. Von Drachenberg, No. CV 21-1102 (C.D. Cal. May 31, 202…
 4    10     8 Sedlik v. Von Drachenberg, No. CV 21-1102 (C.D. Cal. May 31, 202…
 5     3     6 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-74…
 6     3     9 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-74…
 7     3    11 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-74…
 8     6     3 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-74…
 9     6     9 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-74…
10     6    11 Sketchworks Indus. Strength Comedy, Inc. v. Jacobs, No. 19-CV-74…
# ℹ 808 more rows