Refcard datapasta
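datapasta is an R package that generates reproducible R code for data (vectors, data.frames and tibbles), so that the data can be embedded directly in scripts and documentation.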
Table of Contents
Start from data.frame
Suppose I start with a data.frame. `dpasta()` generates reproducible code for that data.frame:
library(magrittr)
library(datapasta)
#> Warning: package 'datapasta' was built under R version 3.3.2
df = data.frame(stringsAsFactors=FALSE,
data_field_aid = c(1L, 2L),
data_field_name = c("vehicle_id", "type"),
type = c(NA, "TEXT"),
pk_fk = c(NA, NA),
data_entity_aid = c(1L, 1L),
entity_name = c("Vehicle", "Vehicle")
)
df %>% dpasta()
#> data.frame(stringsAsFactors=FALSE,
#> data_field_aid = c(1L, 2L),
#> data_field_name = c("vehicle_id", "type"),
#> type = c(NA, "TEXT"),
#> pk_fk = c(NA, NA),
#> data_entity_aid = c(1L, 1L),
#> entity_name = c("Vehicle", "Vehicle")
#> )
df
#> data_field_aid data_field_name type pk_fk data_entity_aid entity_name
#> 1 1 vehicle_id <NA> NA 1 Vehicle
#> 2 2 type TEXT NA 1 Vehicle
Start from tibble
Suppose I start with a tibble. I want to generate reproducible code for the tibble `df`:
library(dplyr)
library(datapasta)
df = tibble::tribble(
~data_field_aid, ~data_field_name, ~type, ~pk_fk, ~data_entity_aid, ~entity_name,
1L, "vehicle_id", NA, NA, 1L, "Vehicle",
2L, "type", "TEXT", NA, 1L, "Vehicle"
)
df %>% dpasta()
#> tibble::tribble(
#> ~data_field_aid, ~data_field_name, ~type, ~pk_fk, ~data_entity_aid, ~entity_name,
#> 1L, "vehicle_id", NA, NA, 1L, "Vehicle",
#> 2L, "type", "TEXT", NA, 1L, "Vehicle"
#> )
Vector and List
Use `dpasta()` or `vector_paste()` to reproduce vector data:
library(magrittr)
library(datapasta)
mtcars$mpg %>% head() %>% dpasta()
#> c(21, 21, 22.8, 21.4, 18.7, 18.1)
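Works with lists too (note that the output is a nested `c()` call, so the list structure and element names are not preserved):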
library(magrittr)
library(datapasta)
#> Warning: package 'datapasta' was built under R version 3.3.2
alist = list(mpg = head(mtcars$mpg), cyl = head(mtcars$cyl))
alist %>% dpasta()
#> c(c(21, 21, 22.8, 21.4, 18.7, 18.1), c(6, 6, 4, 6, 8, 6))
Vertical printing:
library(magrittr)
library(datapasta)
mtcars$mpg %>% head() %>% vector_paste_vertical()
#> c(21,
#> 21,
#> 22.8,
#> 21.4,
#> 18.7,
#> 18.1)
Copy Table from Excel/TSV
Assume that I have copied the following Excel/TSV data to the clipboard:
"data_entity_aid" "entity_name"
1 "Vehicle"
2 "Shipment"
3 "Shipment_Action"
Now I can generate data.frame code for this data using `df_paste()`:
df_paste()
#> data.frame(stringsAsFactors=FALSE,
#> X.data_entity_aid. = c(1L, 2L, 3L),
#> X.entity_name. = c("\"Vehicle\"", "\"Shipment\"", "\"Shipment_Action\"")
#> )
Read Data from Excel/TSV
This time, instead of copying data manually from Excel/TSV, I will read it inside R and then reproduce it:
library(magrittr)
readr::read_tsv("data/refcard_datapasta_data_entity.tsv") %>%
datapasta::dpasta()
##> tibble::tribble(
##> ~data_entity_id, ~entity_name,
##> 1, "address_variant",
##> 2, "address",
##> 3, "administrative_location"
##> )
Instead of `readr`, I can also use `rio::import()`, but this time the output of `dpasta()` is a data.frame, which is not as nicely formatted as the tibble output:
rio::import("data/refcard_datapasta_data_entity.tsv") %>%
datapasta::dpasta()
##> data.frame(stringsAsFactors=FALSE,
##> data_entity_id = c(1L, 2L, 3L),
##> entity_name = c("address_variant", "address", "administrative_location")
##> )
I can also read the file using base R's `read.csv()`:
read.csv("data/refcard_datapasta_data_entity.tsv", sep = "\t") %>%
datapasta::dpasta()
##> data.frame(
##> data_entity_id = c(1L, 2L, 3L),
##> entity_name = as.factor(c("address_variant", "address",
##> "administrative_location"))
##> )
Code for Markdown
`dmdclip()` produces code that is indented with 4 spaces, for use in markdown documents.
library(magrittr)
library(datapasta)
mtcars %>% head() %>% dmdclip()
data.frame(
mpg = c(21, 21, 22.8, 21.4, 18.7, 18.1),
cyl = c(6, 6, 4, 6, 8, 6),
disp = c(160, 160, 108, 258, 360, 225),
hp = c(110, 110, 93, 110, 175, 105),
drat = c(3.9, 3.9, 3.85, 3.08, 3.15, 2.76),
wt = c(2.62, 2.875, 2.32, 3.215, 3.44, 3.46),
qsec = c(16.46, 17.02, 18.61, 19.44, 17.02, 20.22),
vs = c(0, 0, 1, 1, 0, 1),
am = c(1, 1, 1, 0, 0, 0),
gear = c(4, 4, 4, 3, 3, 3),
carb = c(4, 4, 1, 1, 2, 1)
)