R Study Notes & Refcard

My study notes on R from various sources.
refcard with examples

conventions
  iterative
    export(.., na = "")
      always export, write csv files where na = ""
      because java DataFrame doesn't read rows with "NA" when the columns are numeric
ggplot
  library("ggplot2")
  equivalent scatter plots
    plot(mtcars$wt, mtcars$mpg)
    qplot(mtcars$wt, mtcars$mpg)
    qplot(wt, mpg, data = mtcars)
recreate - reproducing dataframes - unserializing
  recreate(df)
  ancak şunu eklemeyi unutma: stringsAsFactors = F
basics
  option
    > options(mert_test = "selam")
    > getOption("mert_test")
    [1] "selam"
  environment variables
    > Sys.getenv("PATH")
    [1] "/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin:/opt/X11/bin:/Library/TeX/texbin"
    > Sys.setenv(mert_test = "selam")
    > Sys.getenv("mert_test")
    [1] "selam"
  vector
    x=c(1,2,4,8,16 )               #create a data vector with specified elements
    y=c(1:10)                 #create a data vector with elements 1-10
    vect=c(x,y)               #combine them into one vector of length 2n
  date
    why multiple date classes
      as.Date: simplest. without times
      chron: handles dates and times but not time zones
      POSIXct, POSIXlt: dates and times with time zones
      POSIXlt: stores a list of day, month, year ...
      POSIXct: stores seconds since unix epoch
      strptime: converts char to POSIXlt
      as.POSIXlt: converts some to POSIXlt 
        if char arg: expects ISO8601 standard format: "2017-12-30"
      as.POSIXct: converts some to POSIXlt
      use simplest possible
    POSIX.ct ve POSIXlt
      as.POSIXct("2015-01-01")
      as.POSIXct(df02$validFrom, format = "%d.%m.%Y")
    char to POSIX.ct
      strptime("20160115", "%Y%m%d")
    as.Date
      as.Date('1915-6-16')
      as.Date('20170517', format = "%Y%d%m")
    format
      format(Sys.time(), "%y%m%d%H%M")
    ?strftime
    ?strptime
      z <- strptime("20/2/06 11:16:16.683", "%d/%m/%y %H:%M:%OS")
      strptime("20160115", "%Y%m%d")
      # [1] "2016-01-15 AST"
    extract year, mon out of date
      as.numeric(format(date1, "%m"))
    convert char to date
      as.Date( '2012-05-12' )
      as.Date('20140408',"%Y%m%d")
    lubridate
      month(date1)
      year(date1)
    current date
      Sys.time()
      Sys.Date()
    convert string to time
      t
      # [1] "1505" "1825" "1156" "1925" "1055" "1850"
      t %>%
        strptime( format = "%H%M" ) %>%
        strftime( "%H" )
      # [1] "15" "18" "11" "19" "10" "18"
      t %>%
        strptime( format = "%H%M" ) 
      # [1] "2015-09-15 15:05:00 EEST" "2015-09-15 18:25:00 EEST" "2015-09-15 11:56:00 EEST" 
    sequence of dates
      ## first days of years
      seq(as.Date("1910/1/1"), as.Date("1999/1/1"), "years")
      seq(from = as.Date("1910/1/1"), to = as.Date("1999/1/1"), "day")
      seq(from = as.Date("1910/1/1"), by = "day", length.out = 30)
    difference of time
      ex
        d1 = Sys.Date()
        d2 = as.Date("2017-03-04")
        difftime( d1, d2, units = "days")
      ex
        a = strptime("20160115", "%Y%m%d")
        b = strptime("20160119", "%Y%m%d")
        difftime(a, b, units = "days") 
        # Time difference of -4 days
        difftime(a, b, units = "days") %>% as.double
        # -4
      ex
        mutate( gecikme_gun = 0 - as.double(difftime( strptime(termin_tarihi, "%Y%m%d"), strptime(kesim_tarihi, "%Y%m%d"), units = "days")) )
      t0 = strptime("0000", format = "%H%M")
      difftime(today(), t0)
      difftime(now(), t0)
      difftime(t2, t0)
      note: give units = "hours" to difftime to make it reproducible
    increment date by period
      d1 = strptime("20170512", "%Y%m%d")
      d2 = d1 + days(1)
      format(d2, "%Y%m%d")
    excel date as number to posixct date
      opt1: 
        readxl::read_excel(path = sevk_emri_file, col_types = c("text", "date", "text", "text", "text", "text", "text", "text", "text", "numeric", "text", "text", "text", "text"))
      opt2:
        as.POSIXct(sem$shipment_date * (60*60*24) , origin="1899-12-30" , tz="GMT")
  Operator precedence
    x = text[data_starts_at+1:length(text)]
    -->
    x = text[(data_starts_at+1):length(text)]
  Loop
    vector/list
      for (e in mylist) {...}
    data frame/table
      for (i in 1:nrow(df)) {
        df[i,]
        dt[i]
      # wrong: for (row in df/dt)
control
  if (cond) expr1 else expr2
  for (var in seq) expr
  while (cond) expr
  ifelse(cond, yes, no)
debug
  debug(fun); setBreakpoint('script.R#5')
  debug(get_olasi_kombin);
    bu durumda bu fonksiyonun ilk satırında breakpoint konulmuş olur
  browser()
exception handling - trycatch
  non-interactive
    opt2: quit if error and print traceback
      options( error=function() { traceback(2); if(!interactive()) quit("no", status = 1, runLast = FALSE) } )
      pros: 
        quits always even in assert_that errors
        prints tracebacks
    opt1: quit if error
      code
        tryCatch({
          prepare_csv_for_sql()
          verify_no_dup_actual()
          verify_organization_data()
        }, error = function(e) {
          print("err3")
          quit(status=10)
        })
      cons: doesn't print traceback in assert_that errors
  opt1: make_try_capture to capture traceback message
    make_try_capture(read_excel2)(path, sheet)
    # return value of make_try_capture
    make_try_capture(read_excel2)(path, sheets[1])
    # how to use in lapply
    dfl2 = sheets[1:2] %>>%
      lapply( function(sheet) make_try_capture(read_excel2)(path, sheet)  ) %>%
      setNames( sheets[1:2] )
    # how to use lapply normally?
    dfl3 = sheets[1:2] %>>%
      lapply( function(sheet) read_excel2(path, sheet) ) %>%
      setNames( sheets[1:2] )
    setequal(dfl3, dfl2)
  make_trycatch = function(fun)
    function(x)
      tryCatch(
        fun(x),
        error = function(cond) x
      )
  try_root_xbrl = make_trycatch(xbrl_tester)
  # alt
  try_root_xbrl = make_trycatch(
    function(x) x %>% root_xbrl %>>% (x ~ NA)
  )
  # testing
  filenames %>% 
    llply(. %>% try_root_xbrl, .progress = "text") %>%
    unlist %>>%
    `[`(is.na(.) %>% `!`)
non-standard evaluation
  quote
  eval
  library("evaluate")
reflection-metaprogramming
  str
    no return
  summary
  toString
    convert an object to string
  args
data structures
  m = cbind(m1, m2) # column bind
  m[4, 2]
  m[3, ] # row
  m[ , 2] # col
  rbind(df1, df2)
datatable
  assign a column of a subset of rows 
    dt[idx_of_ims]$ilce = ilce
  managing
    tables()
    setkey(dt, pk, name)
    setkeyv(dt, 'pk')
    copy(dt) # new object
    setnames(df, old_names, new_names)    # data.table
  accessing rows
    dt[2] # 2. row
    dt[ c(2,3) ] # 2,3. rows
    df[ , 2:3 ] # in df
    dt['b'] == dt[ key == 'b' ]
    dt[i, j, by=..]
      by: list of functions
    dt[ c(F,T) ] # even rows
  accessing columns
    dt[ , v] == dt[ , 'v', with=F] # returns vector
    dt[ , list(v) ] # returns dt
    dt[ 2:3, sum(v) ] # sum(v) over 2:3
    dt[ , c(1, 'col1'), with=F]
  joins
    setkey(X, key1)
    setkey(Y, key2)
    dt = X[Y, nomatch=0]
      nomatch=NA # default returns NA for non-matches: right outer join
      nomatch=0 # no rows returned for non-matches: inner join
      Y[X] # left outer join
      rbind(Y[X],X[Y]) # full outer join
    cross join
      ft = c('D','D/A')
      c3 = c('1571745','1571673')
      CJ(ft,c3) # cross join
    remove duplicated key value rows
      dt[unique(dt$key), mult = "first"]
  conditional assignment / if true
    a[cik=='1291703']$test = 'ali'
dataframe
  managing
    colnames(df)
    setNames( 1:3, c("foo", "bar", "baz") )
      > setNames(data.frame(v1=c(1:10), v2=seq(1, 100, length=10)), c("X","Y"))
      X   Y
      1   1   1
      2   2  12
      df = import(path) %>%
        setNames(c("IsYeri","IE_Lot_No","Stok_Kodu","Kalite_Kodu","Siparis","Str","L_Eni","L_Boy","IE_Tarihi","Terrmin_Tarihi","Miktar","sira"))
    names(df) <- tolower(names(df)) 
    unname(obj) # remove names
  access
    df[i,]   # row i 
    df[,j]   # column j
    tail(df, 1)
      Last row
  columns
    cols = names(df) %in% c("q3", "q4")
    df = df[!cols] # remove
    df = df[c(-8, -9)] # remove
    dt[ , -6:-16 ] # by range
    df[cols] # keep
    df$cols = NULL # remove
    df[ , c(2,1,3) ] # change column order
  rows
    remove rows
      df[ -ids, ] # remove by index
      df[ !dups, ] # remove by T/F
      but if ids is integer() above methods won't work
      opt1
        df %>%
           filter(!row_number() %in% drop)
      opt2 
        df[!seq_len(nrow(df)) %in% drop, ]
      opt3
        df[ setdiff(1:nrow(df), drop), ]
  rownames
    rownames(f) <- c()
  subset/query
    df[ df$col == logical ]          
  examples/use cases
    find rows with all NA cells
      d4 = data.frame( x = c(1, NA), y = c(NA, NA))
      applyr = partial(apply, MARGIN = 1)
      r4 = applyr(is.na(d4), all)
      r4 == c(F,T)
    remove rows with all NA values
      df %>%
        filter( !applyr(is.na(.), all) )
    { # loop over all columns of some df
      df3 = data.frame( operation_id = df[['operation_id']] )
      # opt1
      for (i in seq_along(cols)) {
        df3[[cols[i]]] = df[[cols[i]]]
      }
      # opt2
      df4 = df[cols] 
      # opt3
      df6 = lapply(seq_along(cols), function(i) df[[cols[i]]] %>% as.numeric )
      # opt4
      df7 = lapply(cols, function(col) df[[col]] %>% as.numeric ) %>% 
        setNames(cols) %>%
        as.data.frame
      }
    apply for datframe is lapply
      convert list of vectors to dataframe
        df7 = lapply(cols, function(col) df[[col]] %>% as.numeric ) %>% 
          setNames(cols) %>%
          as.data.frame
        map(df, ~ str_replace_all(.x, '\\n', '') ) %>%
          as.data.frame
    study_create_dataframe_with_columns_specified_in_list 
      opt1
        l = list( a = NA, b = NA )
        df = as_data_frame(l)
      opt2
        l2 = setNames( replicate(2,NA, simplify = F), c('a', 'b'))
        df = as_data_frame(l2)
lapply variants lapply2
  ldfapply: list of dataframe
    # v0: lapply
    lapply( seq_along(ldf), function( i, ldf ) {
        sheet = names(ldf)[i]
        print(sheet)
        text = unlist( ldf[i] )
        writeLines( text, paste0( "../rdm/auto_dm_", sheet, ".md" ) )
      }, ldf
    )
    # v1: ldfapply
    ldfapply( ldf, function( df, n ) {
      print(n)
      text = unlist(df)
      writeLines( text, paste0( "../rdm/auto_dm_", n, ".md" ) )
    })
split_by into list of df
  lapply( ect$enum_category_id, function( ecid, evl ) {
    filter( evl, enum_category_id == ecid )
  }, evl )
list
  [    # same class + multiple returns
  [[    # any type + single element
  $    # semantics similar to [[
  ls[ [length(ls)+1] ] = elem # append item
rep vs replicate 
  a2 = data_frame( id = 5:7 )
  a3 = a2 %>%
    slice( rep(1:n(), each = 2)) %>%
    mutate( col = rep(1:2, each = 3) )
  #      id   col
  #   <int> <int>
  # 1     5     1
  # 2     5     1
  # 3     6     1
  # 4     6     2
  # 5     7     2
  # 6     7     2
  replicate(3, 1:2, simplify=F) %>% unlist
  # [1] 1 2 1 2 1 2
  a4 = a2 %>%
    slice( rep(1:n(), each = 2)) %>%
    mutate( col = unlist(replicate(3, 1:2, simplify = F)) )
  #      id   col
  #   <int> <int>
  # 1     5     1
  # 2     5     2
  # 3     6     1
  # 4     6     2
  # 5     7     1
  # 6     7     2
  elems = 1:2
  a5 = a2 %>%
    slice( rep(1:n(), each = length(elems))) %>%
    mutate( col = unlist(replicate(n()/length(elems), elems, simplify = F)) )
  a6 = a2 %>%
    mutate_looping(elems, "col")
subset/query
  x %in% y
  v[ v == logical ]
  all(x)
  any(x)
  grep / filter 
    grep(pattern, x, ignore.case = FALSE, perl = FALSE, value = FALSE, fixed = FALSE, useBytes = FALSE, invert = FALSE) 
    grep("[a-z]", letters) # returns indexes
      value=T  # return values
    grepl      # returns logical
    vgrepv
      files = list.files( "data/" ) %>%
        vgrepv( "\\.tsv$" )
  get row index of subset
    row.idx = as.numeric(rownames(rows))
  subset and assign values
    lfs = data.frame( from = c(NA, 'x', 'x', 'y'), to = c('x', 'y', 'p', 'z'), level = NA)
    # from to
    #   NA  x
    #    x  y
    #    x  p
    #    y  z
    lfs[ is.na(lfs$from), ]$level = 1
    # from to level
    # <NA>  x     1
    #    x  y    NA
    #    x  p    NA
    #    y  z    NA
    lfs = lfs %>%
      filter( is.na(from) ) %>%
      mutate( level = 1 ) 
    # from to level
    # <NA>  x     1
sort
  sort/order difference
    order(symbols)
    [1] 1 2 3   # indexes
    sort(symbols)
    [1] "A"  "AA" "AA^"  # actual values
  dataframe
    df[ order(df$B), ]  
    df[ rev(order(df$B)), ] # reverse order
  data table
    dt[order(x,y))
    dt[order(-rank(x),y))
      no dt$col since dt is an environment
conversions
  text -> yaml -> list -> dataframe
    study_build_ddl_2_table = function() {
      ddl = readLines("data/delete_sql/hibernate_ddl_create_table.sql")
      out = ddl %>%
        str_replace_all("create table", "") %>%
        str_replace_all("number\\([^)]*\\)[^,]*", "") %>%
        str_replace_all("varchar2?\\([^)]*\\)", "") %>%
        str_replace_all("primary key *\\([^)]*\\)", "") %>%
        str_replace_all(", *\\)$", "") %>%
        str_replace_all(" *\\(", "\t") %>%
        str_replace_all(" *, *", "\t") %>%
        str_trim(side = "both")
      writeLines(out, "data/delete_sql/ddl_out1.txt")
      out2 = out %>%
        str_replace_all("\t(\\w+)", "\n  - \\1") %>%
        str_trim(side = "both") %>%
        str_replace_all("^(\\w+)", "\\1:") %>%
        str_trim(side = "both") 
      writeLines(out2, "data/delete_sql/ddl_out2.yaml")
      yml = yaml.load_file( "data/delete_sql/ddl_out2.yaml")
      extract_columns = function(i, yml) {
        table = yml[i] %>% names
        data_frame( 
          table_name = table,
          column_name = yml[[table]] 
        )
      }
      out3 = lapply( seq_along(yml), extract_columns, yml) %>%
        bind_rows
      writeLines(out3, "data/delete_sql/ddl_out3.tsv" )
    }
  convert list to dataframe / tree to flat
    opt7: purr map_chr
      repos = my_repos("owner", limit = 100)
      toJSON(repos) %>%
        writeLines( "data/repos.json" )
      df = tibble(
        name = repos %>% map_chr("name", .null = NA_character_),
        full_name = repos %>% map_chr("full_name", .null = NA_character_)
      )
    opt6: str_split unnest group_by spread
      str_split then convert to dataframe column  <url:#r=sr_0003>
    opt5: using nest
      mygenes
        Entrez  symbols
        7841    MOGS,CDG2B,CWH41,DER7,GCS1 
      mygenes %>% 
        mutate(symbols=strsplit(as.character(symbols), ",")) %>% 
        unnest(symbols)
             Entrez symbols
          1    7841    MOGS
          2    7841   CDG2B
          3    7841   CWH41 
    opt1
      https://gist.github.com/aammd/9ae2f5cce9afd799bafb
      https://github.com/krlmlr/kimisc/blob/develop/R/list_to_df.R
        unnamed.list <- replicate(10,rand_mat(),simplify = FALSE) 
        named.list <- unnamed.list %>% set_names(LETTERS[1:10])
        list_to_df <- function(listfordf){
          if(!is.list(named.list)) stop("it should be a list")
        df <- list(list.element = listfordf)
        class(df) <- c("tbl_df", "data.frame")
        attr(df, "row.names") <- .set_row_names(length(listfordf))
        if (!is.null(names(listfordf))) {
          df$name <- names(listfordf)
        }
        df
      }
      rand_mat <- function() {
        Nrow <- sample(2:15,1)
        Ncol <- sample(2:15,1)
        rpois(Nrow*Ncol,20) %>%
          matrix(nrow = Nrow,ncol = Ncol)
      }
      list_to_df(unnamed.list)
    opt2
      http://stackoverflow.com/questions/29265702/r-reorganize-list-into-dataframe-using-dplyr
      l =list()
      l[[1]] = list(member1=c(a=rnorm(1)),member2=matrix(rnorm(3),nrow=3,ncol=1 2016-06-12imnames=list(c(letters[2:4]),c("sample"))))
      l[[2]] = list(member1=c(a=rnorm(1)),member2=matrix(rnorm(3),nrow=3,ncol=1 2016-06-12imnames=list(c(letters[2:4]),c("sample"))))
      l[[3]] = list(member1=c(a=rnorm(1)),member2=matrix(rnorm(3),nrow=3,ncol=1 2016-06-12imnames=list(c(letters[2:4]),c("sample"))))
      lapply(l, `[[`, 2) %>% 
        data.frame %>% 
        add_rownames("key") %>% 
        gather(x, value, -key) %>% 
        select(-x) 
    opt3
      obs1 <- list(x="a", value=123)
      obs2 <- list(x="b", value=27)
      obs3 <- list(x="c", value=99)
      dlist <- list(obs1, obs2, obs3)
      dlist
      opt1: lapply
        dlist %>% lapply(as_data_frame) %>% bind_rows()
        df %>% lapply(as_data_frame) %>% bind_rows()
      opt2: do.call
        as.data.frame(do.call(rbind, dlist), stringsAsFactors = FALSE) 
    opt4: manual lapply per each list
      # study_convert_list_to_dataframe = function() { <url:file:///~/Dropbox (BTG)/TEUIS PROJECT 05-ANALYSIS/working_library/requirements_database/scripts/verify_enums.R#r=g_10023>
  convert factor columns to character
    http://stackoverflow.com/questions/2851015/convert-data-frame-columns-from-factors-to-characters
    opt1: lapply df[]
      df[] = lapply(df, as.character)
    opt2: purr
      bob %>% map_if(is.factor, as.character)
  convert list of vectors to dataframe
    df7 = lapply(cols, function(col) df[[col]] %>% as.numeric ) %>% 
      setNames(cols) %>%
      as.data.frame
operators
  [ [<- [[ $ [[<- $<-
input/output
  csv
    dt = fread(file) 
    read.csv(filename, header=T)   
    write.csv(df, file)
    read.csv(text = "..")
      csv = 'id,size
      1,100
      2,150'
      read.csv(text = csv)
  fread arguments
    skip
      skip = "string"
        search "string" start on that line
      skip = 10
        skip first 10 lines
    select = cols # columns to keep
    drop = cols # column names to drop
    fread(url) # read url directly
    fread(string) # read string directly
  readLines writeLines - text
    text = readLines( file )
    writeLines(lines, "names_stats.txt")
    readLines(con <- file("Unicode.txt", encoding = "UCS-2LE"))
  read.csv args
    na.strings = c("foo", "bar") # custom NA labels
    header = T
    sep = ","
    read.csv(file, header = TRUE, sep = ",", quote = "\"",
      dec = ".", fill = TRUE, comment.char = "", ...)
    read.delim(file, header = TRUE, sep = "\t", quote = "\"",
      dec = ".", fill = TRUE, comment.char = "", ...)
  readr
    delimited: read_delim(), read_csv(), read_tsv(), read_csv2().
    fixed width: read_fwf(), read_table().
    lines: read_lines().
    whole file: read_file().
    write_csv()
  excel readxl
    read_excel("my-spreadsheet.xls", sheet = "data")
    link içeren excel dosyaları
      bir excel dosyası başka bir dosyaya link içerdiğinde, "update links" demek gerekiyor
      aksi taktirde eski veriler okunur
  write excel
    opt1
      library(openxlsx)
      write.xlsx( r, "temp3.xlsx" )
      write.xlsx( r, "temp4.xlsx", asTable = T)
      write.xlsx( r, "temp.xlsx", sheetName = "storyboard2", append = T )
      write.xlsx( r, "temp.xlsx", sheetName = "storyboard3", append = T )
      write.xlsx( r, "rdb_mockups.xlsx", sheetName = "storyboard3", append = T )
    opt2
      library(xlsx)
      write.xlsx(report, 'view_open_problems.xlsx', row.names = F)
    opt3     
      ## Lists elements are written to individual worksheets, using list names as sheet names if available
      l <- list("IRIS" = iris, "MTCATS" = mtcars, matrix(runif(1000), ncol = 5))
      write.xlsx(l, "writeList1.xlsx")
  read.table
    read.table(file, header = FALSE, sep = "", quote = "\"'",
      dec = ".", numerals = c("allow.loss", "warn.loss", "no.loss"),
      row.names, col.names, as.is = !stringsAsFactors,
      na.strings = "NA", colClasses = NA, nrows = -1,
      skip = 0, check.names = TRUE, fill = !blank.lines.skip,
      strip.white = FALSE, blank.lines.skip = TRUE,
      comment.char = "#",
      allowEscapes = FALSE, flush = FALSE,
      stringsAsFactors = default.stringsAsFactors(),
      fileEncoding = "", encoding = "unknown", text, skipNul = FALSE)
  openxlsx
    read.xlsx(xlsxFile, sheet = 1, startRow = 1, colNames = TRUE, 
      rowNames = FALSE, detectDates = FALSE, skipEmptyRows = TRUE, 
      rows = NULL, cols = NULL, check.names = FALSE, namedRegion = NULL)
base
  vignette
    browseVignettes("dplyr")
    vignette("backend", package = "DBI")
  match.arg
    ‘match.arg’ matches ‘arg’ against a table of candidate values as specified by ‘choices’, where ‘NULL’ means to take the first one.
    code
      my_repos <- function(type = c("all", "owner", "public", "private", "member")) {
        type <- match.arg(type)
file system
  file name from path
    basename("C:/some_dir/a")
    > [1]  "a"
    dirname("C:/some_dir/a")
    >[1] "C:/some_dir"
  dir.create(path = ... ) # mkdir
  list.files(path = ".", pattern = NULL, all.files = FALSE,
    full.names = FALSE, recursive = FALSE,
    ignore.case = FALSE, include.dirs = FALSE, no.. = FALSE)
  dir(path = ".", pattern = NULL, all.files = FALSE,
    full.names = FALSE, recursive = FALSE,
    ignore.case = FALSE, include.dirs = FALSE, no.. = FALSE)
  list.dirs(path = ".", full.names = TRUE, recursive = TRUE)
  home directory
    setwd("~")
  join/concat paths
    file.path(dir1, dir2)
  file.copy(from, to)
    copy directories
      file.copy("data/verify", get_transaction_dir_v1(file_name), recursive = T)
  file.create(..., showWarnings = TRUE)
  file.exists(...)
  file.remove(...)
  file.rename(from, to)
  file.append(file1, file2)
  file.copy(from, to, overwrite = recursive, recursive = FALSE,
            copy.mode = TRUE, copy.date = FALSE)
  file.symlink(from, to)
  file.link(from, to)
sequence rep length cut seq
  rep(x, ntimes)
    rep(c(0, 5), times=c(3, 2)) # 0 0 0 5 5 
    rep(c(0, 5), c(3, 2)) # 0 0 0 5 5 
    rep(c(0, 5), each=4) # 0 0 0 0 5 5 5 5
  length(x)
  seq(from, to, by)
  cut(x, n)
  sample(x, size, replace = F)
  replicate(n, expr)
    replicate(5, sample(1:10, 15, replace = T), simplify = F)
      list of 5 vectors with 15 numbers
    simplify=T # dataframe of 15 rows 5 columns
    unlist(..) # 75 numbers
String
  stringi
    transliterate
    totitle case
      label %>%
        str_replace_all( "_", " " ) %>%
        stri_trans_totitle( locale = "tr_TR" )
  substring
    substring("ahmet", 1, 3)
    substring("ahmet", 1, 3:5)
    remove last n chars
      substr(x, 1, nchar(x) - n)
  string templating
    sprintf
      sprintf("Filings: %d", nrow(hfs) )
      sprintf("Filings: %f", 7.2 )
      out of order
        sprintf("%2$s %1$s", "hello", "world")
    leading zeros
      sprintf("%03s", 1:end)
    escaping percent
      sprintf("%s escape %%that", "ali")
    examples
    sprintf: arguments cannot be recycled to the same length
      problem
        sprintf( "%s/QTR%s", as.character(year), as.character(quarter) )
        year and quarter cannot be recycled
      cross join and using dataframe with sprintf
        df = CJ(year, quarter)
        sprintf("%s,%s",df$V1, df$V2)
    named placeholders
      gsubfn
        library("gsubfn")
        df = data.frame( id = 1:3, eroziya = 5:7 )
        '%(id)s: %(eroziya)d' %format% df
        #[1] "1: 5" "2: 6" "3: 7"
  paste (concat)
    na'leri blank ile replace et
      > na.exclude(c(NA, 3)) %>% as.character
      [1] "3"
    paste("q", 1:5, sep="") # concat +
      [1] "q1" "q2" "q3" "q4" "q5"
    vektör için collapse: # python join
      paste(c("ali","veli"), collapse=",")
        [1] "ali,veli"
    collapse: tek parçaya collapse eder
    sep: concat edilen stringler nasıl ayrılmalı. 
    paste0('converted ', "here")
      [1] "converted here"
  regex
    https://www.regex101.com/ 
      debug regex
    ref
      grep(pattern, x, ignore.case = FALSE, perl = FALSE, value = FALSE, fixed = FALSE, useBytes = FALSE, invert = FALSE)
      grepl(pattern, x, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE)
      sub(pattern, replacement, x, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE)
      gsub(pattern, replacement, x, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE)
      regexpr(pattern, text, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE)
      gregexpr(pattern, text, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE)
      regexec(pattern, text, ignore.case = FALSE, fixed = FALSE, useBytes = FALSE)
    stringr
      str_replace(string, pattern, replacement) # "string" %s/pattern/repl/
      str_replace(fruits, "[aeiou]", "-")
      str_replace_all(fruits, "[aeiou]", "-")
      str_replace_all("\t(\\w+)", "\n  - \\1") %>%
      multiple patterns
        fruits <- c("one apple", "two pears", "three bananas")
        # If you want to apply multiple patterns and replacements to the same
        # string, pass a named version to pattern.
        str_replace_all(str_c(fruits, collapse = "---"),
        c("one" = 1, "two" = 2, "three" = 3)) 
        # [1] "1 apple---2 pears---3 bananas"
      str_match
        strings = c(" 219 733 8965", "329-293-8753 ", "banana")
        pattern <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"
        str_extract(strings, pattern)
        m = str_match(strings, pattern)
               [,1]      [,2] [,3]  [,4]
          [1,] "219 733 8965" "219" "733" "8965"
          [2,] "329-293-8753" "329" "293" "8753"
          [3,] NA      NA NA  NA
        m[1,1] # match 1 group 1
        m[1,2] # match 1 group 2
      str_locate("aaa12xxx", "[0-9]+")
        #      start end
        # [1,]     4   5
      str_extract("aaa12xxx", "[0-9]+")
        # [1] "12"
    lookaround
      lookbehind
        (?<=) positive
        (?<!) positive
      lookahead
        (?=)  positive
        (?!)  negative
    escapes backslashes
      backslashes need to be doubled
    character classes
      [:alnum:]
        [:alpha:] [:digit]
      [:blank:]
    examples
      trim whitespace
        # returns string w/o leading whitespace
        trim.leading <- function (x)  sub("^\\s+", "", x)
          # returns string w/o trailing whitespace
        trim.trailing <- function (x) sub("\\s+$", "", x)
          # returns string w/o leading or trailing whitespace
        trim <- function (x) gsub("^\\s+|\\s+$", "", x)
        To use one of these functions on myDummy$country:
        myDummy$country <- trim(myDummy$country)
  character functions
    nchar(x)    
      number of char in x
    substr(x, start, stop)    
      substr(x, 2, 4)
      substr(x, 2, 4) <- "222"
    grep(pattern, x, ignore.case=FALSE, fixed=FALSE)
      fixed=FALSE   regex
      returns matching indices
    sub(pattern, replacement, x, ignore.case=FALSE, fixed=FALSE)
      sub("\\s", ".", "Hello there")
      > Hello.there
    strsplit(x, split)
      strsplit("abc", "")
    paste(..., sep="")
      concatenate strings
      paste("x", 1:3, sep="m")
      > c("xM1", "xm2", "xm3")
      paste( 1:3, collapse = "; " )
      > [1] "1; 2; 3"
    case conversions
      toUnderscore(x)
        convert camel case to underscore separated lower case
      toupper(x)
      tolower(x)
      tocamel(x)
        library("rapportools")
        tocamel("foo.bar")
          ## [1] "fooBar"
        tocamel("foo.bar", upper = TRUE)
          ## [1] "FooBar"
        tocamel(c("foobar", "foo.bar", "camel_case", "a.b.c.d"))
          ## [1] "foobar"    "fooBar"    "camelCase" "aBCD"
      unicode: stri_trans_tolower
        stri_trans_totitle( locale = "tr_TR" )
  rematch
    install_github("MangoTheCat/rematch")
    match
      re_match(text = dates, pattern = iso)
  stringr
    str_trim
      str_trim(string, side = c("both", "left", "right"))
      x %>% 
        str_trim(side = "both")
    str_split
      returns list
      str_split with dplyr: take last element
        df = data_frame( a = c("ali,veli", "can,cin" ) )
        d6 = df %>%
          mutate( b = str_split(a, ",") ) %>%
          unnest(b) %>%
          group_by(a) %>%
          filter(row_number()==n())
      use unlist to convert to vector
        t %>%
        str_split("\\n") %>%
        unlist
      str_split then convert to dataframe column  id=sr_0003
        str_split then convert to dataframe column  <url:#r=sr_0003>
        d4 = ft %>%
          mutate( bn = str_split(sinif_tip_formasiya_adi, "\\(") ) %>%
          unnest(bn) %>%
          group_by( fte_id ) %>%
          mutate( info = row_number() ) %>%
          spread( info, bn ) %>%
          rename( dom_subdom = `1`, bitki_adlari = `2`, other = `3` )
    str_trim( unlist( str_split(goog,',') ) )
      [1] "GOOG"  "GOOGL"
    basic
      str_c
        paste0 like
      str_length
        nchar like
        preserves NA
      str_sub
        substr like
        negative positions
          end: -1
        zero length input
        ex
          hw <- "Hadley Wickham"
          str_sub(hw, 1, 6)
          str_sub(hw, end = 6)
          str_sub(hw, 8, 14)
          str_sub(hw, 8)
          str_sub(hw, c(1, 8), c(6, 14))
          # Negative indices
          str_sub(hw, -1)
          str_sub(hw, -7)
          str_sub(hw, end = -7)
          # Alternatively, you can pass in a two colum matrix, as in the
          # output from str_locate_all
          pos <- str_locate_all(hw, "[aeio]")[[1]]
          str_sub(hw, pos)
          str_sub(hw, pos[, 1], pos[, 2])
          # Vectorisation
          str_sub(hw, seq_len(str_length(hw)))
          str_sub(hw, end = seq_len(str_length(hw)))
          # Replacement form
          x <- "BBCDEF"
          str_sub(x, 1, 1) <- "A"; x
          str_sub(x, -1, -1) <- "K"; x
          str_sub(x, -2, -2) <- "GHIJ"; x
          str_sub(x, 2, -2) <- ""; x 
      str_str<-
        substr<-
      str_dup
        to duplicate chars
      str_trim
      str_pad
        pad extra whitespace
    pattern matching
      detect
        str_detect
          grepl like
      locate
        str_locate
        str_locate_all
        based on: regexpr
      extract
        str_extract
        str_extract_all
      match
        str_match
          capture groups by ()
        return: matrix
          one column for each group
        str_match_all
      replace
        str_replace
        str_replace_all
        based: sub
      split
        str_split_fixed
  unicode
    detect encoding
      s = readLines(paste0(dir, "siparisler.csv"), n = 100) %>% paste(collapse = "\\n")
      if (stri_enc_isutf8(s)) 
    totitle case
      label %>%
        str_replace_all( "_", " " ) %>%
        stri_trans_totitle( locale = "tr_TR" )
  examples
    append new lines
      r = character()
      r = c(r, sprintf("filings that don't have xbrl: %s", length(missing_xbrl)))
dplyr
  vignettes and tutorials
    http://www.dataschool.io/dplyr-tutorial-for-faster-data-manipulation-in-r/
    https://cran.r-project.org/web/packages/dplyr/vignettes/databases.html
  alias
    extract2 [[
    check magrittr alias
  examples
    http://stackoverflow.com/questions/31358953/in-r-subset-or-dplyrfilter-with-variable-from-vector
    rbind: filter mutate select left_join(original)
      fkd = r_data_field() %>%
        select( 1:2, pk_fk ) %>%
        filter( pk_fk == "FK" ) %>%
        mutate( fk_data_entity_name = ...)
        select( data_field_id, fk_data_entity_name )
      dfl = r_data_field(with_invalid=T) %>%
        select( -fk_data_entity_name ) %>%
        left_join(fkd)
      export(dfl, "data/updates/DataField_updated.tsv")
    generify nse columns with se
      http://www.carlboettiger.info/2015/02/06/fun-standardizing-non-standard-evaluation.html
      https://cran.r-project.org/web/packages/dplyr/vignettes/nse.html
      http://www.r-bloggers.com/dplyr-use-cases-non-interactive-mode/
      mutate
        https://stackoverflow.com/questions/26003574/r-dplyr-mutate-use-dynamic-variable-names
        opt1
          varname <- paste("petal", n , sep=".")
          varval <- lazyeval::interp(~Petal.Width * n, n=n)
          mutate_(df, .dots= setNames(list(varval), varname))
        opt2
          varname <- paste("petal", n, sep=".")
          df<-mutate_(df, .dots=setNames(paste0("Petal.Width*",n), varname))
      rename
        rename with whitespaces
          sip = r_Siparis_v1(file_name, "xlsx") %>%
            rename( 
              tesis = `İşYeri`,
              id = `İE Lot No`,
        # opt0
          rename( iris, genus = Species )
        # opt1
          rename_( iris, "new" = "Species" )
        # opt1
          lhs = "new"
          rename_( iris, lhs = "Species" )
        # opt2
          rhs = "Species"
          rename_( iris, .dots = setNames(rhs, "new") )
          # setNames(object = nm, nm)
          # ===>
          # setNames( "new" = x )
      select
        columns = c("enum_value_id", "enum_value_name", "enum_id", "value", "parent_id", "order_no")
        entity = "EnumValue"
        df = r_rdb(entity) %>%
          select_(.dots = columns)
        nse
          df = r_rdb(entity) %>%
            select(enum_value_id, enum_value_name, enum_id, value, parent_id, order_no) 
      filter
        only_if adverb
          https://twitter.com/drob/status/785880369073500161
        # opt0: nse
          filter( iris, Species == "setosa" )
        # opt1: se for value of formula expression
          .dots = list( ~Species == "setosa" )
          filter_( iris, .dots = .dots )
        # opt1 application: treat arguments as variables
          value = "setosa"
          .dots = list( ~Species == value )
          filter_( iris, .dots = .dots )
        # opt2: se for column of formula expression
          library("lazyeval")
          value = "setosa"
          column = "Species"
          .dots = list( interp( ~y == x ,
                                .values = list( y = as.name(column), x = value ) ))
          filter_( iris, .dots = .dots )
        opt1
          family <- 'Scaridae'
          field <- 'Family'
          .dots <- list(interp(~y == x, 
            .values = list(y = as.name(field), x = family)))
          x3 <- filter_(all_taxa, .dots=.dots)
      group_by
        kullanım: group_by dışı değişkenleri nasıl gösterceğiz?
          1. önce mutate yap
          2. summarise yap
          3. sonra orjinal tabloyla join et
          kmbg = kmb %>%
            select(kombin_id, genislik) %>%
            left_join(ism, by = "kombin_id") %>%
            select(kombin_id, siparis_id, is_emri_id, genislik, bicak_sayisi) %>%
            left_join(sip, by = "siparis_id") %>%
            select(kombin_id, siparis_id, is_emri_id, genislik, bicak_sayisi, en) %>%
            group_by(kombin_id) %>%
            mutate(en_carpi_bicak = en * bicak_sayisi) %>%
            summarise(toplam_en = sum(en_carpi_bicak))
          cmp = kmb %>%
            left_join(kmbg, by = "kombin_id") %>%
            select(kombin_id, genislik, toplam_en) %>%
            mutate(enden_trim = genislik - toplam_en - 30)
        select max value in each group
          http://stackoverflow.com/questions/24237399/how-to-select-the-rows-with-maximum-values-in-each-group-with-dplyr
          https://stackoverflow.com/questions/21308436/dplyr-filter-get-rows-with-minimum-of-variable-but-only-the-first-if-multiple
          df %>% group_by(A,B) %>% slice(which.max(value))
          opt
            filter( rank(enden_trim_m2, ties.method="first") == 1)
        opt
          columns = c("screen_id", "window_id")
          .dots <- lapply(columns, as.symbol)
          d = swn %>%
            group_by_(.dots = .dots) %>%
            filter( n() > 1 )
        study_generify_group_by = function() {
          # opt1: nse default
          dup_wnd = wnd %>%
            group_by(window_id) %>%
            filter( n() > 1 )
          # opt2: se using character vector
            .dots <- lapply(fk_name, as.symbol)
            have_no_children = parent_df %>%
              inner_join( child_df, by = fk_name ) %>%
              group_by_( .dots = .dots ) %>%
              summarise( count = n() ) %>%
              filter( count == 0 )
          # opt2: se using formula
          .dots = list(~window_id)
          dup_wnd = wnd %>%
            group_by_(.dots = .dots) %>%
            filter( n() > 1 )
          # opt3: treat arguments as variables
          .dots = list(~window_id)
          dup_wnd = wnd %>%
            group_by_(.dots = .dots) %>%
            filter( n() > 1 )
          # opt4: treat key and values as variables
          columns = "window_id"
          .dots = list( interp( ~ y, .values = list(y = as.name(columns) )))
          dup_wnd = wnd %>%
            group_by_(.dots = .dots) %>%
            filter( n() > 1 )
          # opt4.2: multiple keys
          columns = c("window_id", "window_name")
          .dots = list( interp( ~ y, .values = list(y = as.name(columns) )))
          dup_wnd = wnd %>%
            group_by_(.dots = .dots) %>%
            filter( n() > 1 )
          # opt5: encapsulate this into a function
          group_filter_duplicates = function(df, columns) {
            .dots = list( interp( ~ y, .values = list(y = as.name(columns) )))
            df %>%
              group_by_(.dots = .dots) %>%
              filter( n() > 1 )
          }
          columns = c("window_id", "window_name")
          wnd %>%
            group_filter_duplicates(columns)
        }
      join
        opt1: setNames
          d1 = data_frame(x = seq(1,20),y = rep(1:10,2),z = rep(1:5,4))
          head(d1)
          d2 = data_frame(xx = seq(1,20),yy = rep(1:10,2),zz = rep(1:2,10))
          join_fn <-function(d_in1,d_in2,var_vec1,var_vec2){
            d_out = d_in1 %>%
              left_join(d_in2,setNames(var_vec2,var_vec1))
          }
          var_vec1 = c("x","y")
          var_vec2 = c("xx","yy")
          d_join_out = join_fn(d1,d2,var_vec1,var_vec2)
          head(d_join_out)
    incremental row id
      fte02 = import( "inbox/formasiya_tomi_emsallari.xlsx" ) %>%
        remove_all_na_rows %>%
        mutate( fte_id = 1:n() )
    result = by_cik %>%
      summarise_each( funs(last(.)) )
    tbl_df
    join by multiple columns
      inner_join(xcr, by = c("filename", "contextRef"))
    join by different columns
      org3 = org %>%
        left_join( org2, by = c("parent_id" = "organization_id") )
    filtering vector
      cols = c("op_id", "no")
      opt1
        cols[ends_with(cols, '_id')]
      opt2
        cols %>%
          extract( ends_with(., '_id') )
    selecting columns of some df
      opt1
        cols = cols[ends_with(cols, '_id')] 
        df4 = df[cols]
      opt2
        df5 = df %>%
          extract( cols[ends_with(cols, '_id')] )
  verbs
    filter + slice
    arrange
    select + rename
    distinct
    mutate + transmute
    summarise
    sample_n + sample_frac
  case_when     
    ex1
      x <- 1:50
      case_when(
        x %% 35 == 0 ~ "fizz buzz",
        x %% 5 == 0 ~ "fizz",
        x %% 7 == 0 ~ "buzz",
        TRUE ~ as.character(x)
      )
  mutate if
    mutate( type = ifelse(  f$type == f$field, NA, f$type ) )
    opts
      data.table
        id = "field_id" 
        flden = import("data/translation/field_en.xlsx") %>%
          select( one_of("field_id"), ends_with("_en") ) %>%
          as.data.table
        setkey(flden, field_id)
        flden2 = flden[!is.na(field_name_en)]
        fld = read_rdb_field() %>%
          as.data.table
        setkey(fld, field_id)
        assert_that(all( flden2$field_id %in% fld$field_id ) )
        fld2 = fld
        fld2[field_id %in% flden2$field_id]$field_name_en = flden2$field_name_en
        assert_that( setequal( fld[[id]], fld2[[id]] ) )
        export(fld2, "data/translation/field2.tsv")
      dplyr
        id = "field_id" 
        flden = import("data/translation/field_en.xlsx") %>%
          select( one_of("field_id"), ends_with("_en") )
        fld = read_rdb_field()
        fld2 = fld %>%
          left_join( flden, by = id) %>%
          mutate( field_name_en = ifelse( is.na(field_name_en.y), field_name_en.x, field_name_en.y )) %>%
          select( -one_of("field_name_en.y", "field_name_en.x") )
        assert_that( setequal( fld[[id]], fld2[[id]] ) )
        export(fld2, "data/translation/field2.tsv")
  slice     
    row_number'a göre filtreleme yapar
    slice(mtcars, 1L)
    slice(mtcars, n())
    slice(mtcars, 5:n())
    by_cyl <- group_by(mtcars, cyl)
    slice(by_cyl, 1:2)
    # Equivalent code using filter that will also work with databases,
    # but won't be as fast for in-memory data. For many databases, you'll
    # need to supply an explicit variable to use to compute the row number.
    filter(mtcars, row_number() == 1L)
    filter(mtcars, row_number() == n())
    filter(mtcars, between(row_number(), 5, n()))
  filter
    select by position
      slice(flights, 1:10)
    boolean operators explicit
      filter(flights, month == 1 | day == 1)
    filter(flights, month == 1, day == 1)
    babynames %>%
      filter(name %>% substr(1, 3) %>% equals("Ste")) %>%
      group_by(year, sex) %>%
      summarize(total = sum(n)) %>%
      qplot(year, total, color = sex, data = ., geom = "line") %>%
      add(ggtitle('Names starting with "Ste"')) %>%
      print
    filter vector
      files = list.files( "data/" ) %>%
        vgrepv( "\\.tsv$" )
  arrange
    arrange(flights, year, month, day)
    descending order: desc()  
      arrange(flights, desc(year))
    more verbose
      flights[order(flights$year, flights$month, flights$day), ]
  select columns
    select(flights, year, month, day)
    select(flights, year:day)
    select(flights, -(year:day))
    helper functions
      starts_with()
      ends_with
      matches()
      contains()
      ?select
    rename variables using named arguments
      select(flights, tail_num = tailnum) # others are dropped
      rename(flights, tail_num = tailnum)
      rename using string functions
        iris %>% rename_(.dots=setNames(names(.), tolower(gsub("\\.", "_", names(.)))))
    order columns
      select(field_id, data_entity_id:variable_name)
      remaining columns ordering
        http://stackoverflow.com/questions/32040742/dplyrselect-including-all-other-columns-at-end-of-new-data-frame-or-beginni
        all other columns at end
          col <- c("carrier", "tailnum", "year", "month", "day")
          select(flights, one_of(col), everything()) 
        all other at beginning
          select(flights, -one_of(col), one_of(col))
        all dataframe at end
          bind_cols(select(flights, one_of(col)), flights)
        all dataframe at beginning
          bind_cols(flights, select(flights, one_of(col)))
  distinct (unique) rows
    distinct(select(flights, tailnum))
    normalde sadece seçtiğin kolonları tutar
      tüm kolonları tutması için:
        .keep_all = T
  add new columns with mutate()
    mutate(flights, gain = arr_delay - dep_delay)
  summarise values with summarise()
    summarise(flight, delay = mean(dep_delay, na.rm = T))
  sample rows
    sample_n(flights, 10)
    sample_frac(flights, 0.01)
      replace = T
  grouped operations
    arrayization: convert single valued cells into multiple valued cells
      var = r_variable() %>%
        group_by( test_id ) %>%
        summarise( variable_id_list = paste( variable_id, collapse = "," ) ) 
    unarrayization with unnest
      ex2
        http://bioinfoblog.it/2015/02/the-most-useful-r-command-unnest-from-tidyr/comment-page-1/
        d1
          k v
          k1  v1,v2
        ->
        d2
          k v
          k1  v1
          k1  v2
        opt1
          d1 %>%
            mutate( v = str_split(v, ",") ) %>%
            unnest(v)
        opt2
          d1 %>%
            unnest( v = str_split(v, ",") )
      ex
        denp1 = r_data_entity() %>%
          select(data_entity_id, bps_id_list)
        denp2 = r_data_entity() %>%
          mutate(bps_id=str_split(bps_id_list, ",")) %>% 
          unnest(bps_id) %>%
          select(data_entity_id, bps_id)
        #> denp1
           #data_entity_id                    bps_id_list
        #1              86 1,2,3,5,6,7,8,9,10,11,12,13,14
        #2              99                             11
        #> denp2
           #data_entity_id bps_id
        #1              86      1
        #2              86      2
        #3              86      3
    group_by and concat strings by column
      rvw2 = rvw %>%
        left_join( vsc, by = "view_id" ) %>%
        left_join( scw, by = "screen_id" ) %>%
        group_by( view_id ) %>%
        distinct( window_id ) %>%
        summarise( window_id_list = paste( window_id, collapse = "," ) ) %>%
        arrange( view_id )
    filter first row from group_by
      http://stackoverflow.com/questions/31528981/dplyr-select-first-and-last-row-from-grouped-data
      opt1: row_number() == 1
        df %>%
          group_by(id) %>%
          arrange(stopSequence) %>%
          filter(row_number()==1 | row_number()==n())
      opt2: slice(1)
        df %>% arrange(stopSequence) %>% group_by(id) %>% slice(c(1,n()))
    verbs affected by grouping as:
      select() no change
      arrange() orders first by grouping variables
      mutate() and filter() most usefil with window functions (rank() or min(x) == x)
        vignette("window-function")
      sample_n() sample rows in each group
      slice() extract rows within each group
      summarise() explained below
    example
      planes = group_by(flights, tailnum)
      delay = summarise(planes,
        count = n(),
        dist = mean(distance, na.rm = T),
        delay = mean(arr_delay, na.rm = T))
      delay = filter(delay, count > 20, dist < 2000)
    summarise()
      use summarise() with aggregate functions
        that take a vector values, return a single number
        base R: min, max, sum, mean,
        dplyr: n, n_distinct(x), first(x), last(x), nth(x,n)
      ex
        destinations = group_by(flights, dest)
        summarise(destinations, 
          planes = n_distinct(tailnum),
          flights = n()
        )
      when grouping by multiple variables, each summary peels off one level of grouping
        daily = group_by(flights, year, month, day)
        per_day = summarise(daily, flights = n())
        per_month = summarise(per_day, flights = sum(flights))
    grouping without summarising
      flights %>%
        group_by(Dest) %>%
        select(Cancelled) %>%
        table() %>%
        head()
    how does summarise_each work?
      > by_species <- iris %>% group_by(Species)
      > by_species %>% summarise_each(funs(length))
      Source: local data frame [3 x 5]
           Species Sepal.Length Sepal.Width Petal.Length Petal.Width
      1     setosa           50          50           50          50
      2 versicolor           50          50           50          50
      3  virginica           50          50           50          50
      funs(...) applied to each column separately
        > by_species %>% summarise_each(funs(mean), matches("Width"))
        Source: local data frame [3 x 3]
             Species Sepal.Width Petal.Width
        1     setosa       3.428       0.246
        2 versicolor       2.770       1.326
        3  virginica       2.974       2.026
        > by_species %>% summarise_each(funs(mean), Petal.Width)
        Source: local data frame [3 x 2]
             Species Petal.Width
        1     setosa       0.246
        2 versicolor       1.326
        3  virginica       2.026
  window functions
    n inputs and n outputs
    ex
      filter(min_rank(desc(DepDelay)) <= 2) %>%
      =
      top_n(2) %>%
    # for each month, calculate the number of flights and the change from the previous month
      flights %>%
        group_by(Month) %>%
        summarise(flight_count = n()) %>%
        mutate(change = flight_count - lag(flight_count))
      # rewrite more simply with tally
        tally() %>%
        mutate(change = n - lag(n))
    # row numbers of each element in each group
      r2 %>%
        group_by( from ) %>%
        mutate( order2 = row_number(order) )
      # from to order order2
      #    a  b     1      1
      #    a  c     1      2
      #    b  d     2      1
      r2 %>%
        group_by( from ) %>%
        mutate( order2 = row_number() )
      # from to order order2
      #    a  b     1      1
      #    a  c     1      2
      #    b  d     2      1
    ranking 
      functions
        row_number
        min_rank
        dense_rank
      difference:
        how to solve ties
      example
        nums = c(1, 1, 2, 3)
        > min_rank(nums)
        [1] 1 1 3 4
        > dense_rank(nums)
        [1] 1 1 2 3
        > row_number(nums)
        [1] 1 2 3 4
      how to handle ties
        min_rank
          normal ranking
        dense_rank
          doesn't skip the places
        row_number
          ignores ties
  utilities
    instead of str()
      glimpse(flights)
  databases - sql
    jdbc - most reliable
      username = "btg_mis"
      password = "..."
      conStr =  "jdbc:oracle:thin:@52.73.23.191:1521:btgdev"
      drv <- JDBC("oracle.jdbc.driver.OracleDriver",
        "other/ojdbc6.jar",
        identifier.quote="`")
      conn = dbConnect(drv, conStr, username, password)
    read table
      dbReadTibble = function(db, table_name) {
        dbReadTable(db, table_name) %>%
          as_tibble
      }
      act_evt_log = function(db) dbReadTibble(db, "ACT_EVT_LOG")
    select query 
      conn = get_db_aws()
      df = dbGetQuery(conn, "SELECT * FROM T_TEST")
    my_db = src_sqlite("my_db.sqlite3")
    tbl(my_db, "hflights")
    tbl(my_db, sql("SELECT * FROM hflights LIMIT 100"))
    sql command?
      %>% explain()
  join
    join key: group_by keys
      delays <- flights %>%
        group_by(dest) %>%
        summarise(arr_delay = mean(arr_delay, na.rm = TRUE), n = n()) %>%
        arrange(desc(arr_delay)) %>%
        inner_join(location)
    multiple join keys
      inner_join(xcr, by = c("filename", "contextRef")) 
    join types
      left_join
      inner_join
      anti_join
        excluded in right
        but not join
      semi_join
        intersection rows
        but not join
      right_join
        reverse of left
      outer_join
        union
    keys to join
      default: all common columns
      join by different columns
        org3 = org %>%
          left_join( org2, by = c("parent_id" = "organization_id") )
      by: explicitly specify
        inner_join(xcr, by = c("filename", "contextRef")) 
        inner_join(mcf, by="filename") %>%
        documentation says but doesn't work
          by = c("a")
          by = c("a" = "b")
  tally
    planes %>% group_by(type) %>% tally()
    simple count() by group
  do()
    if existing verbs don't work, use do()
    similar to dlply()
    slower
    uses pronoun: . to refer to current group
    ex
      df: houseID, year, price
      by_house = df %>% 
        group_by(houseID) 
      by_house %>% do(na.locf(.))
        na.locf: last observation carried forward. replace na with last non-na value
      by_house %>% do(head(., 2))
      by_house %>% do(data.frame(year = .$year[1]))
  database
    create new database
      my_db <- src_sqlite("my_db.sqlite3", create = T)
    # put/insert/copy data to database
      copy_to(hflights_db, as.data.frame(flights), name = "flights", 
        indexes = list(c("date", "hour"), "plane", "dest", "arr"), temporary = FALSE)
    # load data
      weather_db <- hflights_db %>% tbl("weather")
    # work with data as if they are local data frames
      flights_db %>% left_join(planes_db)
    # operations are lazy, until you see the data
    # show sql and show plan
      flights_db %>% filter(n > 10) %>% explain()
    # get all data locally
      hourly_local <- collect(hourly)
  learning sql
    how indices work
      sqlite.org/queryplanner.html
    how select works
      10 easy steps to a complete understanding of sql
  bind_rows
    do.call(rbind, x) # ==
    bind_rows(x)
  aggregate with summarize
    # <url:Dropbox (BTG)/TEUIS PROJECT 05-ANALYSIS/working_library/requirements_database/scripts/prepare_rdb_field_operations.R#p108_rdb_gfield_aggregate_dfield>
    # the result is not useful usually
    fld2 = fld %>%
      left_join(scr, by = "screen_id") %>%
      left_join(dfl, by = "data_entity_id") 
    fld3 = fld2 %>%
      group_by(field_id) %>%
      summarize(list(data_field_name))
    { # testing
      head(fld3)[[2]]
      paste(head(fld3)[[2]], collaps=",")
      toString(unlist(fld3[1,][[2]]))
      fld3[1:3,][[2]]
    }
    dfn = lapply( fld3[1:nrow(fld3),][[2]], toString ) %>% unlist
    fld4 = data_frame(
      field_id = fld3$field_id,
      data_field_name_aggregated = dfn
    )
    fld5 = fld %>%
      left_join(fld4, by = "field_id")
  errors
    # Error: `false` has type 'integer' not 'double'
      sip3 %>%
        mutate( bicak_sayisi = if_else( T, 1, as.integer(1) ))
      if_else'in tüm çıktıları aynı type'ta olmalı. fakat integer ve numeric farklı tipler.
      ex2
        if_else: Error: `false` has type 'logical' not 'double'
          if_else(TRUE, 1, NA)
          #> Error: `false` has type 'logical' not 'double'
          if_else(TRUE, 1, NA_real_)
          #> [1] 1
    error: index out of bounds
      join key mevcut değil
    Error: argument "x" is missing, with no default
      4: lapply(result, . %>% "kombin"[[]]) %>% bind_rows() %>% mutate(kombin_id = row_number()) at optimize_trim_in_one_step.R#70
      sebep: 
        result = list()
magrittr pipe
  basic
    x %>% f === f(x)
    x %>% f(y) === f(x,y)
    x %>% f %>% g === g(f(x))
  argument placeholder
    x %>% f(y, .) === f(y,x)
  reusing placeholder
    x %>% f(y = nrow(.)) === f(x, y = nrow(x))
    overrule this by enclosing in braces
    x %>% {f(y = nrow(.))} === f(y = nrow(x))
  unary function
    f <- . %>% cos %>% sin # ==
    f <- function(.) sin(cos(.)) 
  create functions (or functional sequences)
    mae <- . %>% abs %>% mean(na.rm = TRUE)
    mae(rnorm(10))
    #> [1] 0.5605
    ex
      n1 = lapply(filenames,
        . %>% nchar )
      n2 = filenames %>% 
        lapply( . %>% nchar )
      n3 = filenames %>% 
        lapply(function(x) nchar(x))
      n4 = filenames %>% 
        lapply(., function(x) nchar(x))
    rules:
      if dot is used, then first arg is not passed automatically
      if dot is used as lambda, then first arg is still passed
    exception
      works
        lapply(x, . %>% {ifelse(is.blank(.),NA,.)} )
      fails
        lapply(x, . %>% ifelse(is.blank(.),NA,.) )
      lesson:
        if using dot as inner arg, then first arg is automatically passed
    ex2: remove na records
      # opt1: works
      df = data.frame( id = c(1, 2, NA) )
      r1 = dplyr::filter( df, !is.na(df$id) )
      # opt2: doesn't work
      r2 = df %>%
        filter( !is.na(.$id) )
      # opt3: works
      r3 = df %>>%
        (dplyr::filter(., !is.na(.$id) ))
    ex3: mutate some columns
      # opt1: works
      df = data.frame( id = c(1, 2, NA) )
      as.character(df$id)
      # opt2: works
      r2 = df %>%
        mutate( id = as.character(id) )
  alias 
    equals add multiply_by
    extract [
      ecd %>% extract("independent_id")
    extract2 [[
    use_series $
      ecd %>% use_series("independent_id")
    ‘extract’                 ‘`[`’
    ‘extract2’                ‘`[[`’
    ‘inset’                   ‘`[<-`’
    ‘inset2’                  ‘`[[<-`’
    ‘use_series’              ‘`$`’
    ‘add’                     ‘`+`’
    ‘subtract’                ‘`-`’
    ‘multiply_by’             ‘`*`’
    ‘raise_to_power’          ‘`^`’
    ‘multiply_by_matrix’      ‘`%*%`’
    ‘divide_by’               ‘`/`’
    ‘divide_by_int’           ‘`%/%`’
    ‘mod’                     ‘`%%`’
    ‘is_in’                   ‘`%in%`’
    ‘and’                     ‘`&`’
    ‘or’                      ‘`|`’
    ‘equals’                  ‘`==`’
    ‘is_greater_than’         ‘`>`’
    ‘is_weakly_greater_than’  ‘`>=`’
    ‘is_less_than’            ‘`<`’
    ‘is_weakly_less_than’     ‘`<=`’
    ‘not’ (‘`n'est pas`’)     ‘`!`’
    ‘set_colnames’            ‘`colnames<-`’
    ‘set_rownames’            ‘`rownames<-`’
    ‘set_names’               ‘`names<-`’
  map function
    equvalent:
      lapply( rownames %>% {. %>% partial( path_array_exchange_listing_x, . )})
      rownames %>% { partial( path_array_exchange_listing_x, . ) }
      rownames %>% partialm(path_array_exchange_listing_x)
  argument placeholder
    x %>% f(y, .) === f(y,x)
  stepwise string-cleaning
    files %<>%
      basename %>%
      str_replace("...", "") %>%
      str_replace("...", "")
  paste
    "this" %>% paste("is not") %>% paste("a pipe")
  https://twitter.com/isthatsol/status/557981863432564739
    foo_foo %>%
      hop_through(forest) %>%
      scoop_up(field_mouse) %>%
      bop_on(head)
  assign and str
    x = x %T>% str
  using operations instead of aliases
    x %>% .[3] %>% `+`(3)
    setnames
      `names<-`
      `colnames<-`
      `rename
  tee: return lhs
    matrix(ncol = 2) %T>%
      plot %>%
      colSums
  exposition of variables
    iris %$% cor(Sepal.Length, Sepal.Width)
  define function on fly
    long_vector %>%
    lapply(
      . %>%
      one_action %>%
      two_action
    )
  lambdas (unary function)
    iris %>% 
      {
        n = sample(1:10, size = 1)
        H = head(., n)
        T = tail(., n)
        rbind(H, T)
      } %>%
  examples
    x[!is.na(x)] # equivalent in pipe where x is any vector.
      x %>% '['(is.na(.) %>% '!')
piper
  install.packages("pipeR")
  rules of magrittr
    if no dot, then pipe to first arg
    if naked, then pipe to dot
    if dot in expression, then pipe to first arg and dot
    if subexpression, then pipe ?
  rules of piper
    pipe to first argument and to . (dot)
    pipe to . only if followed expression is enclosed within:
      {}
      ()
      (x ~ f(x))
  example 
    f <- function(x, y, z = "nothing") {
      cat("x =", x, "\n")
      cat("y =", y, "\n")
      cat("z =", z, "\n")
    }
    > 1:10 %>% f(1, .-1)
    x = 1 2 3 4 5 6 7 8 9 10
    y = 1
    z = 0 1 2 3 4 5 6 7 8 9
    > 1:10 %>>% f(1, .)
    x = 1 2 3 4 5 6 7 8 9 10
    y = 1
    z = 1 2 3 4 5 6 7 8 9 10>
    > 1:10 %>>% ( f(min(.),max(.)) )
    x = 1
    y = 10
    z = nothing
  lambda expression
    use (x ~ f(x))
    > 1:10 %>>% (x ~ f(min(x), max(x)))
    x = 1
    y = 10
    z = nothing
pipe examples
  make_na
      filename %>% root_xbrl %>>% (x ~ NA),
      filename %>% root_xbrl %>>% function(x) NA,
  detect filename that causes error
    filenames %>% l_ply(. %T>% print %>% root_xbrl2, .progress = "text")
functional programming
  currying  
    partial
  generifying a function using functional programming
    write_xbrl_data_x_hd = write_array_fun('xbrl_data_x_hd')
    write_list_xbrl_data_x_hd = function(xbrl_data_list) {
      for (i in seq_along(xbrl_data_list)) {
        title = names(xbrl_data_list)[[i]]
        write_xbrl_data_x_hd(xbrl_data_list[[i]], title)
      }
    }
    ->
    write_list_fun = function(file) {
      write_file_fun = write_array_fun(file)
      function(df_l, arg, ...) {
        for (i in seq_along(df_l)) {
          title = names(df_l)[[i]]
          write_file_fun(df_l[[i]], title)
        }
      }
    }
  problem: how to make a function testable
    change its dependencies without changing its definition
    download_company_idx_files(use_cache = use_cache)
    solution 1: using partials
      definition
        .download_company_idx_files = function(year, quarter, use_cache = F ) {
          file.names = path_array_company_0000_qtr0_zip(year, quarter)
          for (i in 1:length(file.names)) {
            file.name = file.names[i]
            download.file(url, destfile=file.name, method="wget") #@ > company_0000-qtr0.zip
          }
        }
        download_company_idx_files_real = partial(.download_company_idx_files, year = 2009:year(Sys.Date()), quarter = 1:4)
        download_company_idx_files = download_company_idx_files_real 
      test code
        year = '2014'
        quarter = '4'
        download_company_idx_files_test = partial(.download_company_idx_files, year = year, quarter = quarter)
        download_company_idx_files = download_company_idx_files_test
    problem 2: we have lots of similar test functions. how to abstract commonalities?
      example:
        unzip_company_idx_files_test = partial(.unzip_company_idx_files, year = year, quarter = quarter)
        unzip_company_idx_files = unzip_company_idx_files_test
        convert_idx2csv_test = partial(.convert_idx2csv, year = year, quarter = quarter)
    solution 2: using higher order function generator for partial
      make_test_fun = function(fun) {
        function(year, quarter) {
          partial(fun, year = year, quarter = quarter)
        }
      }
      download_company_idx_files_test = make_test_fun(.download_company_idx_files)
      download_company_idx_files = download_company_idx_files_test(year, quarter)
      unzip_company_idx_files_test = make_test_fun(.unzip_company_idx_files)
      unzip_company_idx_files = unzip_company_idx_files_test(year, quarter)
    solution 3: abstract one more step
      make_test_fun = function(fun, year, quarter) {
        partial(fun, year = year, quarter = quarter)
      }
      download_company_idx_files = make_test_fun(.download_company_idx_files, year, quarter)
  three dots/ellipsis/...
    arguments <- list(...)
Rstudio
  custom shortcuts
    #F12  go to file/function
    #B    go to function definition
    ^O    navigate back
  View() Rstudio
    planes %>% filter(no.seats < 10) %>% View()
dplyrExtras
Conditional replace in-place
  mutate_if(df,a==3,x=100)
  use
    mutate( type = ifelse(  f$type == f$field, NA, f$type ) )
Select/arrange columns with character variables
  cols = c("mpg","cyl, hp:vs")
  mtcars %.%
    filter(gear == 3,cyl == 8) %.%
    s_select(cols)
  s_arrange(mtcars, c("gear", "-mpg"))
  normal way:
    select(mpg, cyl)
XML
  xmlParse()
    doc = xmlParse(file)
  xmlRoot()
    root = xmlRoot(doc)
  navigating
    xmlChildren(root)
    xmlName # name of node
  looping over nodes
    root %>% xmlChildren %>% lapply(xmlName) # ==
    root %>% xmlApply(xmlName)
  xpath
    links = xpathSApply(root, "path")
    link_attr_vals = xpathSApply(root, "path", xmlGetAttr, "href")
apply/ldply/foreach list generations
  cases for lapply, map
    we need to loop over this function:
      find_correct_tag = function(fn, revenue, xdca) {..}
    opt1: make xdca global
      ct = Map(find_correct_tag, vxtf$filename, vxtf$revenue) %>%
        rbindlist
    opt2: partial find_correct_tag
      fun1 = partial(find_correct_tag, xdca = xdca)
      ct = Map(fun1, vxtf$filename, vxtf$revenue) %>%
        rbindlist
    opt3: use seq_along in lapply
      ct = lapply(seq_along(vxtf$filename), 
        function(i, vxtf, xdca) 
          find_correct_tag(vxtf[i]$filename, vxtf[i]$revenue, xdca)
        , vxtf, xdca
      ) %>%
        rbindlist
  lapply over names
    example_apply_with_names = function() {
      ls = list( a = 3, b = 5 )
      # opt1 
      for (n in names(ls)) {
        print(ls[[n]] + 1)
      } 
      # opt2 
      lapply( seq_along(ls), function(i, ns, ls) {
          ls[[ns[i]]] + 1
        }, names(ls), ls)
      # opt4
      mapn(ls, function(elem, name) {
           print(elem + 1)
           print(name)
        })
    }
  lapply datatable columns
    lapply(data, function(x) sprintf(t, x))
  for loop functionals: lapply/sapply/vapply/mapply
    lapply for lists
      for rows of data frames: use apply
    lapply(l, f)
      apply f to each element of list
      return: list as l
    aggregating l elements with f
      lapply(l, f) %>% unlist # ==
      sapply(l, f)
      replicate(n, expr, simplify = "array")
        wrapper for sapply
      simplify
        result simplifed to vector, matrix, array?
      simplify = F, value: list
    Map
      lapply: one argument varies
      Map: multiple args
        Map(f, list1, list2)
      mtmeans <- lapply(mtcars, mean) 
      mtmeans[] <- Map(`/`, mtcars, mtmeans) # ==
      mtcars[] <- lapply(mtcars, function(x) x / mean(x))
    mapply
      variant of Map
      do.call vs. lapply
        do.call(fun, args)
          fun(args)
        lapply(args, fun)
          args passed to fun one by one
          fun(args[[1]])
        sprintf
          do.call( 'sprintf', list( fmt = t, data[,1], data[,2] ) ) # works
          arg = c( list(t), as.list(data) )
          do.call( 'sprintf', arg ) # works
          not works
            do.call( 'sprintf', t, data )
            do.call( 'sprintf', t, list(data[,1], data[,2]) )
            sprintf( t, list(data[,1], data[,2]) )
            do.call( 'sprintf', list( fmt = t, data ) )
Data Use Cases
  distinct/unique values of vector
    unique()
  switch(ext,
    txt=dir_filings_txt(),
    xml=dir_filings_xbrl(),
    zip=dir_filings_zip(),
    xbrl=dir_filings_xbrl())
  duplicates
    duplicated rows in dplyr
      # opt1: duplikasyonları ve orjinal kayıtları siler
      dups8 = dd8 %>%
        group_by( entity_name, data_field_name ) %>%
        filter( n() > 1 )
      # opt2: sadece duplikasyonları siler
      dups8b = dd8 %>%
        distinct( entity_name, data_field_name, .keep_all = T ) 
      duplicated_rows = function(df, column) {
        fld %>%
          group_by_(column) %>%
          filter( n() > 1 )
      }
      duplicated_rows(fld, "field_id")
    duplicated(vec) # T, F, T
    get both: duplicated and its reference
      x = c(1,3,1)
      duplicated(x) | duplicated(x, fromLast = T)
    Remove duplicate rows 
      dups = duplicated( dt$cik )
      dt[!dups] 
    filter and select duplicate values
      v = filter_nonna(df, "enum_id")$enum_id
      v[ duplicated(v) ]
      ===
      duplicated_values = function(df, column) {
        v = filter_nonna(df, column)[[column]]
        v[ duplicated(v) ] %>% unique
      }
      duplicated_values(df, "enum_id")
    all_unique = function(v) { duplicated(v) %>% sum == 0 }
  which(logical) # 1, 3
  is.na(d1)
  rep(x, times)
  unlist(x) # flatten
  do.call('fun', iterable) # fun(iterable[1], iterable[2] ..)
  Access last value
    tail(vector, n=1)
    data frame :
      x[length(x[,1]),]
      x[dim(x)[1],]
      x[nrow(x),]
  is.null check: is.blank() in utils.R
  split df by filename
    split(df, df$filename)
  Queries/Subsetting
    Assignment if true
      df$agecat[age > 75] <- "Elder"
    how many exists?
      a = length( which(x$category == 'I.setosa') )
    non na values from vector
      d[!is.na(d)]
    non na rows from df
      filter_nonna = function(df, column) {
        df[!is.na(df[[column]]), ]
      }
      filter_nonna(df, "enum_id")
  Growing
    build parts then join them
      using for loop
        rl = vector('list', n)
        for(i in 1:n) {
          rl[[i]] = data.table(..)
        }
        dt = do.call('rbind', rl)
      dt = rbindlist(rl) # better
        rbindlist bug
          when columns order is different, rbindlist will produce nonsense 
          use use.names=T
  Serialization
    saveRDS(women, "women.rds")
    women2 <- readRDS("women.rds")
    dput(mean, "foo") # write in ascii
    bar <- dget("foo")
    unlink("foo") # remove
  Conversions
    dataframe to datatable
      den = read_excel2(path, 'DataEntity') %>% data.table
    list to data frame/table
      opt1
        my.df <- do.call('rbind', my.list)
        rbindlist(my.list)
      opt2
        as.data.frame(e)
      opt3: differing sizes
        test4 <- list('Row1'=letters[1:5], 'Row2'=letters[1:7], 'Row3'=letters[8:14])
        as.data.table(test4)
      http://www.r-bloggers.com/converting-a-list-to-a-data-frame/
    dataframe to list conversion
      # 2: transpose and as.list. elements are vectors
        dl2 = df %>%
          t %>%
          as.data.frame %>%
          as.list
      # 3: unlist. elements are vectors
        dl3 = df %>%
          apply(1, list) %>%
          unlist(recursive = F)
    convert vector to list
      as.list(c(1,2,3)
    vector to list
      kn <- c("1", "a", "b")
      nl <- vector(mode="list", length=length(kn)-1)
      names(nl) <- kn[-1]
      ml <- lapply(nl, function(x) kn[1])
      ml
        $a
        [1] "1"
        $b
        [1] "1"
    build list from a vector and multiple valued vector
      input
        > tags
        [[1]]
        [1] "Revenues"                "SalesRevenueServicesNet"
        [[2]]
        [1] "Revenues"
        > f
        [1] "1000045-0001193125-14-237425" "1000180-0001000180-15-000013"
      target
        ft 
        [[1]]
        $filename "100045"
        $tags list
          [[1]] "Revenues" ...
      using for loop
        ft = vector("list", length(f))
        for (i in seq_along(ft)) {
          ft[[i]]$filename = f[i]
          ft[[i]]$tags = tags[[i]]
        }
      using lapply
        ft2 = lapply(seq_along(f), 
          function(i, f, tags)
            list(
              filename = f[[i]],
              tags = tags[[i]]
            ),
          f, tags
          )
        identical(ft, ft2)
  Generate Test Data
    sample_with_replace = function(v, n = 100) sample(v, size = n, replace = T)
    sample_datatable = function(dt, n = 100) dt[ sample(nrow(dt), size = n) ]
    auction_data = data.frame(
      Price = 1:100 %>% sample_with_replace)
    s = auction_data %>% sample_datatable(5)
  read header of csv only
    con = file("data/flights4.csv")
    open(con)
    h4 = read.table(con, skip = 0, nrow = 1, sep = ",") %>% 
      unlist %>% unname
    close(con)
  complete
    It turns implicitly missing values into explicitly missing values.
    df <- data_frame(
      group = c(1:2, 1),
      item_id = c(1:2, 2),
      item_name = c("a", "b", "b"),
      value1 = 1:3,
      value2 = 4:6
    )
    df %>% complete(group, c(item_id, item_name))
      group item_id item_name value1 value2
      1       1         a      1      4
      1       2         b      3      6
      2       1         a     NA     NA
      2       2         b      2      5
    df
      Source: local data frame [3 x 5]
      group item_id item_name value1 value2
      1       1         a      1      4
      2       2         b      2      5
      1       2         b      3      6
Platform
  install from github
    library("devtools")
    install.packages("devtools")
    install_github("repo/username")
  update all packages from CRAN
    update.packages(checkBuilt=TRUE, ask=FALSE)
  Performance
    measure time
      system.time(for(i in 1:100) mad(runif(1000)))
    profiling
      Rprof('file')
      # code
      Rprof(NULL)
      summaryRprof('file')
  System
    system(cmd)
    system(cmd, intern=T)
      capture output of command 
    calling R from shell
      bash
        Rscript RscriptEcho.R study_rscript1.R test 10
      study_rscript1.R
        #! /usr/bin/Rscript --vanilla --default-packages=utils
        args <- commandArgs(TRUE)
        print(args)
    taking argument in R scripts
      args <- commandArgs(trailingOnly = TRUE)
      print(args)
  initial/startup/default session settings
    ~/.Rprofile
  options/settings
    options(max.width=100)
    GetOption("max.width")
    options(max.print=100)
    options(max.print=6)
rmarkdown
  ref
    http://rmarkdown.rstudio.com/
    https://www.rstudio.com/wp-content/uploads/2016/03/rmarkdown-cheatsheet-2.0.pdf
    https://www.rstudio.com/wp-content/uploads/2015/03/rmarkdown-reference.pdf
  render("input.Rmd", "pdf_document")
  getting started
    rstudio > file > new > rmarkdown > .html
      örnek bir şablon dosya açılır
    button bar > knit
  install
    library("rmarkdown")
    install.packages("rmarkdown")
  run
    rmarkdown::render("input.Rmd")
    render("input.Rmd")
    render("input.Rmd", "pdf_document")
  notebook in rstudio
  code chunks in rstudio
    #!i add new chunk
  code languages
    bash, python, sql, js
  parameters
    heading içinde parametre tanımlayabilirsin
    code  
      ---
      params: 
        data: "hawaii"
      ---
      data(list = params$data)
    setting parameters values
      render("file.Rmd", params = list(data = "niagara"))
  markdown format
    inline code
      formatting
        `kpv['kw003']` 
      evaluate expression
        `r kpv['kw003']` 
    embedding mathematical formulas
      latex
        $\frac{1}{n}$
    export to pdf
      opt
        output: pdf_document
        output: beamer_presentation
      pdf_document
        produces normal pdf doc
        render("input.Rmd", "pdf_document")
      beamer_presentation
        produces slides
        note: single heading level
    embedding code
      basic
        ```r
        code()
        ```
    table
      table of figures/data using kable
        http://yihui.name/printr/
        http://kbroman.org/knitr_knutshell/pages/figs_tables.html
        knitr::kable(matrix, digits = 2, caption = "A table produced by printr.")
      inside markdown
        ex1
          Firs | Secont
          -----|-------
          conte|con2
          cmo  | con 3
        ex2
          | name      | address         | phone   |
          |-----------------|--------------------------|------------|
          | John Adams    | 1600 Pennsylvania Avenue | 0123456789 |
          | Sherlock Holmes | 221B Baker Street   | 0987654321 |
        ex3
          |-----------------|--------------------------|------------|
          | name      | address         | phone   |
          |-----------------|--------------------------|------------|
          | John Adams    | 1600 Pennsylvania Avenue | 0123456789 |
          | Sherlock Holmes | 221B Baker Street   | 0987654321 |
          |-----------------|--------------------------|------------|
  output formats
    render("input.Rmd", output_format = "pdf_document")
    opt
      documents
        html notebook: interactive notebooks
        html document
        pdf
        word
        rtf
        md
      presentations
        ioslides
        beamer
    output options
      ex: table of contens
        ---
        output:
          html_document:
            toc: true
      help on options
        ?html_document
  notebooks
    ref
      http://rmarkdown.rstudio.com/r_notebooks.html
    interactive
      open from rstudio
      open in browser
        output: html_notebook
  presentation
    Presenter Mode
    add this to the end of the url while starting
      ?presentme=true
      /Users/mertnuhoglu/projects/dewey/data_analysis_presentations/istanbulcoders/input.html?presentme=true
    adding to slides
      <div class="notes">
      this is notes
      </div>
    new slide
      # title only
      ## title and text
      ----
        no title
    display modes
      f   full
      w   wide
      o   overview
      h   highlight
      p   presenter
  dashboards
  websites
    each .Rmd = page of site
    _site.yml
      header
  interactive documents 
    htmlwidgets
    shiny
other
  override functions
    filter = dplyr::filter
  open browser from R
    browseURL(url)
gis use cases
  Türkiye illerinin poligon haritası
    library(rgdal)
    library(leaflet)
    file =  "ne_50m_admin_0_countries.zip"
    download.file(file.path('http://www.naturalearthdata.com/http/',
        'www.naturalearthdata.com/download/50m/cultural',
        'ne_50m_admin_0_countries.zip'), 
        destfile = file)
    unzip(file)
    world <- readOGR(tempdir(), 'ne_50m_admin_0_countries', encoding='UTF-8')
    leaflet() %>%
      addTiles() %>%
      addPolygons(data=subset(world, name %in% "Turkey"), weight=2)
igraph
  ex
    library(igraph); 
    dor = data.frame( from = c(1, 1, 2, 3, 4, 5, 6), 
      to = c(2, 3, 4, 5, 6, 6, 7) )
    g = graph_from_data_frame(dor)
    plot(g)
  dependency ordering
    opt1: topological sorting
      g <- graph_from_data_frame(deps)
      t = topo_sort(g)
      which_loop(g)
      V(g)
      V(g)$name
      names(t)
    opt2: shortest path distances
      ex1
        lfs = data.frame( from = c('start', 'x', 'x', 'y'), 
                          to = c('x', 'y', 'p', 'z'), 
                          level = 0)
        # create graph from data.frame
        g <- graph_from_data_frame(lfs)
        # find distances from chosen node
        distances(g, "start")
      ex2
        g <- graph_from_data_frame(deps)
        d = distances(g, "EnumCategory")[1, ]
        dst = data_frame(
          entity_name = names(d),
          dependency_level = d
        )
  plot parameters
    http://kateto.net/network-visualization
  interactive params
    library(manipulate)
    ced4 = ced3 %>%
      mutate( ind = str_replace(ind, "_enum_id", "") ) %>%
      mutate( dep = str_replace(dep, "_enum_id", "") ) 
    g = graph_from_data_frame(ced4)
    l <- layout.grid(g)
    pdf("tmp/complex_enum_dependencies.pdf")
    plot(g, 
      layout = l,
      edge.arrow.size=0.2, 
      vertex.size = 5, 
      vertex.shape = "none",
      vertex.label.cex = 0.2
    )
    dev.off()
    manipulate(
      plot(g, 
        layout = l,
        edge.arrow.size=eas, 
        vertex.size = vs, 
        vertex.shape = "none",
        vertex.label.cex = vlc
      ),
      eas = slider(0,2,initial = 0.5, step = 0.1),
      vs = slider(0,10,initial = 5, step = 0.1),
      vlc = slider(0,2,initial = 1, step = 0.1)
    )
rest client - httr
  get with no args:
    jsonlite
    json - rest <url:#r=sr_0004>
  post and real rest
    httr
  GET 
    r <- GET("http://localhost:8080/greeting", 
      query = list(name = "Mert")
    )
    str(content(r))
    # List of 2
    #  $ id     : int 2
    #  $ content: chr "Hello, Mert!"
  content(r)$content
  # [1] "Hello, Mert!"
json - rest id=sr_0004
  json - rest <url:#r=sr_0004>
  jsonlite
    tercih
  all.equal(mtcars, fromJSON(toJSON(mtcars)))
  result = jsonlite::fromJSON("data/input/postman_20160719.json")
    bunu kullan
  jsonlite::fromJSON("data/arcgis/aws.json", simplifyDataFrame = T)
    df üretir, bazen
  emdkj = jsonlite::fromJSON("data/arcgis/emdk.json")$services
    direk df döndü
  other
    result = rjson::fromJSON(file="data/input/postman_20160719.json")
  call rest api
    https://cran.r-project.org/web/packages/jsonlite/vignettes/json-apis.html
    hadley_orgs <- fromJSON("https://api.github.com/users/hadley/orgs")
    response: 
      json [{..}, {..}]
      ->
      dataframe
    ex
      [
        {
          "login": "ggobi",
          "id": 423638,
          "url": "https://api.github.com/orgs/ggobi",
          "repos_url": "https://api.github.com/orgs/ggobi/repos",
          "events_url": "https://api.github.com/orgs/ggobi/events",
          "hooks_url": "https://api.github.com/orgs/ggobi/hooks",
          "issues_url": "https://api.github.com/orgs/ggobi/issues",
          "members_url": "https://api.github.com/orgs/ggobi/members{/member}",
          "public_members_url": "https://api.github.com/orgs/ggobi/public_members{/member}",
          "avatar_url": "https://avatars2.githubusercontent.com/u/423638?v=3",
          "description": ""
        },
      ->
      hadley_orgs %>% s
      'data.frame'
       $ login             
       $ id                
       $ url               
       $ repos_url         
       $ events_url        
       $ hooks_url         
       $ issues_url        
       $ members_url       
       $ public_members_url
       $ avatar_url        
       $ description       
yaml
  library('yaml')
  install.packages('yaml')
  install_github("MangoTheCat/rematch")
  yaml.load(aString)
  yaml.load_file(apath)
  as.yaml(obj)
assert_that
  https://github.com/hadley/assertthat
  install.packages('assertthat')
    library('assertthat')
  replacement for stopifnot
    assert_that(is.character(x))
    # Error: x is not a character vector
  examples
    assert_that( all_nonna(de$data_entity_id) )
    assert_that( nrow(dd) == nrow(dd3) )
    assert_that( none(n1 & n2) )
    assert_that( (sum(n1) + sum(n2) + sum(n3)) == nrow(dd4) )
    assert_that( setequal(dd$entity_name, de$entity_name) )
    assert_that( nrow(dd2) == 0 )
    assert_that( all_unique(df$data_field_id) )
rio
  install.packages("rio")
  library("rio")
  ev = import("data/enum_value.csv")
  export(ev, "data/enum_value.tsv")
  ev2 = import("data/enum_value.tsv")
  all.equal(ev, ev2, check.attributes = F)
  convert("data/enum_value.csv", "data/enum_value.tsv")
  # Rscript -e "rio::convert('data/enum_value.csv', 'data/enum_value.tsv')"
pryr
  install.packages("pryr")
  library("pryr")
## rmarkdown
  install.packages("rmarkdown")
  library("rmarkdown")
  run
  rmarkdown::render("input.Rmd")
  render("input.Rmd", "pdf_document")
  Presenter Mode
  add this to the end of the url while starting
    ?presentme=true
    /Users/mertnuhoglu/projects/dewey/data_analysis_presentations/istanbulcoders/input.html?presentme=true
  adding to slides
    <div class="notes">
    this is notes
    </div>
## slidify
  install
    install_github('ramnathv/slidify')
    install_github('ramnathv/slidifyLibraries')
  create deck
    library(slidify)
    author("slidify-demo-01")
  push to github
    github: create a new repo "slidify-demo-01"
    git remote add origin https://github.com/mertnuhoglu/slidify-demo-01.git
    local: git add+commit
  generate/update deck
    slidify("index.Rmd")
  publish github
    publish(user = "mertnuhoglu", repo = "slidify-demo-01", host = "github")
  open
    http://mertnuhoglu.github.io/slidify-demo-01/index.html
  publish dropbox
    publish(user = "mydeck", host = "dropbox")
  open
    https://dl.dropboxusercontent.com/u/103580364/mydeck/index.html
  extensions and themes
    http://ramnathv.github.io/slidifyExamples/
    http://slidify.org/style.html
    http://stackoverflow.com/questions/19348763/how-i-can-include-the-use-of-the-extension-deck-automation-js-when-i-create-a-do
      http://slidify.github.io/add-deck-ext/
  deckjs framework
    https://raw.githubusercontent.com/ramnathv/slidifyExamples/gh-pages/examples/deckjs/index.Rmd
    put into heading part (indent with spaces)
      framework: deckjs
      deckjs:
        transition: horizontal-slide
        extensions: [goto, hash, menu, navigation, scale, status]
    themes
      web-2.0
      swiss
    shortcuts
      m    view menu
      g#  go to slide
  add extensions
    http://slidify.github.io/add-deck-ext/
  add extension: automatic.js
    setup
      curl -o automatic.zip https://github.com/rchampourlier/deck.automatic.js/archive/master.zip
      unzip -oq automatic.zip deck.automatic.js-master/automatic/ 
      mv deck.automatic.js-master/automatic libraries/frameworks/deckjs/extensions/
      rm automatic.zip
      rm -r deck.automatic.js-master
    add to heading
      extensions: [goto, hash, menu, navigation, scale, status, automatic]
    add snippet to libraries/frameworks/deckjs/partials/snippets.html
      <!-- Initialize the deck -->
      <script>
      $(function() {
        // required only if the automatic extension is enabled.
        $.extend(true, $.deck.defaults, {
        automatic: {
          startRunning: false,  // true or false
          cycle: false,      // true or false
          slideDuration: 10000 // duration in milliseconds
        }})
        $.deck('.slide');
      });
      </script>
    add play/pause buttons to libraries/frameworks/deckjs/layouts/deck.html
      <!-- Begin slides -->
      {{{ page.content }}}
      <div class='deck-automatic-link' title="Play/Pause">Play/Pause</div>
  use cases
    impressjs
      visually stunning
    deckjs
      easy to use
    landslide
      question answer
    flowtime
      hierarchical

  multiple presentations
    subdirectory
      author("new_slidify_project")
      cd new_slidify_project
    new file
      cp index.Rmd new_slideshow.Rmd
      slidify("new_slideshow.Rmd")
potential bugs
  data.frame objelerinde factor -> numeric hatası
    data.frame numeric bir değeri factor'e çevirebilir
    sonra bunu geri numeric'e çevirdiğinde, farklı bir değer alırsın
    bu yüzden asla as.data.frame kullanma, as_data_frame kullan
transliterate
  iconv
    x = "Addyişm__NİO_Yasamal.PDF"
    iconv(x, "utf-8", "ASCII//TRANSLIT")
  stringi
    label %>%
      stri_trans_totitle( locale = "tr_TR" )
  regex
    transliterate_tr_to_ascii = function( lines ) {
      lines %>%
        str_replace_all(c("ü"="u", "ö"="o", "ı"="i", "Ü"="U", "Ö"="O", "İ"="I", "ş"="s", "ğ"="g", "ç"="c", "Ş"="S", "Ğ"="G", "Ç"="C", "ə"="e", "Ə"="E"))
    }
tidyr
  tutorial
    https://rpubs.com/bradleyboehmke/data_wrangling
  extract_numeric
    mutate(valuation = extract_numeric(`Valuation ($B)`))
  gather
    takes multiple columns, gathers them into key-value pairs
    wide to longer
  spread
    takes key-value columns, spreads into multiple columns
    logn to wider
  separate
    split single column into multiple
  unite
    unite multiple columns into single
  gather
    ex
      data
        trt wT1 hT1 wT2 hT2
        ....
      output
        trt key value
        ..  wT1 ..
        ..  wT2 ..
      api
        gather( data, key, value, ..)
          data: df
          key: column for new variable
          value: column for values
        gather( data, key, value, wT1:hT2)
  reshape
    reverse gather
    ex
    api
      data
        trt key value
        ..  wT1 ..
        ..  wT2 ..
      output
        trt wT1 hT1 wT2 hT2
        ....
      spread(data, key, value, ..)
        params
          data: df
          key: column to convert
          value: new column
        error: duplicate identifiers for rows
          bir identifier eklemelisin
          mutate( row = row_number() ) 
      gather( data, key, value)
  separate
    ex
      data
        yr  qtr rev
        ..  q1  10
        ..  q2  20
      output
        yr  int id  rev
        ..  q   1   10
        ..  q   2   20
      api
        separate( data, col, into, sep)
          data: df
          col: current var
          into: new variables
          sep: separator
  unite
    reverse of separate
database
  RPostgreSQL
    dbDriver
    dbConnect
      dbDisconnect
    dbApply: apply function to each row
    dbCallProc: call stored procedure
    dbCommit
      dbRollback
    dbGetInfo
      dbGetInfo(rs, what = "rowsAffected")
      names(dbGetInfo)
    ex
      dbDriver
      con = dbConnect(..)
      df = dbGetQuery(con, ..)
      rs = dbSendQuery(..)
      df = fetch(rs, n = -1)
    dbDataType.
    dbListTables
    dbReadTable
      dbRemoveTable
      dbWriteTable
      dbExistsTable
    rs = dbSendQuery
      df = dbGetQuery
  ROracle
    install.packages("ROracle")
    installing
      tutorial
        http://www.baldwhiteguy.co.nz/technical/index_files/mac-osx-oracle-instantclient.html
      download:
        instantclient-basic-macos.x64-11.2.0.4.0.zip
        instantclient-sdk-macos.x64-11.2.0.4.0.zip
        instantclient-sqlplus-macos.x64-11.2.0.4.0.zip
        put into ~/tools/oracle/instantclient_11_2
      setup
        cd ~/tools/oracle/instantclient_11_2
        ln -s libclntsh.dylib.11.1 libclntsh.dylib
        export PATH=~/tools/oracle/instantclient_11_2:$PATH
      https://docs.oracle.com/cd/E11882_01/install.112/e38228/inst_task.htm#BABHEBIG
        export ORACLE=$HOME/tools/oracle/instantclient_11_2
        export PATH=$ORACLE:$PATH
        export DYLD_LIBRARY_PATH=$ORACLE
        export NLS_LANG=$ORACLE
        export OCI_LIB_DIR=$ORACLE
        export OCI_INC_DIR=$ORACLE/sdk/include
        sqlplus
      install ROracle
        http://dba.stackexchange.com/questions/66424/how-to-install-roracle-on-linux
          R CMD INSTALL --configure-args='--with-oci-lib=/Users/mertnuhoglu/tools/oracle/instantclient_11_2 --with-oci-inc=/Users/mertnuhoglu/tools/oracle/instantclient_11_2/sdk/include' ROracle_1.2-2.tar.gz

    connection
      opt1
        drv <- dbDriver("Oracle")
        username = "system"
        password = "..."
        dbname = "52.73.23.191:1521/btgdev"
        con <- dbConnect(drv, user = username, password = password, dbname = dbname)
      opt2
        drv <- dbDriver("Oracle")
        username = "system"
        password = "..."
        host = "52.73.23.191"
        port = "1521"
        sid = "btgdev"
        connect.string <- paste( 
          "(DESCRIPTION=",
          "(ADDRESS=(PROTOCOL=tcp)(HOST=", host, 
          ")(PORT=", port, "))", 
          "(CONNECT_DATA=(SID=", sid, ")))", sep = "")
  RJdbc
    install.packages("RJDBC")
purrr
  reduce
    ex
      <url:file:///~/Dropbox/mynotes/content/mine/study_assign_kombin_termin.R>
    ex1
      # step4: full_join için for loop kullan
      res = evls[[1]]
      for ( i in 2:length(evls) ) {
        res = res %>%
          full_join( evls[[i]], by = "dependent_id" )
      }
      # step5: reduce ile yap
      full_join_by_dependent_id = function( evl1, evl2 ) {
        evl1 %>%
          full_join( evl2, by = "dependent_id" )
      }
      evls %>% reduce( full_join_by_dependent_id )
password kullanma
  env variable tanımla
    .Renviron içinde
  password = Sys.getenv("LERIS_ORACLE_BTG_MIS_PASSWORD")
dplyrOracle
  install_github("tomasgreif/dplyrOracle")
  ref
    # <url:file:///~/Dropbox (BTG)/TEUIS PROJECT 05-ANALYSIS/working_library/requirements_database/scripts/study_oracle.R>
datatree data.tree
  convert_parent_child_fk_into_pathString
    ref
      study_convert_parent_child_fk_into_pathString()
    ex
      get_parent_path = function( df, ids ) {
        get_parent_pathe = function(df, id) {
          find_parent = function(df, pid) {
            filter(df, id == pid)$parent_id
          }
          id_path = id
          pid = df[id, ]$parent_id
          while (!is_na(pid)) {
            id_path = c(id_path, pid)
            pid = find_parent(df, pid)
          }
          paste( filter(df, id %in% rev(id_path))$name, collapse = "/" )
        }
        r = vector('character')
        for (id in ids) {
          r = c(r, get_parent_pathe(df, id))
        }
        return(r)
      }
      # input data:
      df = data.frame(id = 1:3, name = c("asia", "iran", "tehran"), parent_id = c(NA, 1, 2), stringsAsFactors=F)
      # id,name,parent_id
      # 1,asia,NA
      # 2,iran,1
      # 3,tehran,2
      # target data:
      target = data.frame(id = 1:3, name = c("asia", "iran", "tehran"), parent_id = c(NA, 1L, 2L), pathString = c("asia", "asia/iran", "asia/iran/tehran"), stringsAsFactors=F)
      # id,name,parent_id,pathString
      # 1,asia,NA,asia
      # 2,iran,1,asia/iran
      # 3,tehran,2,asia/iran/tehran
      result = df %>%
        mutate( pathString = get_parent_path(., id) )
      result
      #      id   name parent_id       pathString
      #   <int>  <chr>     <int>            <chr>
      # 1     1   asia        NA             asia
      # 2     2   iran         1        asia/iran
      # 3     3 tehran         2 asia/iran/tehran
      dt <- as.Node(result)
      #        levelName
      # 1 asia
      # 2  °--iran
      # 3      °--tehran
rhandsontable
  install.packages("rhandsontable")
shiny
  publishing to shinyapps.io
    shinyapps.io/admin
    new domain name
    authorize account
    library(rsconnect)
    rsconnect::deployApp('path/to/your/app')
      rsconnect::deployApp('shiny/lesson01')
  conf
    location
      site_dir
        folder where multiple apps are stored in each folder
      app_dir
        only one application
      ex
        # Define the location '/specialApp'
        location /specialApp {
          # Run this location in 'app_dir' mode, which will host a single Shiny
          # Application available at '/srv/shiny-server/myApp'
          app_dir /srv/shiny-server/myApp
        }
        # Define the location '/otherApps'
        location /otherApps {
          # Run this location in 'site_dir' mode, which hosts the entire directory
          # tree at '/srv/shiny-server/apps'
          site_dir /srv/shiny-server/apps;
        }
    ...
opencpu
  use an external library function inside your own package
    ex:
      upload_run <- function(file) {
        rio::import(file)
      }
    note: import() alone gives error because opencpu doesn't load packages by itself
  install package to global library id=sr_0005
    install package to global library <url:#r=sr_0005>
    note: give permission first to write to global library
      cd /usr/local/lib/R
      sudo chmod o+w site-library
      opt: use custom container:
        mertnuhoglu/opencpu_libs:2
      else: ocpu.call paths will be different
        http://localhost:8004/ocpu/user/opencpu/library/vrpdata/R/hello
    1. step: install into global library
      .libPaths("/usr/local/lib/R/site-library")
    2. create new package / open package
      help
        create your own functions in rstudio <url:#r=sr_0006>
    3. rstudio > build & load
      devtools::load_all()
      devtools::install()
    4. check if library is installed correctly
  function calls
    curl url
      mypackage
        fun
      GET
        http://localhost:8004/ocpu/library/mypackage/R/fun/print
      POST
        http://localhost:8004/ocpu/library/mypackage/R/fun/
          returns several temporary urls
        http://localhost:8004/ocpu/library/mypackage/R/fun/json
          returns result directly
    js calls
      ref
        ~/projects/itr/vrp/frontend/views/test_pug/opencpu_*.pug
      ex: 04 
        ocpu.seturl("//localhost/ocpu/library/stats/R")
        var req = ocpu.call("rnorm", {n: 100, mean: rnd}, function (session) {..}
      ex: base
        ocpu.seturl("//localhost/ocpu/library/base/R")
        var req = ocpu.call("identity", { "x": mysnippet }, function (tmp) {
      ex: 05
        ocpu.seturl("//localhost/ocpu/library/utils/R")
        var csv = $("#input").val();
        var req = ocpu.call("read.csv", {text: csv}, function (session) {
      ex: 06
        ocpu.seturl("//localhost/ocpu/library/utils/R")
        var arg = [1,2,3];
        var req = ocpu.call("str", {object: arg}, function (session) {
      ex: 07
        ocpu.seturl("//localhost/ocpu/library/utils/R")
        var arg1 = [[1,2,3], [10,20,30]];
        var req1 = ocpu.call("str", {object: arg1}, function (session) {
      ex: 08_df custom package
        ocpu.seturl("//localhost/ocpu/user/opencpu/library/pmf/R")
        var arg1 = [[1,2,3], [10,20,30]];
        var req1 = ocpu.call("upload_data", {mat: arg1}, function (session) {
      ex: upload01 file upload
        <b>CSV File</b> <input type="file" id="csvfile">
        ocpu.seturl("//localhost/ocpu/library/utils/R")
        var myheader = $("#header").val() == "true";
        var myfile = $("#csvfile")[0].files[0];
        var req = ocpu.call("read.csv", { "file": myfile, "header": myheader }, function (session) {
      ex: upload02 file upload (rio)
        ocpu.seturl(`//${ocpu_domain}/ocpu/library/rio/R`)
        var req = ocpu.call("import", { "file": myfile }, ..
  run docker container
    docker pull opencpu/rstudio
    docker run -p 80:80 -p 8004:8004 --name ocp \
      -v /Users/mertnuhoglu/projects/itr/plan_management_frontend/r/pkg/:/home/opencpu \
      opencpu/rstudio 
    hata: sigwinch signal docker container'ı kapatıyor
      -t opsiyonunu kaldır
      --sig-proxy=false ile çalıştır
    hata: tüm portlar çalışmıyor
      80, 8004 çalışıyor
      8090, 9090 çalışmıyor
    opt
      docker run -t -p 80:80 -p 8004:8004 --name ocp --sig-proxy=false opencpu/rstudio 
      docker run -t -p 80:80 -p 8004:8004 --name ocp opencpu/rstudio 
      docker run -p 80:80 -p 8004:8004 --name ocp opencpu/rstudio 
    docker exec -i -t ocp /bin/bash
  test api
    http://localhost/ocpu/test/
      ../library/stats/R/rnorm/json
        n 3
        mean  10
        sd  10
      ../library/utils/R/read.csv
        file
      open Location
        http://localhost/ocpu/tmp/x040cd9fda3/
        http://localhost/ocpu/tmp/x040cd9fda3/R/.val/print
        http://localhost/ocpu/tmp/x040cd9fda3/R/.val/json
        http://localhost/ocpu/tmp/x040cd9fda3/R/.val/csv
        http://localhost/ocpu/tmp/x040cd9fda3/files
        http://localhost/ocpu/tmp/x040cd9fda3/files/siparis.csv
        http://localhost/ocpu/tmp/x040cd9fda3/info
      use key as argument
        ../library/ggplot2/R/qplot
          x en
          y boy
          data x040cd9fda3
        open Location
          http://localhost/ocpu/tmp/x05680d0a35/
          http://localhost/ocpu/tmp/x05680d0a35/graphics/1/png
          http://localhost/ocpu/tmp/x05680d0a35/graphics/1/png?width=1200
          http://localhost/ocpu/tmp/x05680d0a35/console/text
            > qplot(x = en, y = boy, data = x040cd9fda3::.val)
            [[ plot ]]
          http://localhost/ocpu/tmp/x05680d0a35/source/text
  nodejs
    https://github.com/albertosantini/node-opencpu
    npm install -S opencpu
    ex
      var opencpu = require("opencpu");
      opencpu.rCall("/library/datasets/R/mtcars/json", {}, function (err, data) {
          if (!err) {
              console.log(data[0].mpg + data[1].mpg); // => 42
          } else {
              console.log("opencpu call failed.");
          }
      });
  test js client
    test R:
      function(x, n){  return(x^n) }
    ex:
      http://jsfiddle.net/opencpu/7torLdk9/
      /Users/mertnuhoglu/projects/itr/plan_management_frontend/views/test_pug/opencpu01.pug
      copy opencpu-0.4.js to public/js/lib
      layout.pug
        script(src='https://mertnuhoglu.com/js/lib/opencpu-0.4.js')
    ex02:
      ocpu.seturl("//localhost/ocpu/library/base/R")
    ex03: file upload 
      http://jsfiddle.net/opencpu/hc5b9w7r/
  docker opencpu rstudio image
    https://hub.docker.com/r/opencpu/rstudio/
    run
      docker run -p 80:80 -p 8004:8004 --name ocp \
        opencpu/rstudio 
    apps
      rstudio app
        http://localhost/rstudio/
        opencpu/opencpu
      test app
        http://localhost/ocpu/
  OpenCPU presentation at useR! 2014-kAfVWxiZ-Cc.mp4
    hello world
      curl .../library/stats/R/rnorm/json -H "Content-Type: .." -d '{"n":3, "mean":10, "sd":10}'
      ===
      library(jsonlite)
      args = fromJSON('{"n":3, "mean":10, "sd":10}')
      output = do.call(stats::rnorm, args)
      toJSON(output)
      ===
      rnorm(n=3, mean=10, sd=10)
    what opencpu does
      interoperable http api
      rpc and object management
      io: json, protocol buffers, csv
      parallel/async
      security  policies
      client libraries: js, ruby, ...
    keys and objects
      /Users/mertnuhoglu/Dropbox/public/img/ss-180.png
      curl -v http://../stocks/R/smoothplot -d 'ticker="GOOG"'
      ===
      POST /stocks/R/smoothplot
      Content-Type: application/x-www-form-urlencoded
    other: curl and http content type
      application/x-www-form-urlencoded or multipart/form-data?
        https://stackoverflow.com/questions/4007969/application-x-www-form-urlencoded-or-multipart-form-data
        2 ways to POST:
          application/x-www-form-urlencoded
          multipart/form-data
            to upload files
        these are two MIME types that HTML forms use
          http has no such limitation
        tl;dr
          if you have binary data or big payload
            multipart/form-data
          otherwise
            application/x-www-form-urlencoded
        MIME types are 2 Content-Type headers for POST requests
          purpose: to send a list of key/value pairs
        application/x-www-form-urlencoded
          body of http message: one giant query string
          separated by &
          key/value by =
          non-alphanumeric characters replaced by '%HH'
            thus triples payload for binary files
        multipart/form-data
          each key/value pair: a part in a MIME message
          each part has
            MIME headers like 
              Content-Type
              Content-Disposition: this gives a part is name (key)
        other content-types:
          application/json
      curl examples
        https://gist.github.com/joyrexus/524c7e811e4abf9afe56
        ex
          URL=https://foo.io/users/joyrexus/shoes
          Url-encoded
            curl -d "brand=nike" -d "color=red" -d "size=11" $URL
            curl --data "brand=nike&color=red&size=11" $URL
          Multipart
            curl --form "image=@nikes.png" --form "brand=nike" --form "color=red" --form "size=11" $URL
            curl -F "image=@nikes.png" -F "brand=nike" -F "color=red" -F "size=11" $URL
            Change the name field of a file upload part by setting filename=:
              curl -F "image=@nikes.png;filename={desired-name}.png" -F "brand=nike" -F "color=red" -F "size=11" $URL
            Specify Content-Type by using type=:
              curl -F "image=@nikes.png;filename={desired-name}.png;type=image/png" -F "brand=nike" -F "color=red" -F "size=11" $URL
      curl post examples
        https://gist.github.com/subfuzion/08c5d85437d5d4f00e58
        -d, --data <data> Send specified data in POST request. Details provided below.
        -F, --form <name=content> Submit form data.
        -X, --request The request method to use.
        common curl options
          request type
            -X POST
            -X PUT
          content type header
            -H "Content-Type: application/x-www-form-urlencoded"
            -H "Content-Type: application/json"
          data
            form urlencoded: -d "param1=value1&param2=value2" or -d @data.txt
            json: -d '{"key1":"value1", "key2":"value2"}' or -d @data.json
        common examples
          POST application/x-www-form-urlencoded
            application/x-www-form-urlencoded is the default:
              curl -d "param1=value1&param2=value2" -X POST http://localhost:3000/data
            explicit:
              curl -d "param1=value1&param2=value2" -H "Content-Type: application/x-www-form-urlencoded" -X POST http://localhost:3000/data
            with a data file
              curl -d "@data.txt" -X POST http://localhost:3000/data
          POST application/json
            curl -d '{"key1":"value1", "key2":"value2"}' -H "Content-Type: application/json" -X POST http://localhost:3000/data
            with a data file
              curl -d "@data.json" -X POST http://localhost:3000/data
        server.js
          var app = require('express')();
          var bodyParser = require('body-parser');
          app.use(bodyParser.json()); // for parsing application/json
          app.use(bodyParser.urlencoded({ extended: true })); // for parsing application/x-www-form-urlencoded
          app.post('/data', function (req, res) {
            console.log(req.body);
            res.end();
          });
          app.listen(3000);
    get R function's code
      http://../stocks/R/smoothplot/print
    response:
      Location: https://tmp.ocpu.io/xlklmk/
      this contains the output
    state in opencpu
      difference with other R web frameworks
      each request is stateless
        no single, permanent R process
      instead: "functional state"
        each rpc stores object and returns key. no side-effects
        use key to get stored object
    public.opencpu.org/ocpu/test
      /Users/mertnuhoglu/Dropbox/public/img/ss-181.png
      read.csv with a file
        /Users/mertnuhoglu/Dropbox/public/img/ss-182.png
      open Location
        /Users/mertnuhoglu/Dropbox/public/img/ss-183.png
        get dataframe
          /Users/mertnuhoglu/Dropbox/public/img/ss-184.png
        get as json
          /Users/mertnuhoglu/Dropbox/public/img/ss-186.png
        get as csv
          /R/.val/csv
      you can use this Location in other function calls
        /Users/mertnuhoglu/Dropbox/public/img/ss-187.png
        /Users/mertnuhoglu/Dropbox/public/img/ss-189.png
      each key is secret
        so no authentication is needed
    javascript client
      /Users/mertnuhoglu/Dropbox/public/img/ss-191.png
    create your own functions in rstudio id=sr_0006
      create your own functions in rstudio <url:#r=sr_0006>
      build it as a package from inside RStudio
      rstudio
        file > new project > new directory > r package >
          .package name
        rstudio > files > r > mypackage.R
          test = function(x) {
            ..
          }
        rstudio > build > build & reload 
        opencpu:
          ocpu/user/jeroen/library
            /mypackage/R/test
          docker içinde
            /ocpu/user/opencpu/library/pmf/R/hello
        docker içindeki package'ı lokalde değiştirmek
          lokalde değiştir
          dockerda tekrar "build and reload" de
          opencpu'da artık kullanılabilir
      note: custom functions have different path than other libraries if they are not installed into global library
        ref
          install package to global library <url:#r=sr_0005>
        http://localhost:8004/ocpu/user/opencpu/library/vrpdata/R/hello
      ex
        /Users/mertnuhoglu/Dropbox/public/img/ss-192.png
    workflow
      setup opencpu
      write functions
      make them a package
      call them from opencpu
    trying
      free public server
        public.opencpu.org/ocpu
      single dev server
        install.packages("opencpu")
        library(opencpu)
    public package/app on ocpu.io
      github webhook
        public.opencpu.org/ocpu/webhook
      add this webhook to your github/webhooks
      then everytime you push, it will be updated
      your package
        yourname.ocpu.io/pkgname
  OpenCPU API
    https://www.opencpu.org/api.html
    default root path: /ocpu/
    debugging:
      /ocpu/info
        shows sessionInfo
      /ocpu/test
        testing 
    http methods
      GET
        to retrieve a resource
      POST
        for RPC
      GET
        object
          /ocpu/library/MASS/R/cats/json
        file
          /ocpu/library/MASS/NEWS
      POST
        object
          /ocpu/library/stats/R/rnorm
        file
          /ocpu/library/MASS/scripts/ch01.R
    api libraries
      /ocpu/library/{pkgname}/
        r packages
      /ocpu/apps/{gituser}/{reponame}/
        packages in github
    r package api
      /{pkgname}/ support endpoints:
        /info
        /www
          apps included
        /R/
          r objects
        /data/
        /man/
        /man/{topic}/{format}
          format: text, html, pdf
        /html
        /*
          files
    r object api
      /R api: to read objects and call functions
        /R/
          list r objects
        /data/
          list data objects
        /{R|data}/{object}
          if object is a function, it is called using POST
        /{R|data}/{object}/{format}
    r session api
      session: holds resources from a rpc call
        /tmp/{key}/
          list available output
        /tmp/{key}/R
          r objects
        /tmp/{key}/
          graphics/
            graphics/{n}/{format}
          source
            input source code
          stdout
          console
          zip
            dl session as zip
          files/*
            file api in working dir
    output formats for r objects
      print
      json
      csv
      tab
      md
      feather
      png
      pdf
    argument formats for r function calls
      primitives
      json
      r code
      file upload
      temp key
    running scripts and documents
      file.r
      file.tex
      file.md
        knitr::pandoc
      file.rmd
        knitr::knit
    json io rpc
      ex
        /ocpu/library/stats/R/rnorm/json
          sonuç doğrudan json olur, GET request gerekmez
      ex
        curl http://cloud.opencpu.org/ocpu/library/stats/R/rnorm/json \
        -H "Content-Type: application/json" -d '{"n":3, "mean": 10, "sd":10}'
        [
        4.9829,
        6.3104,
        11.411
        ]
      equivalent to
        library(jsonlite)
        args <- fromJSON('{"n":3, "mean": 10, "sd":10}')
        output <- do.call(stats::rnorm, args)
        toJSON(output)
    opencpu apps
      put into /inst/www/ directory
      interfaces with R package functions
    github ci hook
  OpenCPU JS Client
    https://www.opencpu.org/jslib.html
    cdn
      <script src="//cdn.opencpu.org/opencpu-0.4.js"></script>
    apps  
      apps = r packages
      install
        library(devtools)
        install_github(c("stocks", "markdownapp", "nabel"), username="opencpu")
      web pages
        /inst/www/
      to use an app
        opt1
          library(opencpu)
          opencpu$browse("/library/stocks/www")
          opencpu$browse("/library/nabel/www")
        opt2
          https://cloud.opencpu.org/ocpu/library/stocks/www
          https://cloud.opencpu.org/ocpu/library/markdownapp/www
    CORS
      opt1: include web pages in R package
      opt2: call opencpu as web services
        ocpu.seturl()
          specify external opencpu server
        must: all R functions are in a single R package
    stateless functions
      $("#mydiv").rplot( fun[, args][, callback])
        r
          smoothplot = function(arg1, arg2) {
            ..
            plot(..)
          }
        js
          $("#plotdiv").rplot("smoothplot", {arg1: val1, ..})
      ocpu.rpc(fun[, args][, complete])
        js
          var mydata = [1,2,3]
          var req = ocpu.rpc("sd", {x: mydata})
    calls and sessions
      state in opencpu
        session id is return to client
      call an r function
        ocpu.call vs. ocpu.rpc
          stateful equivalent of ocpu.rpc
          difference: callback function
          rpc: callback argument is json object
          call: callback argument is a session object
        ex
          var req = ocpu.call("rnorm", {n:100}, function(session) {
            $("#key").text(session.getKey())
            session.getObject(function(data) {
              console.log( data.slice(0,3) )
            })
          })
      argument types
        4 types
          js value: converted to R via json
          session: represents R value
          file
          code snippet
      session object
        ref: https://www.opencpu.org/jslib.html
        methods
          getKey():string
          getLoc():string
          getFileURL(path):string
            path: path wrt working directory
          getObject(name[, data][, success]):jqXHR
          getConsole([success]):jqXHR
            ===
            getStdout([success]):jqXHR
          getFile(path[, success]):string
            path: path wrt working directory
        ex
          // http://jsfiddle.net/opencpu/tmqab/
          var req = ocpu.call("rnorm", {n: 100}, function(session){
            $("#key").text(session.getKey());
            // Session ID: x09ebbe143d 
            $("#location").text(session.getLoc());
            // http://public.opencpu.org/ocpu/tmp/x09ebbe143d/
            session.getConsole(function(outtxt){
              $("#output").text(outtxt);
            });
            // > rnorm(n = 100L, mean = 0.214583808813093)
            // [1]  1.675699563  2.696433712 -0.371826912  1.139382581 -0.043985045
            //retrieve the returned object async
            session.getObject(function(data){
              alert("Array of length " + data.length + ".\nFirst few values:" + data.slice(0,3));
              // 100
              // 1.67 2.69 -0.37
            });
          })
  OpenCPU Server Manual
    https://opencpu.github.io/server-manual/opencpu-server.pdf
    1. What is OpenCPU
      http api'leriyle R fonksiyonlarını kullanmaya izin verir
        ex
          curl http://localhost/ocpu/library/stats/R/rnorm/json --data n=3
            [
              3.05644,
              0.38511,
              1.11983
            ]
      1.3 OpenCPU Apps
        app = R package
          + some web pages
        web pages inside /inst/www/
      1.4 OpenCPU single-user server
        bu geliştirme amaçlı
        running
          install.packages("opencpu")
          library(opencpu)
          ocpu_start_server()
        app yüklemek için:
          ocpu_start_app("rwebapps/markdownapp")
            githubdan çekip yükler
          http://localhost:5656/ocpu/apps/rwebapps/markdownapp
        cloud server: normal sunucu, bu sadece lokalde çalışmak için
    2. Installing OpenCPU cloud server
    3. Managing the OpenCPU cloud server
      3.2. Installing R packages 
        should be installed in global library
          opt1
            wget https://cran.r-project.org/src/contrib/Rcpp_0.12.12.tar.gz
            sudo R CMD INSTALL Rcpp_0.12.12.tar.gz --library=/usr/local/lib/R/site-library
          opt2
            install.packages("Rcpp")
        after restarting they will be available through API
          sudo apachectl restart
          http://localhost:8004/ocpu/library/lubridate
          it restarts automatically after library installation
    4. Testing the OpenCPU API
      4.3 Calling a function
        general
          1. Perform a POST 
            curl http://your.server.com/ocpu/library/stats/R/rnorm -d "n=10&mean=100"
          2. OpenCPU returns locations of the output data
            /ocpu/tmp/x032a8fee/R/.val
            /ocpu/tmp/x032a8fee/stdout
          3. Perform a GET request
            http://your.server.com/ocpu/tmp/x032a8fee/R/.val
        exception: return json directly
          add /json to POST request
            http://../R/norm/json
        input arguments as json
          curl http://your.server.com/ocpu/library/stats/R/rnorm \
          -H "Content-Type: application/json" -d '{"n":10, "mean": 10, "sd":10}'
mongolite
  ref
    https://jeroen.github.io/mongolite
  library(mongolite)
  ref
    study_dentas_mongodb.R
  !!!tırnak meselesi
    json ifadelerinin dış tırnağı ', iç tırnakları " olmalı, yoksa kabul etmiyor
  connection to a database and collection
    con <- mongo("master_rates", url = "mongodb://myUserAdmin:12345@localhost:27017/demo")
    con$count()
  import / export
    import json
      con <- mongo("temp_rates", url = "mongodb://myUserAdmin:12345@localhost:27017/demo")
      con$import(file("/Users/mertnuhoglu/Dropbox/mynotes/prj/itr/pitr/dentas/mongo_export03_newline_delimited/master_rates.json"))
    import df
      con$insert(df)
  query
    all documents
      mrt = con$find()
    by date
      library(GetoptLong)
      datemillis <- as.integer(as.POSIXct("2015-01-01")) * 1000
      data <- data_collection$find(qq('{"createdAt":{"$gt": { "$date" : { "$numberLong" : "@{datemillis}" } } } }'))
      ===
      query2 = '{"validFromD" : { "$gte" : { "$date" : { "$numberLong" : "1488315600000" } }} }'
  update
    # single document
      con$update('{"depotName": "CORLU"}', '{"$set":{"ratePerExtraDrop": "51"}}')
    # multiple documents
      con$update('{"depotName": "CORLU"}', '{"$set":{"ratePerExtraDrop": "51"}}', multiple = T)
  aggregate
    ex
      stats <- flt$aggregate(
        '[{"$group":{"_id":"$carrier", "count": {"$sum":1}, "average":{"$avg":"$distance"}}}]',
        options = '{"allowDiskUse":true}'
      )
roxygen2  l
  running
    roxygen2::roxygenise()
    devtools::document()
    ^+D
  ex
    #' Add together two numbers
    #'
    #' @param x A number
    #' @param y A number
    #' @return The sum of \code{x} and \code{y}
    #' @examples
    #' add(1, 1)
    #' add(10, 1)
    add <- function(x, y) {
      x + y
    }
    generated: man/add.Rd
    accessed: ?add help("add") example("add")
unclassified
  execute a script from another directory / getting path of an executing script
    http://stackoverflow.com/posts/1815743/
    https://stackoverflow.com/posts/3473388/
    #!/usr/bin/env Rscript
    initial.options <- commandArgs(trailingOnly = FALSE)
    file.arg.name <- "--file="
    script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])
    script.dirname <- dirname(script.name)
    source(file.path(script.dirname, "source_scripts.R"), chdir = T)
  suppress disable library sourcing loading messages
    suppressMessages(library(x))
packages packagesr
  devtools
    devtools::create("mypackage")
      create a new package
    devtools::load_all()
      reload your code
      #+L
    devtools::document() generates: 
      man/add.Rd
      #+B   build and reload
    devtools::install()
  github project
    # create R package in RStudio
    git init
    git add .
    git commit -m "First commit"
    git remote add origin https://github.com/mertnuhoglu/study_r_package01.git
    git remote -v
    git push -u origin master
    devtools::install_github("mertnuhoglu/study_r_package01")
  install path
    library: the directory where packages are installed
    get list of all libraries:
      .libPaths()
    by default R installs packages into the first directory in .libPaths()
    overriding
      R CMD INSTALL --library=/path/to/Rlibs
      -l --library
      env variable: R_LIBS R_LIBS_USER
      .libPaths( "/Users/tex/lib/R" ) 
        before installing or put into .Rprofile
  installed packages
    sessionInfo()
    installed.packages()
  file structure
    DESCRIPTION
    R/
    man/*.Rd: automatically generated by roxygen
    NAMESPACE: automatically generated
    data/
  Creating a package
    rstudio
      file > new project > new directory > r package
    cli
      devtools::create("path/to/package/pkgname")
  package files
    pkgname.Rproj
    .Rbuildignore
      ^.*\.Rproj$
    bundle: .tar.gz file
  library: install path
    a directory containing installed packages
    paths of libraries:
      .libPaths()
  Dependencies
    ex
      Imports: 
        dplyr,
  book: R Packages - Hadley Wickham
    http://r-pkgs.had.co.nz/intro.html
    content
      r code
        r/ directory
        where all R code lives
      package metadata
        DESCRIPTION
      documentation
        how to use functions
        roxygen2 to document functions
      vignettes
        big picture documentation
        uses Rmarkdown and knitr
      tests
        testthat
      namespace
        NAMESPACE
      external data
        data/ directory
      compiled code
        src/ directory
        compiled c code
      other components
        demo/
        exec/
        po/
        tools/
      other
        git
        automated checking
          R CMD check
        release
    getting started
      install.packages(c("devtools", "roxygen2", "testthat", "knitr"))
    Package structure
      naming your package 
      Creating a package
        rstudio
          file > new project > new directory > r package
        cli
          devtools::create("path/to/package/pkgname")
      RStudio Projects
        pkgname.Rproj
          text file like:
            Version: 1.0
            RestoreWorkspace: No
            SaveWorkspace: No
      What is a package
        Source packages
          directory with R/ DESCRIPTION etc.
        Bundled package
          .tar.gz file
          .Rbuildignore
            ^.*\.Rproj$
          Binary packages
          Installed packages
            decompressed into a package library
            installation done with: R CMD INSTALL
          In memory packages
            library(x)
      What is a library
        a directory containing installed packages
        note:
          library = directory containing packages
        paths of libraries:
          .libPaths()
          # osx:
            # [1] "/Users/mertnuhoglu/Library/R/3.3/library"                       "/Library/Frameworks/R.framework/Versions/3.3/Resources/library"
          # opencpu:
            # [1] "/usr/local/lib/R/site-library" "/usr/lib/R/site-library"
            # [3] "/usr/lib/R/library"
        lapply(.libPaths(), dir)
        When you use library(pkg) or require(pkg) to load a package, R looks through each path in .libPaths() to see if a directory called pkg exists.
    R Code
      reload your code
        devtools::load_all()
        #+L
      organizing functions
        extremes: 
          all functions in one file
          one file per function
        file extension: .R
        no subdirectories: use prefix
          abc-*.R
        split file when you can't remember where some function resides
        jump to definition: 
          F2
          ^. Code > Go to file/function
        navigate back: #F9
      Code style
        automatic formatting (lint): formatR
          install.packages("formatR")
          formatR::tidy_dir("R")
        opt lint: lintr
          install.packages("lintr")
          lintr::lint_package()
        Object names
        Spacing
          spaces around all infix ops =+-<- etc
            except : :: :::
          space before left parantheses, except function call
            if (debug)
            plot(x, y)
        Curly braces
          new line after {
          } in own line
        Line length
        Indentation
          2 spaces per tab
          second line where definition starts
            long_function_name <- function(a = "a long argument", 
                                           b = "another argument",
                                           c = "another long argument") {
        Assignment
          use <- not =
        Commenting guidelines
          # space
          # Part 1 ---------
      Top level code
        don't execute code at top level
        only define functions or objects
        ex
          foo package contains
            library(ggplot2)
            show_mtcars <- function() {
              qplot(mpg, wt, data = mtcars)
            }
          If someone tries to use it:
            library(foo)
            show_mtcars()
          this won't work because library(ggplot2) is executed when package is built, not when it's loaded
        The R Landscape
          never change global R landscape
            don't use library()
              these modify search path
              package requirements should be installed and sourced separately
            don't use source()
              it modifies current environment
              rely on devtools::load_all()
                it sources all files in R/
        When you do need side effects
          two specifal functions: .onLoad() .onAttach()
            called when packages is loaded and attached
            use .onLoad() in general
          common uses of .onLoad()
            display an informative message when package loads
            set custom options
              .onLoad <- function(libname, pkgname) {
                op <- options()
                op.devtools <- list(
                  devtools.path = "~/R-dev",
                  devtools.install.args = "",
            connect java, c etc
              ex
                rJava::.jpackage()
            register vignette engines
          .onLoad() saved in: zzz.R
          .onUnload()
        S4 classes
      CRAN notes
    Package metadata
      DESCRIPTION file
      devtools::create("mypackage")
      ex
        Package: mypackage
        Title: What The Package Does (one line, title case required)
      Dependencies
        packages needed
        ex
          Imports: 
            dplyr,
            ggvis
          Suggests:
            rlist
        package::function()
          explicitly refer to external functions
        Suggests: optional
          check if the package is available
            if (!requireNamespace("pkg" ...)
      Versioning
        Imports:
          ggvis (>= 0.2)
      Other dependencies
        Depends:
          deprecated
        LinkingTo
          for C code
        Enhances
          enhanced by your package
          don't use
      Title and description
        Title: one line description
        Description
        Authors@R
          r code
          Authors@R: person("Hadley", "Wickham", email = "hadley@rstudio.com",
            role = c("aut", "cre"))
      License
        MIT
          license must always be distributed with the code
        GPL-2 GPL-3
          copy left
          whole bundle should be GLP compatible
        CC0
          you give all your rights
      Version
        major.minor.patch
          1.2.3
        major.minor.patch.indevelopment
          0.0.0.9000
          first version
      Other components
    Object Documentation
      intro
        accessed by ? or help()
        like a dictionary
          you know the word
          learn what that word means
        vignette
          what the right word is
        man/ directory
          .Rd files
          loosely based on latex
          roxygen2 generates them
      documentation workflow
        steps
          1. add roxygen comments to .R files
          2. run devtools::document()
          3. preview doc with ?
          4. rinse and repeat
        roxygen comments
          #' Add together two numbers
          #'
          #' @param x A number
          #' @return The sum of \code{x} and 
          #' @examples
          #' add(1, 1)
          add <- function(x, y) { x + y }
        devtools::document() generates: 
          man/add.Rd
        ?add, help("add", example("add")
          generates HTML
      Alternative documentation workflow
        way to show links between pages
          1. roxygen comments
          2. "Build & Reload" or #+B
          3. ?
          4. rinse and repeat
      Roxygen comment
        lines preceding called: block
        tags: @tagName details
        @@: literal @
        introduction: first sentence (must)
          one line
        second paragraph: description (must)
        third and then: details
        \code{} \link{}
          formatting commands
          lines less than 80 chars
            #+/: reflow comment
        @section
          break details into chunks
          #' @section Warning:
        @seealso
          point useful resourcs
            \url{http://www.r-project.org}
            \code{\link{functionname}}
            \code{\link[packagename]{functionname}}
          @family
            family of related functions
        @aliases alias1 alias2
          ?alias1
        @keywords keyword1 keyword2
          not useful
      Documenting functions
        @param name Description.
        @param x,y Numeric vectors.
        @examples
          run automatically as part of R CMD check
          code that includes error:
            \dontrun{}
        @return description
      Documenting packages
        help for package 
          access
            package?foo
          @docType package
          put into: <package-name>.R
      Documenting classes
        S3
        S4
        RC
      Special characters
        @: @@
        %: \%
        \: \\
      Do repeat yourself
        DRY is not valid here
        Inheriting parameters from other functions
          @inheritParams foo
        Documenting multiple functions
      Text formatting
        Character
          \emph{italics}
          \strong{bold}
          \code{..}
          \preformatted{}
        Links
          \code{\link{function}}
          \link[=dest]{name}
          \url{..}
          \email{..}
        Lists
          \enumerate{
            \item ..
          }
          \itemize{ ..}
        Mathematics
          \eqn{a + b}
        Tables
          \tabular{}
    Vignettes
      intro
        browseVignettes()
          see all installed vignettes
        browseVignettes("packagename")
        consists of
          source file
          HTML/PDF
          R code
        vignette(x)
          read a specific one
        edit(vignette(x))
          see its code
      Vignette workflow
        creating
          devtools::use_vignette("my-vignette")
          1. creates vignettes/
          2. Adds dependencies to DESCRIPTION
          3. Drafts a vignette
            vignettes/my-vignette.Rmd
        workflow
          1. modify file
          2. #+K "Knit"
      Metadata
        first few lines
          ---
          title: "Vignette Title"
          author: "Vignette Author"
          vignette: >
            %\VignetteIndexEntry{Vignette Title}
        written in yaml
          >: literal text not yaml
      Markdown
        code
          ```r
          ```
      Knitr
        ex
          ```r
          # Add two numbers together
          add <- function(a, b) a + b
          add(10, 20)
          ```
          This generates the following Markdown:
          ```r
          # Add two numbers together
          add <- function(a, b) a + b
          add(10, 20)
            ## [1] 30
          ```
        tables
          ```{r, results = "asis"}
          pander::pandoc.table(iris[1:3, 1:4])
      Development cycle
        run chunk
          #!C
        run entire document
          #+K
        build all vignettes
          devtools::build_vignettes()
        create package with vignettes included
          devtools::build()
      Advice
        If you’re thinking without writing, you only think you’re thinking. — Leslie Lamport
        Style, J. Williams
      Organization
      CRAN notes
    Testing
      intro
        workflow upto now
          1. write function
          2. load it #+L or devtools::load_all()
          3. experiment in console
      Test workflow
        setup
          devtools::use_testthat()
          this will
            1. create tests/testthat/
            2. Adds testthat to DESCRIPTION
            3. create tests/testthat.R
        workflow 
          1. modify code
          2. test package #+T or devtools::test()
        testing output
          Expectation: ....
          Variance: ...123.45.
          "." a passed test
          number: failed test
        Test structure
          names start with "test"
          inside: tests/testtthat/
          ex
            context("file name")
            test_that("str_length is number of characters", {
              expect_equal(str_length("a"), 1)
              expect_equal(str_length("ab"), 2)
              expect_equal(str_length("abc"), 3)
            })
        Writing tests
        Skipping a test
          skip("api not available")
    Namespace
      Motivation
        :: operator
        two ways to make packages self-contained:
          imports
          exports
      Search path
        search()
          global environment
          attached packages
      The NAMESPACE
        roxygen2 can generate NAMESPACE file
      Workflow
        1. add roxygen comments
        2. devtools::document() or #+D
        3. check NAMESPACE and run tests to check
      Exports
        to make a function usable outside of your package
        by default: devtools::create() exports everything
        #' @export
        foo <- function(..)
      Imports
        which external functions can be used without ::
        best to be explicit: pkg::fun()
    External data
    Automated checking
      intro
        R CMD check
      workflow
        run devtools::check() or #+E 
          wraps R CMD check
          runs devtools::document()
          bundles package
      Checks
  article: R package primer - kbroman.org
    http://kbroman.org/pkg_primer/
    Building and installing
      opt1: console
        go to parent directory
        R CMD build mypackage
          * building ‘package01_0.0.0.9000.tar.gz’
        R CMD INSTALL mypackage.tar.gz
          R CMD INSTALL package01_0.0.0.9000.tar.gz
      opt2: devtools
        # go to package directory
        library(devtools)
        build()
          builds tar.gz
        install()
  article: Writing an R package from scratch - Hilary Parker
    https://hilaryparker.com/2014/04/29/writing-an-r-package-from-scratch/
    0. load libraries
      install.packages("devtools")
      library("devtools")
      devtools::install_github("klutometis/roxygen")
      library(roxygen2)
    1. create package directory
      setwd("parent_directory")
      create("cats")
    2. add function files
    3. ad documentation in roxygen comments
    4. build documentation
      setwd("./cats")
      document()
    5. install
      setwd("..")
      install("cats")
    6. push to github
next
  read funs
  http://rmaps.github.io/
  http://gis.stackexchange.com/questions/3310/what-is-the-most-useful-spatial-r-trick
R Study Notes & Refcard

refcard with examples

Mert Nuhoglu