Example: datafiller SQL Data Generator

1417 words · 7 min read

Getting Started

cat data/library.sql
## CREATE TABLE Book(  --df: mult=3.0
##   bid SERIAL PRIMARY KEY,
##   title TEXT NOT NULL, 
##   isbn ISBN13 NOT NULL 
## );
## CREATE TABLE Reader( 
##   rid SERIAL PRIMARY KEY,
##   firstname TEXT NOT NULL, 
##   lastname TEXT NOT NULL, 
##   born DATE NOT NULL, 
##   gender BOOLEAN NOT NULL, 
##   phone TEXT 
## );
## CREATE TABLE Borrow( --df: mult=1.5
##   borrowed TIMESTAMP NOT NULL, 
##   rid INTEGER NOT NULL REFERENCES Reader,
##   bid INTEGER NOT NULL REFERENCES Book, 
##   PRIMARY KEY(bid) -- a book is borrowed once at a time!
## );

Note meta attributes:

CREATE TABLE Book(  --df: mult=2.0
...
datafiller --size=2 data/library.sql > data/library_test_data.sql
head data/library_test_data.sql
## 
## -- data generated by /usr/local/bin/datafiller version 2.0.0 (r792 on 2014-03-23) for postgresql
## 
## -- fill table book (6)
## \echo # filling table book (6)
## COPY book (bid,title,isbn) FROM STDIN (ENCODING 'utf-8');
## 1    title_1_1_1_    9784413254182
## 2    title_4_4_4_4_4 9782571851779
## 3    title_3_3_3 9787601387877
## 4    title_2_2_2_2_2_    9788305707527

Meta Attributes

cat data/library02.sql
## CREATE TABLE Book(  --df: mult=2.0
##   bid SERIAL PRIMARY KEY,
##   title TEXT NOT NULL, 
##   -- df English: word=/Users/mertnuhoglu/projects/test_data/google-10000-english-no-swears.txt
##   -- df: text=English length=4 lenvar=3
##   isbn ISBN13 NOT NULL 
## );
## CREATE TABLE Reader( 
##   rid SERIAL PRIMARY KEY,
##   firstname TEXT NOT NULL, 
##   -- df first: word=/Users/mertnuhoglu/projects/test_data/first_names.txt
##   -- df: text=first length=1 lenvar=0
##   lastname TEXT NOT NULL, 
##   -- df last: word=/Users/mertnuhoglu/projects/test_data/last_names.txt
##   -- df: text=last length=1 lenvar=0
##   born DATE NOT NULL, -- df: start=1950-01-01 end=2005-01-01
##   gender BOOLEAN NOT NULL, 
##   phone TEXT 
##   -- df: chars='0-9' length=10 lenvar=0
##   -- df: null=0.01 size=1000000
## );
## CREATE TABLE Borrow( --df: mult=1.5
##   borrowed TIMESTAMP NOT NULL, 
##   rid INTEGER NOT NULL REFERENCES Reader,
##   bid INTEGER NOT NULL REFERENCES Book, 
##   PRIMARY KEY(bid) -- a book is borrowed once at a time!
## );

Note the following attributes:

  title TEXT NOT NULL, 
  -- df English: word=/Users/mertnuhoglu/projects/test_data/google-10000-english-no-swears.txt
  -- df: text=English length=4 lenvar=3
  ...
  born DATE NOT NULL, -- df: start=1950-01-01 end=2005-01-01
  ...
  phone TEXT 
  -- df: chars='0-9' length=10 lenvar=0
  -- df: null=0.01 size=1000000
  ...
datafiller --size=3 data/library02.sql > data/library_test_data02.sql
cat data/library_test_data02.sql
## 
## -- data generated by /usr/local/bin/datafiller version 2.0.0 (r792 on 2014-03-23) for postgresql
## 
## -- fill table book (6)
## \echo # filling table book (6)
## COPY book (bid,title,isbn) FROM STDIN (ENCODING 'utf-8');
## 1    polished tu marble funded intro pennsylvania    9788014981218
## 2    bedford vg  9782723776547
## 3    trained commentary mozilla  9782723776547
## 4    gourmet mathematics 9782913571662
## 5    favors  9783956794780
## 6    knitting purpose creatures skins    9782723776547
## \.
## 
## -- fill table reader (3)
## \echo # filling table reader (3)
## COPY reader (rid,firstname,lastname,born,gender,phone) FROM STDIN (ENCODING 'utf-8');
## 1    Eldon   Feela   1970-01-29  FALSE   6001655890
## 2    Kerrie  Calk    1979-03-22  FALSE   6986568412
## 3    Fiona   Echavarria  1960-08-05  FALSE   3753286868
## \.
## 
## -- fill table borrow (4)
## \echo # filling table borrow (4)
## COPY borrow (borrowed,rid,bid) FROM STDIN (ENCODING 'utf-8');
## 2017-12-21 16:29:57  2   1
## 2017-12-21 16:30:57  3   2
## 2017-12-21 16:30:57  1   3
## 2017-12-21 16:28:57  2   4
## \.
## 
## -- restart sequences
## ALTER SEQUENCE book_bid_seq RESTART WITH 7;
## ALTER SEQUENCE reader_rid_seq RESTART WITH 4;
## 
## -- analyze modified tables
## ANALYZE book;
## ANALYZE reader;
## ANALYZE borrow;
cat data/library03.sql
## -- df English: word=/Users/mertnuhoglu/projects/test_data/google-10000-english-no-swears.txt
## -- df first: word=/Users/mertnuhoglu/projects/test_data/first_names.txt
## -- df last: word=/Users/mertnuhoglu/projects/test_data/last_names.txt
## CREATE TABLE Book(  --df: mult=2.0
##   bid SERIAL PRIMARY KEY,
##   title TEXT NOT NULL, 
##   -- df: text=English length=4 lenvar=3
##   isbn INTEGER NOT NULL 
## );
## CREATE TABLE Reader( 
##   rid SERIAL PRIMARY KEY,
##   firstname TEXT NOT NULL, 
##   -- df: text=first length=1 lenvar=0
##   lastname TEXT NOT NULL, 
##   -- df: text=last length=1 lenvar=0
##   born DATE NOT NULL, -- df: start=1950-01-01 end=2005-01-01
##   gender BOOLEAN NOT NULL, 
##   -- df: rate=0.25
##   phone TEXT 
##   -- df: chars='0-9' length=10 lenvar=0
##   -- df: null=0.01 size=1000000
## );
## CREATE TABLE Borrow( --df: mult=1.5
##   borrowed TIMESTAMP NOT NULL, 
##   -- df: size=72000 prec=60
##   rid INTEGER NOT NULL REFERENCES Reader,
##   bid INTEGER NOT NULL REFERENCES Book, 
##   note TEXT,
##   -- df: sub=power prefix=note size=1000 rate=0.03
##   PRIMARY KEY(bid) -- a book is borrowed once at a time!
## );

Note the following attributes:

  gender BOOLEAN NOT NULL, 
  -- df: rate=0.25
  ...
  borrowed TIMESTAMP NOT NULL, 
  -- df: size=72000 prec=60
  ...
  note TEXT,
  -- df: sub=power prefix=note size=1000 rate=0.03
datafiller --size=3 data/library03.sql > data/library_test_data03.sql
cat data/library_test_data03.sql
## 
## -- data generated by /usr/local/bin/datafiller version 2.0.0 (r792 on 2014-03-23) for postgresql
## 
## -- fill table book (6)
## \echo # filling table book (6)
## COPY book (bid,title,isbn) FROM STDIN (ENCODING 'utf-8');
## 1    palace funky    3
## 2    moves indeed download attendance towers answer britney  4
## 3    filled kent constant    6
## 4    engineer nearby me salmon shell bike brought    5
## 5    role    5
## 6    bumper charm gd 3
## \.
## 
## -- fill table reader (3)
## \echo # filling table reader (3)
## COPY reader (rid,firstname,lastname,born,gender,phone) FROM STDIN (ENCODING 'utf-8');
## 1    Todd    Sesso   1988-01-18  FALSE   1931243076
## 2    Dean    Ritmiller   1979-06-07  TRUE    1272396169
## 3    Berry   Salos   1968-04-26  TRUE    9848238480
## \.
## 
## -- fill table borrow (4)
## \echo # filling table borrow (4)
## COPY borrow (borrowed,rid,bid,note) FROM STDIN (ENCODING 'utf-8');
## 2017-12-13 07:26:12  1   1   note_493_493
## 2017-11-08 01:23:12  1   2   note_445_
## 2017-11-28 22:35:12  1   3   note_223_
## 2017-11-12 07:42:12  1   4   note_762_762
## \.
## 
## -- restart sequences
## ALTER SEQUENCE book_bid_seq RESTART WITH 7;
## ALTER SEQUENCE reader_rid_seq RESTART WITH 4;
## 
## -- analyze modified tables
## ANALYZE book;
## ANALYZE reader;
## ANALYZE borrow;
cat data/library04.sql
## -- df English: word=/Users/mertnuhoglu/projects/test_data/google-10000-english-no-swears.txt
## -- df first: word=/Users/mertnuhoglu/projects/test_data/first_names.txt
## -- df last: word=/Users/mertnuhoglu/projects/test_data/last_names.txt
## CREATE TABLE Book(  --df: mult=2.0
##   bid SERIAL PRIMARY KEY,
##   title TEXT NOT NULL, 
##   -- df: text=English length=4 lenvar=3
##   isbn INTEGER NOT NULL 
## );
## CREATE TABLE Reader( 
##   rid SERIAL PRIMARY KEY,
##   firstname TEXT NOT NULL, 
##   -- df: text=first length=1 lenvar=0
##   lastname TEXT NOT NULL, 
##   -- df: text=last length=1 lenvar=0
##   born DATE NOT NULL, -- df: start=1950-01-01 end=2005-01-01
##   gender BOOLEAN NOT NULL, 
##   -- df: rate=0.25
##   phone TEXT,
##   -- df: chars='0-9' length=10 lenvar=0
##   -- df: null=0.01 size=1000000
##   email TEXT NOT NULL CHECK(email LIKE '%@%'),
##   -- df: pattern='[a-z]{3,8}\.[a-z]{3,8}@(gmail|yahoo)\.com'
##   -- define two macros
##   -- df librarian: inet='10.1.0.0/16'
##   -- df reader: inet='10.2.0.0/16'
##   ip TEXT NOT NULL
##   -- df: alt=reader:8,librarian:2
##   -- This would do as well: --df: alt=reader:4,librarian
## );
## CREATE TABLE Borrow( --df: mult=1.5
##   borrowed TIMESTAMP NOT NULL, 
##   -- df: size=72000 prec=60
##   rid INTEGER NOT NULL REFERENCES Reader,
##   bid INTEGER NOT NULL REFERENCES Book, 
##   note TEXT,
##   -- df: sub=power prefix=note size=1000 rate=0.03
##   PRIMARY KEY(bid) -- a book is borrowed once at a time!
## );

Note the following attributes:

  email TEXT NOT NULL CHECK(email LIKE '%@%'),
  -- df: pattern='[a-z]{3,8}\.[a-z]{3,8}@(gmail|yahoo)\.com'
  ...
datafiller --seed 1 --size=3 data/library04.sql > data/library_test_data04.sql
cat data/library_test_data04.sql
## 
## -- data generated by /usr/local/bin/datafiller version 2.0.0 (r792 on 2014-03-23) for postgresql
## 
## -- fill table book (6)
## \echo # filling table book (6)
## COPY book (bid,title,isbn) FROM STDIN (ENCODING 'utf-8');
## 1    marvel meanwhile    1
## 2    pens stationery expansys hilton learning transition casting 5
## 3    issued ceiling ages olive lincoln moderate  5
## 4    societies construction markets unable portland  1
## 5    particles utah mask assault apparel submission panel    6
## 6    ohio    5
## \.
## 
## -- fill table reader (3)
## \echo # filling table reader (3)
## COPY reader (rid,firstname,lastname,born,gender,phone,email,ip) FROM STDIN (ENCODING 'utf-8');
## 1    Kandra  Lewandoski  1993-08-08  FALSE   4459330731  oui.yfi@yahoo.com   10.2.236.144
## 2    Marylee Verone  2000-09-24  TRUE    7610496804  qhx.jytx@yahoo.com  10.2.220.251
## 3    Raylene Latshaw 1993-06-01  TRUE    9780596236  wnoxxo.flhwmuzf@yahoo.com   10.2.86.119
## \.
## 
## -- fill table borrow (4)
## \echo # filling table borrow (4)
## COPY borrow (borrowed,rid,bid,note) FROM STDIN (ENCODING 'utf-8');
## 2017-11-09 06:52:08  3   1   note_21_
## 2017-11-10 22:16:08  1   2   note_543_543
## 2017-12-15 03:12:08  1   3   note_180_180_18
## 2017-11-07 13:46:08  1   4   note_110_
## \.
## 
## -- restart sequences
## ALTER SEQUENCE book_bid_seq RESTART WITH 7;
## ALTER SEQUENCE reader_rid_seq RESTART WITH 4;
## 
## -- analyze modified tables
## ANALYZE book;
## ANALYZE reader;
## ANALYZE borrow;

 Tech    15 Dec, 2017

Any work (images, writings, presentations, ideas or whatever) which I own is always provided under
Creative Commons License Creative Commons Attribution-Share Alike 3.0 License

Mert Nuhoglu is a Trabzon-born programmer and data scientist.

You may also like...