Example: datafiller SQL Data Generator
datafiller is a tool to generate SQL from DDL files.
Table of Contents
Getting Started
cat data/library.sql
##> CREATE TABLE Book( --df: mult=3.0
##> bid SERIAL PRIMARY KEY,
##> title TEXT NOT NULL,
##> isbn ISBN13 NOT NULL
##> );
##> CREATE TABLE Reader(
##> rid SERIAL PRIMARY KEY,
##> firstname TEXT NOT NULL,
##> lastname TEXT NOT NULL,
##> born DATE NOT NULL,
##> gender BOOLEAN NOT NULL,
##> phone TEXT
##> );
##> CREATE TABLE Borrow( --df: mult=1.5
##> borrowed TIMESTAMP NOT NULL,
##> rid INTEGER NOT NULL REFERENCES Reader,
##> bid INTEGER NOT NULL REFERENCES Book,
##> PRIMARY KEY(bid) -- a book is borrowed once at a time!
##> );
Note meta attributes:
CREATE TABLE Book( --df: mult=2.0
...
datafiller --size=2 data/library.sql > data/library_test_data.sql
head data/library_test_data.sql
##>
##> -- data generated by /usr/local/bin/datafiller version 2.0.0 (r792 on 2014-03-23) for postgresql
##>
##> -- fill table book (6)
##> \echo # filling table book (6)
##> COPY book (bid,title,isbn) FROM STDIN (ENCODING 'utf-8');
##> 1 title_1_1_1_ 9784413254182
##> 2 title_4_4_4_4_4 9782571851779
##> 3 title_3_3_3 9787601387877
##> 4 title_2_2_2_2_2_ 9788305707527
Meta Attributes
cat data/library02.sql
##> CREATE TABLE Book( --df: mult=2.0
##> bid SERIAL PRIMARY KEY,
##> title TEXT NOT NULL,
##> -- df English: word=/Users/mertnuhoglu/projects/test_data/google-10000-english-no-swears.txt
##> -- df: text=English length=4 lenvar=3
##> isbn ISBN13 NOT NULL
##> );
##> CREATE TABLE Reader(
##> rid SERIAL PRIMARY KEY,
##> firstname TEXT NOT NULL,
##> -- df first: word=/Users/mertnuhoglu/projects/test_data/first_names.txt
##> -- df: text=first length=1 lenvar=0
##> lastname TEXT NOT NULL,
##> -- df last: word=/Users/mertnuhoglu/projects/test_data/last_names.txt
##> -- df: text=last length=1 lenvar=0
##> born DATE NOT NULL, -- df: start=1950-01-01 end=2005-01-01
##> gender BOOLEAN NOT NULL,
##> phone TEXT
##> -- df: chars='0-9' length=10 lenvar=0
##> -- df: null=0.01 size=1000000
##> );
##> CREATE TABLE Borrow( --df: mult=1.5
##> borrowed TIMESTAMP NOT NULL,
##> rid INTEGER NOT NULL REFERENCES Reader,
##> bid INTEGER NOT NULL REFERENCES Book,
##> PRIMARY KEY(bid) -- a book is borrowed once at a time!
##> );
##>
##>
Note the following attributes:
title TEXT NOT NULL,
-- df English: word=/Users/mertnuhoglu/projects/test_data/google-10000-english-no-swears.txt
-- df: text=English length=4 lenvar=3
...
born DATE NOT NULL, -- df: start=1950-01-01 end=2005-01-01
...
phone TEXT
-- df: chars='0-9' length=10 lenvar=0
-- df: null=0.01 size=1000000
...
datafiller --size=3 data/library02.sql > data/library_test_data02.sql
cat data/library_test_data02.sql
##>
##> -- data generated by /usr/local/bin/datafiller version 2.0.0 (r792 on 2014-03-23) for postgresql
##>
##> -- fill table book (6)
##> \echo # filling table book (6)
##> COPY book (bid,title,isbn) FROM STDIN (ENCODING 'utf-8');
##> 1 polished tu marble funded intro pennsylvania 9788014981218
##> 2 bedford vg 9782723776547
##> 3 trained commentary mozilla 9782723776547
##> 4 gourmet mathematics 9782913571662
##> 5 favors 9783956794780
##> 6 knitting purpose creatures skins 9782723776547
##> \.
##>
##> -- fill table reader (3)
##> \echo # filling table reader (3)
##> COPY reader (rid,firstname,lastname,born,gender,phone) FROM STDIN (ENCODING 'utf-8');
##> 1 Eldon Feela 1970-01-29 FALSE 6001655890
##> 2 Kerrie Calk 1979-03-22 FALSE 6986568412
##> 3 Fiona Echavarria 1960-08-05 FALSE 3753286868
##> \.
##>
##> -- fill table borrow (4)
##> \echo # filling table borrow (4)
##> COPY borrow (borrowed,rid,bid) FROM STDIN (ENCODING 'utf-8');
##> 2017-12-21 16:29:57 2 1
##> 2017-12-21 16:30:57 3 2
##> 2017-12-21 16:30:57 1 3
##> 2017-12-21 16:28:57 2 4
##> \.
##>
##> -- restart sequences
##> ALTER SEQUENCE book_bid_seq RESTART WITH 7;
##> ALTER SEQUENCE reader_rid_seq RESTART WITH 4;
##>
##> -- analyze modified tables
##> ANALYZE book;
##> ANALYZE reader;
##> ANALYZE borrow;
cat data/library03.sql
##> -- df English: word=/Users/mertnuhoglu/projects/test_data/google-10000-english-no-swears.txt
##> -- df first: word=/Users/mertnuhoglu/projects/test_data/first_names.txt
##> -- df last: word=/Users/mertnuhoglu/projects/test_data/last_names.txt
##> CREATE TABLE Book( --df: mult=2.0
##> bid SERIAL PRIMARY KEY,
##> title TEXT NOT NULL,
##> -- df: text=English length=4 lenvar=3
##> isbn INTEGER NOT NULL
##> );
##> CREATE TABLE Reader(
##> rid SERIAL PRIMARY KEY,
##> firstname TEXT NOT NULL,
##> -- df: text=first length=1 lenvar=0
##> lastname TEXT NOT NULL,
##> -- df: text=last length=1 lenvar=0
##> born DATE NOT NULL, -- df: start=1950-01-01 end=2005-01-01
##> gender BOOLEAN NOT NULL,
##> -- df: rate=0.25
##> phone TEXT
##> -- df: chars='0-9' length=10 lenvar=0
##> -- df: null=0.01 size=1000000
##> );
##> CREATE TABLE Borrow( --df: mult=1.5
##> borrowed TIMESTAMP NOT NULL,
##> -- df: size=72000 prec=60
##> rid INTEGER NOT NULL REFERENCES Reader,
##> bid INTEGER NOT NULL REFERENCES Book,
##> note TEXT,
##> -- df: sub=power prefix=note size=1000 rate=0.03
##> PRIMARY KEY(bid) -- a book is borrowed once at a time!
##> );
##>
##>
##>
Note the following attributes:
gender BOOLEAN NOT NULL,
-- df: rate=0.25
...
borrowed TIMESTAMP NOT NULL,
-- df: size=72000 prec=60
...
note TEXT,
-- df: sub=power prefix=note size=1000 rate=0.03
datafiller --size=3 data/library03.sql > data/library_test_data03.sql
cat data/library_test_data03.sql
##>
##> -- data generated by /usr/local/bin/datafiller version 2.0.0 (r792 on 2014-03-23) for postgresql
##>
##> -- fill table book (6)
##> \echo # filling table book (6)
##> COPY book (bid,title,isbn) FROM STDIN (ENCODING 'utf-8');
##> 1 palace funky 3
##> 2 moves indeed download attendance towers answer britney 4
##> 3 filled kent constant 6
##> 4 engineer nearby me salmon shell bike brought 5
##> 5 role 5
##> 6 bumper charm gd 3
##> \.
##>
##> -- fill table reader (3)
##> \echo # filling table reader (3)
##> COPY reader (rid,firstname,lastname,born,gender,phone) FROM STDIN (ENCODING 'utf-8');
##> 1 Todd Sesso 1988-01-18 FALSE 1931243076
##> 2 Dean Ritmiller 1979-06-07 TRUE 1272396169
##> 3 Berry Salos 1968-04-26 TRUE 9848238480
##> \.
##>
##> -- fill table borrow (4)
##> \echo # filling table borrow (4)
##> COPY borrow (borrowed,rid,bid,note) FROM STDIN (ENCODING 'utf-8');
##> 2017-12-13 07:26:12 1 1 note_493_493
##> 2017-11-08 01:23:12 1 2 note_445_
##> 2017-11-28 22:35:12 1 3 note_223_
##> 2017-11-12 07:42:12 1 4 note_762_762
##> \.
##>
##> -- restart sequences
##> ALTER SEQUENCE book_bid_seq RESTART WITH 7;
##> ALTER SEQUENCE reader_rid_seq RESTART WITH 4;
##>
##> -- analyze modified tables
##> ANALYZE book;
##> ANALYZE reader;
##> ANALYZE borrow;
cat data/library04.sql
##> -- df English: word=/Users/mertnuhoglu/projects/test_data/google-10000-english-no-swears.txt
##> -- df first: word=/Users/mertnuhoglu/projects/test_data/first_names.txt
##> -- df last: word=/Users/mertnuhoglu/projects/test_data/last_names.txt
##> CREATE TABLE Book( --df: mult=2.0
##> bid SERIAL PRIMARY KEY,
##> title TEXT NOT NULL,
##> -- df: text=English length=4 lenvar=3
##> isbn INTEGER NOT NULL
##> );
##> CREATE TABLE Reader(
##> rid SERIAL PRIMARY KEY,
##> firstname TEXT NOT NULL,
##> -- df: text=first length=1 lenvar=0
##> lastname TEXT NOT NULL,
##> -- df: text=last length=1 lenvar=0
##> born DATE NOT NULL, -- df: start=1950-01-01 end=2005-01-01
##> gender BOOLEAN NOT NULL,
##> -- df: rate=0.25
##> phone TEXT,
##> -- df: chars='0-9' length=10 lenvar=0
##> -- df: null=0.01 size=1000000
##> email TEXT NOT NULL CHECK(email LIKE '%@%'),
##> -- df: pattern='[a-z]{3,8}\.[a-z]{3,8}@(gmail|yahoo)\.com'
##> -- define two macros
##> -- df librarian: inet='10.1.0.0/16'
##> -- df reader: inet='10.2.0.0/16'
##> ip TEXT NOT NULL
##> -- df: alt=reader:8,librarian:2
##> -- This would do as well: --df: alt=reader:4,librarian
##> );
##> CREATE TABLE Borrow( --df: mult=1.5
##> borrowed TIMESTAMP NOT NULL,
##> -- df: size=72000 prec=60
##> rid INTEGER NOT NULL REFERENCES Reader,
##> bid INTEGER NOT NULL REFERENCES Book,
##> note TEXT,
##> -- df: sub=power prefix=note size=1000 rate=0.03
##> PRIMARY KEY(bid) -- a book is borrowed once at a time!
##> );
##>
Note the following attributes:
email TEXT NOT NULL CHECK(email LIKE '%@%'),
-- df: pattern='[a-z]{3,8}\.[a-z]{3,8}@(gmail|yahoo)\.com'
...
datafiller --seed 1 --size=3 data/library04.sql > data/library_test_data04.sql
cat data/library_test_data04.sql
##>
##> -- data generated by /usr/local/bin/datafiller version 2.0.0 (r792 on 2014-03-23) for postgresql
##>
##> -- fill table book (6)
##> \echo # filling table book (6)
##> COPY book (bid,title,isbn) FROM STDIN (ENCODING 'utf-8');
##> 1 marvel meanwhile 1
##> 2 pens stationery expansys hilton learning transition casting 5
##> 3 issued ceiling ages olive lincoln moderate 5
##> 4 societies construction markets unable portland 1
##> 5 particles utah mask assault apparel submission panel 6
##> 6 ohio 5
##> \.
##>
##> -- fill table reader (3)
##> \echo # filling table reader (3)
##> COPY reader (rid,firstname,lastname,born,gender,phone,email,ip) FROM STDIN (ENCODING 'utf-8');
##> 1 Kandra Lewandoski 1993-08-08 FALSE 4459330731 oui.yfi@yahoo.com 10.2.236.144
##> 2 Marylee Verone 2000-09-24 TRUE 7610496804 qhx.jytx@yahoo.com 10.2.220.251
##> 3 Raylene Latshaw 1993-06-01 TRUE 9780596236 wnoxxo.flhwmuzf@yahoo.com 10.2.86.119
##> \.
##>
##> -- fill table borrow (4)
##> \echo # filling table borrow (4)
##> COPY borrow (borrowed,rid,bid,note) FROM STDIN (ENCODING 'utf-8');
##> 2017-11-09 06:52:08 3 1 note_21_
##> 2017-11-10 22:16:08 1 2 note_543_543
##> 2017-12-15 03:12:08 1 3 note_180_180_18
##> 2017-11-07 13:46:08 1 4 note_110_
##> \.
##>
##> -- restart sequences
##> ALTER SEQUENCE book_bid_seq RESTART WITH 7;
##> ALTER SEQUENCE reader_rid_seq RESTART WITH 4;
##>
##> -- analyze modified tables
##> ANALYZE book;
##> ANALYZE reader;
##> ANALYZE borrow;