Create Schema Script
This page contains a helper script to create a kdb Insights schema from a kdb+ database.
Options
The following options are accepted:
-
-db <path>Path to database. Defaults to current directory. -
-out <path>Output path. If not specified, outputs tostdout, or current directory in the case of-fmt package. -
-fmt (yaml|json|package)Output format. Ifyaml, outputs in the documented assembly format. Ifjson, output is intended for use in the Code View feature of the Web Interface. Ifpackage, the output is split into one file for each table,<tablename>.yaml, in the format used in kdb+ Insights Enterprise packages, and the files are placed in the directory specified by-out.
Script
q
// @desc Base names for types used by kdb+ Insights assembly files
typeNames:(.Q.t except" ")!key each(.Q.t except" ")$\:()
// @desc Nested columns are simple plurals, except 'chars' which becomes 'string'
typeNames:@[;"C";:;`string]typeNames,{(upper key x)!`$(string value x),'"s"}typeNames
// @desc Names for attributes used by assembly files
attrNames:`g`u`p`s!`grouped`unique`parted`sorted
// @desc Wraps `meta`, stripping out virtual partition columns.
// @param tbl Table to be described
// @return Table of metadata same as from `meta`
realMeta:{[tbl]m:meta tbl;if[1b~.Q.qp tbl;m:delete from m where c=.Q.pf];m}
// @desc Given a table, produces a data structure representing the assembly configuration for its columns.
// @param tbl Table (as value, not name)
// @return List of dicts of parameters for each column
describeCols:{[tbl] {if[`~x`type;x[`anymap]:`true];if[`=x`attrDisk;x:`attrDisk _x];x}each `name`type`attrDisk xcol `c`t`a#update typeNames t,attrNames a from 0!realMeta tbl}
// @desc Gets name, storage type and value of each table loaded.
// @return Table of name/type/value of tables
getTbls:{`name`type`val!/:{(x;((1b;0b;0)!`partitioned`splayed`basic) .Q.qp v;v:get x)}each tables[]}
// @desc Combines output of `getTbls` and `describeCols` into an assembly configuration.
// @param tblInfo Table output from `getTbls`
// @return Dict of parameters for each table, including parameters for their columns
describeTbls:{[tblInfo] (tblInfo`name)!{if[`<>s:first x[`columns;;`name]where x[`columns;;`attrDisk]in`parted`sorted;x[`sortColsDisk]:enlist s];x} each`type`columns#update columns:describeCols each val from tblInfo}
// @desc Out-of-band token that will be serialized into an invalid form to indicate where users must edit
EDITME:`$"@EDITME@"
// @desc Takes the output of `describeTbls` and annotates it with necessary user edits.
// @param dsc Dict output from `describeTbls`
// @return Pair `(dsc;msg)` of `dsc` with 'edit me' tokens added, and `msg` advising the user on the necessary edits
annotate:{[dsc]
reminder:"\n";
$[`json~args`fmt;
[fn:"JSON";reminder:"# REMINDER: Remove these comments from the JSON file before using it, as JSON does not natively support comments.\n",reminder];
fn:"YAML"];
msg:"# NOTE: This script generates a preliminary ",fn," file for kdb+ insights schema.\n",
"#\n",
"# IMPORTANT: The generated file is NOT a valid kdb+ insights schema yet.\n",
"# Manual review and updates are required for the following:\n",
"# 1. Define the `prtnCol` for each partitioned table.\n",
"# 2. Add necessary attributes for `idb` and `rdb` configurations.\n",
"# 3. Review and update `sort columns` as needed, ensuring extra columns are added where appropriate.\n";
dsc:{if[`partitioned=x`type;x[`prtnCol]:EDITME];x} each dsc;
dsc:{if[99h<>type x;:x];$[`attrDisk in key x;x,`attrOrd`attrMem!(EDITME;EDITME);x]}@/:/:/:dsc;
dsc:{if[`sortColsDisk in key x;x[`sortColsDisk],:EDITME];x}@/:dsc;
if[@[{.Q.pf};`;`date]<>`date;msg,:"# 4. Convert database to be partitioned by date rather than ",(string .Q.pf),".\n"];
if[count bad:where not `attrDisk in/:raze@'key@''dsc[p:where `partitioned=dsc[;`type];`columns];
msg,:"# 5. Add attributes to tables: ",(", "sv string p bad),", or queries on them could be very slow.\n"];
msg,:"#\n",
"# Please address these points before using the schema.\n",
reminder;
(dsc;msg)
}
// @desc Indents every line in a string by two spaces.
// @param x Input string
// @return Input string indented by two spaces
indent:{"\n"sv" ",/:"\n"vs x}
// @desc Serializes an atomic value. We use `.j.j` as JSON and YAML have compatible syntaxes for this. 'Edit me' tokens
// are converted into the invalid unquoted string `@EDITME@` at this stage.
// @param x Atomic value
// @return String representation of `x`
atomToStr:{$[x~EDITME;string EDITME;.j.j x]}
// @desc Outputs JSON with block indentation (unlike .j.j) and with special treatment of 'edit me' tokens.
// @param val Value to be seralised
// @return Serialised possibly-invalid JSON document
mkJSON:{[val]
t:type val;$[
t<0;atomToStr val;
t within 1 19;"[",(", "sv .z.s each val),"]";
t in 0 98h;"[\n",(indent",\n"sv .z.s each val),"\n]";
t=99h;"{\n",(indent",\n"sv": "sv/:flip (atomToStr each key val;.z.s each value val)),"\n}"
]
}
// @desc Outputs YAML in the same format as used in assembly specifications, and with special treatment of 'edit me' tokens.
// @param val Value to be serialised
// @return Serialised possibly-invalid YAML document
mkYAML:{[val]
t:type val;$[
t<0;atomToStr val;
t within 1 19;"[",(", "sv .z.s each val),"]";
t in 0 98h;"\n"sv{@[;0;:;"-"] indent x} each .z.s each val;
t=99h;"\n"sv": "sv/:flip (string key val;{$["\n" in x;"\n",indent x;x]}each .z.s each value val)
]
}
// @desc Converts a dict of dicts into a list by turning each key into a `name` sub-key. YAML assembly files are structured as a dict with
// table names as keys; JSON schemas in the UI and per-table YAML files in packages use a 'name' key for each table. This is used to
// convert from the former to the latter structure.
// @param dsc Dict of dicts output by `annotate`
// @return List of dicts
restructure:{[dsc]
((enlist`name)!/:enlist each key dsc),'value dsc
}
// @desc Writes a dict of dicts from `annotate` to disk by writing each key to disk in a separate file corresponding to the table name.
// @param dir Directory to write files into
// @param dsc Dict of dicts output by `annotate`
// @param msg String output by `annotate`
pkgOut:{[dir;dsc;msg] {.Q.dd[x;` sv(z`name),`yaml]0:enlist y,mkYAML z}[dir;msg]each restructure dsc}
// @desc Parses command-line arguments and calls functions to describe an output dictionary and output the description appropriately.
main:{
args::.Q.opt .z.x;
args::.Q.def[`db`out`fmt!(`.;`;`yaml)]args;
args[`out]:$[null args`out;`;(args`out)like "/*";hsym args`out;.Q.dd[hsym`$system"cd";args`out]];
@[system;"l ",string args`db;{[db;e]1"Cannot load database from ",db," ",e;exit 1}[string args`db]];
v:annotate describeTbls getTbls[];
dsc:v 0;
msg:v 1;
if[`package~args`fmt;pkgOut[args`out;dsc;msg];:()];
$[`~args`out;-1;'[0:[args`out];enlist]] msg,((`json`yaml!('[mkJSON;restructure];mkYAML))args`fmt) dsc
}
main[];exit 0