Create Schema Script

This page contains a helper script to create a kdb Insights schema from a kdb+ database.

Options

The following options are accepted:

  • -db <path> Path to database. Defaults to current directory.

  • -out <path> Output path. If not specified, outputs to stdout, or current directory in the case of -fmt package.

  • -fmt (yaml|json|package) Output format. If yaml, outputs in the documented assembly format. If json, output is intended for use in the 'view code' feature of the kdb+ Insights UI assembly editor. If package, the output is split into one file for each table, <tablename>.yaml, in the format used in kdb+ Insights Enterprise packages, and the files are placed in the directory specified by -out.

Script

// @desc    Base names for types used by kdb+ Insights assembly files
typeNames:(.Q.t except" ")!key each(.Q.t except" ")$\:()
// @desc    Nested columns are simple plurals, except 'chars' which becomes 'string'
typeNames:@[;"C";:;`string]typeNames,{(upper key x)!`$(string value x),'"s"}typeNames
// @desc    Names for attributes used by assembly files
attrNames:`g`u`p`s!`grouped`unique`parted`sorted
// @desc    Wraps `meta`, stripping out virtual partition columns.
// @param   tbl     Table to be described
// @return  Table of metadata same as from `meta`
realMeta:{[tbl]m:meta tbl;if[1b~.Q.qp tbl;m:delete from m where c=.Q.pf];m}
// @desc    Given a table, produces a data structure representing the assembly configuration for its columns.
// @param   tbl     Table (as value, not name)
// @return  List of dicts of parameters for each column
describeCols:{[tbl] {if[`~x`type;x[`anymap]:`true];if[`=x`attrDisk;x:`attrDisk _x];x}each `name`type`attrDisk xcol `c`t`a#update typeNames t,attrNames a from 0!realMeta tbl}
// @desc    Gets name, storage type and value of each table loaded.
// @return  Table of name/type/value of tables
getTbls:{`name`type`val!/:{(x;((1b;0b;0)!`partitioned`splayed`basic) .Q.qp v;v:get x)}each tables[]}
// @desc    Combines output of `getTbls` and `describeCols` into an assembly configuration.
// @param   tblInfo Table output from `getTbls`
// @return  Dict of parameters for each table, including parameters for their columns
describeTbls:{[tblInfo] (tblInfo`name)!{if[`<>s:first x[`columns;;`name]where x[`columns;;`attrDisk]in`parted`sorted;x[`sortColsDisk]:enlist s];x} each`type`columns#update columns:describeCols each val from tblInfo}
// @desc    Out-of-band token that will be serialized into an invalid form to indicate where users must edit
EDITME:`$"@EDITME@"
// @desc    Takes the output of `describeTbls` and annotates it with necessary user edits.
// @param   dsc     Dict output from `describeTbls`
// @return  Pair `(dsc;msg)` of `dsc` with 'edit me' tokens added, and `msg` advising the user on the necessary edits
annotate:{[dsc]
 reminder:"\n";
 $[`json~args`fmt;
 [fn:"JSON";reminder:"# REMINDER: Remove these comments from the JSON file before using it, as JSON does not natively support comments.\n",reminder];
 fn:"YAML"];
 msg:"# NOTE: This script generates a preliminary ",fn," file for kdb+ insights schema.\n",
 "#\n",
 "# IMPORTANT: The generated file is NOT a valid kdb+ insights schema yet.\n",
 "# Manual review and updates are required for the following:\n",
 "# 1. Define the `prtnCol` for each partitioned table.\n",
 "# 2. Add necessary attributes for `idb` and `rdb` configurations.\n",
 "# 3. Review and update `sort columns` as needed, ensuring extra columns are added where appropriate.\n";
 dsc:{if[`partitioned=x`type;x[`prtnCol]:EDITME];x} each dsc;
 dsc:{if[99h<>type x;:x];$[`attrDisk in key x;x,`attrOrd`attrMem!(EDITME;EDITME);x]}@/:/:/:dsc;
 dsc:{if[`sortColsDisk in key x;x[`sortColsDisk],:EDITME];x}@/:dsc;
 if[@[{.Q.pf};`;`date]<>`date;msg,:"# 4. Convert database to be partitioned by date rather than ",(string .Q.pf),".\n"];
 if[count bad:where not `attrDisk in/:raze@'key@''dsc[p:where `partitioned=dsc[;`type];`columns];
 msg,:"# 5. Add attributes to tables: ",(", "sv string p bad),", or queries on them could be very slow.\n"];
 msg,:"#\n",
 "# Please address these points before using the schema.\n",
 reminder;
 (dsc;msg)
 }
// @desc    Indents every line in a string by two spaces.
// @param   x       Input string
// @return  Input string indented by two spaces
indent:{"\n"sv"  ",/:"\n"vs x}
// @desc    Serializes an atomic value. We use `.j.j` as JSON and YAML have compatible syntaxes for this. 'Edit me' tokens
//          are converted into the invalid unquoted string `@EDITME@` at this stage.
// @param   x       Atomic value
// @return  String representation of `x`
atomToStr:{$[x~EDITME;string EDITME;.j.j x]}
// @desc    Outputs JSON with block indentation (unlike .j.j) and with special treatment of 'edit me' tokens.
// @param   val     Value to be seralised
// @return  Serialised possibly-invalid JSON document
mkJSON:{[val]
 t:type val;$[
 t<0;atomToStr val;
 t within 1 19;"[",(", "sv .z.s each val),"]";
 t in 0 98h;"[\n",(indent",\n"sv .z.s each val),"\n]";
 t=99h;"{\n",(indent",\n"sv": "sv/:flip (atomToStr each key val;.z.s each value val)),"\n}"
 ]
 }
// @desc    Outputs YAML in the same format as used in assembly specifications, and with special treatment of 'edit me' tokens.
// @param   val     Value to be serialised
// @return  Serialised possibly-invalid YAML document
mkYAML:{[val]
 t:type val;$[
 t<0;atomToStr val;
 t within 1 19;"[",(", "sv .z.s each val),"]";
 t in 0 98h;"\n"sv{@[;0;:;"-"] indent x} each .z.s each val;
 t=99h;"\n"sv": "sv/:flip (string key val;{$["\n" in x;"\n",indent x;x]}each .z.s each value val)
 ]
 }
// @desc    Converts a dict of dicts into a list by turning each key into a `name` sub-key. YAML assembly files are structured as a dict with
//          table names as keys; JSON schemas in the UI and per-table YAML files in packages use a 'name' key for each table. This is used to
//          convert from the former to the latter structure.
// @param   dsc     Dict of dicts output by `annotate`
// @return  List of dicts 
restructure:{[dsc]
 ((enlist`name)!/:enlist each key dsc),'value dsc
 }
// @desc    Writes a dict of dicts from `annotate` to disk by writing each key to disk in a separate file corresponding to the table name.
// @param   dir     Directory to write files into
// @param   dsc     Dict of dicts output by `annotate`
// @param   msg     String output by `annotate`
pkgOut:{[dir;dsc;msg] {.Q.dd[x;` sv(z`name),`yaml]0:enlist y,mkYAML z}[dir;msg]each restructure dsc}
// @desc    Parses command-line arguments and calls functions to describe an output dictionary and output the description appropriately.
main:{
 args::.Q.opt .z.x;
 args::.Q.def[`db`out`fmt!(`.;`;`yaml)]args;
 args[`out]:$[null args`out;`;(args`out)like "/*";hsym args`out;.Q.dd[hsym`$system"cd";args`out]];
 @[system;"l ",string args`db;{[db;e]1"Cannot load database from ",db," ",e;exit 1}[string args`db]];
 v:annotate describeTbls getTbls[];
 dsc:v 0;
 msg:v 1;
 if[`package~args`fmt;pkgOut[args`out;dsc;msg];:()];
 $[`~args`out;-1;'[0:[args`out];enlist]] msg,((`json`yaml!('[mkJSON;restructure];mkYAML))args`fmt) dsc
 }
main[];exit 0