Create Schema Script

This page contains a helper script to create a kdb Insights schema from a kdb+ database.

Options

The following options are accepted:

  • -db <path> Path to database. Defaults to current directory.

  • -out <path> Output path. If not specified, outputs to stdout, or current directory in the case of -fmt package.

  • -fmt (yaml|json|package) Output format. If yaml, outputs in the documented assembly format. If json, output is intended for use in the Code View feature of the Web Interface. If package, the output is split into one file for each table, <tablename>.yaml, in the format used in kdb+ Insights Enterprise packages, and the files are placed in the directory specified by -out.

Script

q

Copy
// @desc    Base names for types used by kdb+ Insights assembly files
typeNames:(.Q.t except" ")!key each(.Q.t except" ")$\:()
// @desc    Nested columns are simple plurals, except 'chars' which becomes 'string'
typeNames:@[;"C";:;`string]typeNames,{(upper key x)!`$(string value x),'"s"}typeNames
// @desc    Names for attributes used by assembly files
attrNames:`g`u`p`s!`grouped`unique`parted`sorted

// @desc    Wraps `meta`, stripping out virtual partition columns.
// @param   tbl     Table to be described
// @return  Table of metadata same as from `meta`
realMeta:{[tbl]m:meta tbl;if[1b~.Q.qp tbl;m:delete from m where c=.Q.pf];m}

// @desc    Given a table, produces a data structure representing the assembly configuration for its columns.
// @param   tbl     Table (as value, not name)
// @return  List of dicts of parameters for each column
describeCols:{[tbl] {if[`~x`type;x[`anymap]:`true];if[`=x`attrDisk;x:`attrDisk _x];x}each `name`type`attrDisk xcol `c`t`a#update typeNames t,attrNames a from 0!realMeta tbl}

// @desc    Gets name, storage type and value of each table loaded.
// @return  Table of name/type/value of tables
getTbls:{`name`type`val!/:{(x;((1b;0b;0)!`partitioned`splayed`basic) .Q.qp v;v:get x)}each tables[]}

// @desc    Combines output of `getTbls` and `describeCols` into an assembly configuration.
// @param   tblInfo Table output from `getTbls`
// @return  Dict of parameters for each table, including parameters for their columns
describeTbls:{[tblInfo] (tblInfo`name)!{if[`<>s:first x[`columns;;`name]where x[`columns;;`attrDisk]in`parted`sorted;x[`sortColsDisk]:enlist s];x} each`type`columns#update columns:describeCols each val from tblInfo}

// @desc    Out-of-band token that will be serialized into an invalid form to indicate where users must edit
EDITME:`$"@EDITME@"

// @desc    Takes the output of `describeTbls` and annotates it with necessary user edits.
// @param   dsc     Dict output from `describeTbls`
// @return  Pair `(dsc;msg)` of `dsc` with 'edit me' tokens added, and `msg` advising the user on the necessary edits
annotate:{[dsc]
    reminder:"\n";
    $[`json~args`fmt;
        [fn:"JSON";reminder:"# REMINDER: Remove these comments from the JSON file before using it, as JSON does not natively support comments.\n",reminder];
        fn:"YAML"];

    msg:"# NOTE: This script generates a preliminary ",fn," file for kdb+ insights schema.\n",
    "#\n",
    "# IMPORTANT: The generated file is NOT a valid kdb+ insights schema yet.\n",
    "# Manual review and updates are required for the following:\n",
    "# 1. Define the `prtnCol` for each partitioned table.\n",
    "# 2. Add necessary attributes for `idb` and `rdb` configurations.\n",
    "# 3. Review and update `sort columns` as needed, ensuring extra columns are added where appropriate.\n";
    dsc:{if[`partitioned=x`type;x[`prtnCol]:EDITME];x} each dsc;
    dsc:{if[99h<>type x;:x];$[`attrDisk in key x;x,`attrOrd`attrMem!(EDITME;EDITME);x]}@/:/:/:dsc;
    dsc:{if[`sortColsDisk in key x;x[`sortColsDisk],:EDITME];x}@/:dsc;
    if[@[{.Q.pf};`;`date]<>`date;msg,:"# 4. Convert database to be partitioned by date rather than ",(string .Q.pf),".\n"];
    if[count bad:where not `attrDisk in/:raze@'key@''dsc[p:where `partitioned=dsc[;`type];`columns];
        msg,:"# 5. Add attributes to tables: ",(", "sv string p bad),", or queries on them could be very slow.\n"];
    msg,:"#\n",
    "# Please address these points before using the schema.\n",
    reminder;
    (dsc;msg)
 }

// @desc    Indents every line in a string by two spaces.
// @param   x       Input string
// @return  Input string indented by two spaces
indent:{"\n"sv"  ",/:"\n"vs x}

// @desc    Serializes an atomic value. We use `.j.j` as JSON and YAML have compatible syntaxes for this. 'Edit me' tokens
//          are converted into the invalid unquoted string `@EDITME@` at this stage.
// @param   x       Atomic value
// @return  String representation of `x`
atomToStr:{$[x~EDITME;string EDITME;.j.j x]}

// @desc    Outputs JSON with block indentation (unlike .j.j) and with special treatment of 'edit me' tokens.
// @param   val     Value to be seralised
// @return  Serialised possibly-invalid JSON document
mkJSON:{[val]
    t:type val;$[
    t<0;atomToStr val;
    t within 1 19;"[",(", "sv .z.s each val),"]";
    t in 0 98h;"[\n",(indent",\n"sv .z.s each val),"\n]";
    t=99h;"{\n",(indent",\n"sv": "sv/:flip (atomToStr each key val;.z.s each value val)),"\n}"
    ]
 }

// @desc    Outputs YAML in the same format as used in assembly specifications, and with special treatment of 'edit me' tokens.
// @param   val     Value to be serialised
// @return  Serialised possibly-invalid YAML document
mkYAML:{[val]
    t:type val;$[
    t<0;atomToStr val;
    t within 1 19;"[",(", "sv .z.s each val),"]";
    t in 0 98h;"\n"sv{@[;0;:;"-"] indent x} each .z.s each val;
    t=99h;"\n"sv": "sv/:flip (string key val;{$["\n" in x;"\n",indent x;x]}each .z.s each value val)
    ]
 }

// @desc    Converts a dict of dicts into a list by turning each key into a `name` sub-key. YAML assembly files are structured as a dict with
//          table names as keys; JSON schemas in the UI and per-table YAML files in packages use a 'name' key for each table. This is used to
//          convert from the former to the latter structure.
// @param   dsc     Dict of dicts output by `annotate`
// @return  List of dicts 
restructure:{[dsc]
  ((enlist`name)!/:enlist each key dsc),'value dsc
  }

// @desc    Writes a dict of dicts from `annotate` to disk by writing each key to disk in a separate file corresponding to the table name.
// @param   dir     Directory to write files into
// @param   dsc     Dict of dicts output by `annotate`
// @param   msg     String output by `annotate`
pkgOut:{[dir;dsc;msg] {.Q.dd[x;` sv(z`name),`yaml]0:enlist y,mkYAML z}[dir;msg]each restructure dsc}

// @desc    Parses command-line arguments and calls functions to describe an output dictionary and output the description appropriately.
main:{
    args::.Q.opt .z.x;
    args::.Q.def[`db`out`fmt!(`.;`;`yaml)]args;
    args[`out]:$[null args`out;`;(args`out)like "/*";hsym args`out;.Q.dd[hsym`$system"cd";args`out]];
    @[system;"l ",string args`db;{[db;e]1"Cannot load database from ",db," ",e;exit 1}[string args`db]];
    v:annotate describeTbls getTbls[];
    dsc:v 0;
    msg:v 1;
    if[`package~args`fmt;pkgOut[args`out;dsc;msg];:()];
    $[`~args`out;-1;'[0:[args`out];enlist]] msg,((`json`yaml!('[mkJSON;restructure];mkYAML))args`fmt) dsc
 }

main[];exit 0