From b048115698625f11d1870a3e4af1d663f43c4976 Mon Sep 17 00:00:00 2001
From: Firstyear <william@blackhats.net.au>
Date: Mon, 17 Feb 2020 08:09:33 +1030
Subject: [PATCH] 20200216 document internals (#187)

Add documentation of internal api
---
 README.md                 |   6 +-
 designs/entries.rst       |  38 ---------
 designs/filter.rst        | 163 --------------------------------------
 designs/schema.rst        | 117 ---------------------------
 kanidmd/src/lib/entry.rs  |  53 +++++++++++++
 kanidmd/src/lib/filter.rs |  33 +++++++-
 kanidmd/src/lib/schema.rs |  56 +++++++++++++
 kanidmd/src/lib/server.rs |  22 +++++
 8 files changed, 166 insertions(+), 322 deletions(-)
 delete mode 100644 designs/entries.rst
 delete mode 100644 designs/filter.rst
 delete mode 100644 designs/schema.rst

diff --git a/README.md b/README.md
index 188940021..c19cae384 100644
--- a/README.md
+++ b/README.md
@@ -68,7 +68,11 @@ For more see the [kanidm book]
 
 ### Designs
 
-See the [designs] folder
+See the [designs] folder, and compile the private documentation locally:
+
+```
+cargo doc --document-private-items --open --no-deps
+```
 
 [designs]: https://github.com/kanidm/kanidm/tree/master/designs
 
diff --git a/designs/entries.rst b/designs/entries.rst
deleted file mode 100644
index a5c865007..000000000
--- a/designs/entries.rst
+++ /dev/null
@@ -1,38 +0,0 @@
-
-Entries
--------
-
-Entries are the base unit of data in this server. This is one of the three foundational concepts
-along with filters and schema that everything thing else builds upon.
-
-What is an Entry?
------------------
-
-An entry is a collection of attribute-values. These are sometimes called attribute-value-assertions,
-attr-value sets. The attribute is a "key", and it holds 1 to infinite values associated. An entry
-can have many avas associated, which creates the entry as a whole. An example entry (minus schema):
-
-    Entry {
-        "name": ["william"],
-        "mail": ["william@email", "email@william"],
-        "uuid": ["..."],
-    }
-
-There are only a few rules that are true in entries.
-
-* UUID
-
-All entries *must* have a UUID attribute, and there must ONLY exist a single value. This UUID ava
-MUST be unique within the database, regardless of entry state (live, recycled, tombstoned etc).
-
-* Zero values
-
-An attribute with zero values, is removed from the entry.
-
-* Unsorted
-
-Values within an attribute are "not sorted" in any meaningful way for a client utility (in reality
-they are sorted by an undefined internal order for fast lookup/insertion).
-
-
-That's it.
diff --git a/designs/filter.rst b/designs/filter.rst
deleted file mode 100644
index c403dac63..000000000
--- a/designs/filter.rst
+++ /dev/null
@@ -1,163 +0,0 @@
-
-Filters
--------
-
-Filters (along with Entries and Schema) is one of the foundational concepts in the
-design of KaniDM. They are used in nearly every aspect of the server to provide
-checking and searching over entry sets.
-
-A filter is a set of requirements where the attribute-value pairs of the entry must
-conform for the filter to be considered a "match". This has two useful properties:
-
-* We can apply a filter to a single entry to determine quickly assertions about that entry
-hold true.
-* We can apply a filter to a set of entries to reduce the set only to the matching entries.
-
-Filter Construction
--------------------
-
-Filters are rooted in relational algebra and set mathematics. I am not an expert on either
-topic, and have learnt from experience about there design.
-
-* Presence
-
-The simplest filter is a "presence" test. It asserts that some attribute, regardless
-of it's value exists on the entry. For example, the entries below:
-
-    Entry {
-        name: william
-    }
-
-    Entry {
-        description: test
-    }
-
-If we apply "Pres(name)", then we would only see the entry containing "name: william" as a matching
-result.
-
-* Equality
-
-Equality checks that an attribute and value are present on an entry. For example
-
-    Entry {
-        name: william
-    }
-
-    Entry {
-        name: test
-    }
-
-If we apply Eq(name, william) only the first entry would match. If the attribute is multivalued,
-we only assert that one value in the set is there. For example:
-
-    Entry {
-        name: william
-    }
-
-    Entry {
-        name: test
-        name: claire
-    }
-
-In this case application of Eq(name, claire), would match the second entry as name=claire is present
-in the multivalue set.
-
-* Sub
-
-Substring checks that the substring exists in an attribute of the entry. This is a specialisation
-of equality, where the same value and multivalue handling holds true.
-
-    Entry {
-        name: william
-    }
-
-In this example, Sub(name, liam) would match, but Sub(name, air) would not.
-
-* Or
-
-Or contains multiple filters and asserts that provided *any* of them are true, this condition
-will hold true. For example:
-
-    Entry {
-        name: claire
-    }
-
-In this the filter Or(Eq(name, claire), Eq(name, william)) will be true, because the Eq(name, claire)
-is true, thus the Or condition is true. If nothing inside the Or is true, it returns false.
-
-* And
-
-And checks that all inner filter conditions are true, to return true. If any are false, it will
-yield false.
-
-    Entry {
-        name: claire
-        class: person
-    }
-
-For this example, And(Eq(class, person), Eq(name, claire)) would be true, but And(Eq(class, group),
-Eq(name, claire)) would be false.
-
-* AndNot
-
-AndNot is different to a logical not.
-
-If we had Not(Eq(name, claire)), then the logical result is "All entries where name is not
-claire". However, this is (today...) not very efficient. Instead, we have "AndNot" which asserts
-that a condition of a candidate set is not true. So the operation: AndNot(Eq(name, claire)) would
-yield and empty set. AndNot is important when you need to check that something is also not true
-but without getting all entries where that not holds. An example:
-
-    Entry {
-        name: william
-        class: person
-    }
-
-    Entry {
-        name: claire
-        class: person
-    }
-
-In this case "And(Eq(class, person), AndNot(Eq(name, claire)))". This would find all persons
-where their name is also not claire: IE william. However, the following would be empty result.
-"AndNot(Eq(name, claire))". This is because there is no candidate set already existing, so there
-is nothing to return.
-
-
-Filter Schema Considerations
-----------------------------
-
-In order to make filters work properly, the server normalises entries on input to allow simpler
-comparisons and ordering in the actual search phases. This means that for a filter to operate
-it too must be normalised an valid.
-
-If a filter requests an operation on an attribute we do not know of in schema, the operation
-is rejected. This is to prevent a denial of service attack where Eq(NonExist, value) would cause
-un-indexed full table scans to be performed consuming server resources.
-
-In a filter request, the Attribute name in use is normalised according to schema, as it
-the search value. For example, Eq(nAmE, Claire) would normalise to Eq(name, claire) as both
-attrname and name are UTF8_INSENSITIVE. However, displayName is case sensitive so a search like:
-Eq(displayName, Claire) would become Eq(displayname, Claire). Note Claire remains cased.
-
-This means that instead of having costly routines to normalise entries on each read and search,
-we can normalise on entry modify and create, then we only need to ensure filters match and we
-can do basic string comparisons as needed.
-
-
-Discussion
-----------
-
-Is it worth adding a true "not" type, and using that instead? It would be extremely costly on
-indexes or filter testing, but would logically be better than AndNot as a filter term.
-
-Not could be implemented as Not(<filter>) -> And(Pres(class), AndNot(<filter>)) which would
-yield the equivalent result, but it would consume a very large index component. In this case
-though, filter optimising would promote Eq > Pres, so we would should be able to skip to a candidate
-test, or we access the index and get the right result anyway over fulltable scan.
-
-Additionally, Not/AndNot could be security risks because they could be combined with And
-queries that allow them to bypass the filter-attribute permission check. Is there an example
-of using And(Eq, AndNot(Eq)) that could be used to provide information disclosure about
-the status of an attribute given a result/non result where the AndNot is false/true?
-
diff --git a/designs/schema.rst b/designs/schema.rst
deleted file mode 100644
index e1f2a2e79..000000000
--- a/designs/schema.rst
+++ /dev/null
@@ -1,117 +0,0 @@
-
-Schema
-------
-
-Schema is one of the three foundational concepts of the server, along with filters and entries.
-Schema defines how attribute values *must* be represented, sorted, indexed and more. It also
-defines what attributes could exist on an entry.
-
-Why Schema?
------------
-
-The way that the server is designed, you could extract the backend parts and just have "Entries"
-with no schema. That's totally valid if you want!
-
-However, usually in the world all data maintains some form of structure, even if loose. We want to
-have ways to say a database entry represents a person, and what a person requires.
-
-Attributes
-----------
-
-In the entry document, I discuss that avas have a single attribute, and 1 to infinite values that
-are utf8 case sensitive strings. Which schema attribute types we can constrain these avas on an
-entry.
-
-For example, while the entry may be capable of holding 1 to infinite "name" values, the schema
-defines that only one name is valid on the entry. Addition of a second name would be a violation. Of
-course, schema also defines "multi-value", our usual 1 to infinite value storage concept.
-
-Schema can also define that values of the attribute must conform to a syntax. For example, name
-is a case *insensitive* string. So despite the fact that avas store case-sensitive data, all inputs
-to name will be normalised to a lowercase form for faster matching. There are a number of syntax
-types built into the server, and we'll add more later.
-
-Finally, an attribute can be defined as indexed, and in which ways it can be indexed. We often will
-want to search for "mail" on a person, so we can define in the schema that mail is indexed by the
-backend indexing system. We don't define *how* the index is built - only that some index should exist
-for when a query is made.
-
-Classes
--------
-
-So while we have attributes that define "what is valid in the avas", classes define "which attributes
-can exist on the entry itself".
-
-A class defines requirements that are "may", "must", "systemmay", "systemmust". The system- variants
-exist so that we can ship what we believe are good definitions. The may and must exists so you can
-edit and extend our classes with your extra attribute fields (but it may be better just to add
-your own class types :) )
-
-An attribute in a class marked as "may" is optional on the entry. It can be present as an ava, or
-it may not be.
-
-An attribute in a class marked as "must" is required on the entry. An ava that is valid to the
-attribute syntax is required on this entry.
-
-An attribute that is not "may" or "must" can not be present on this entry.
-
-Lets imagine we have a class (pseudo example) of "person". We'll make it:
-
-    Class {
-        "name": "person",
-        "systemmust": ["name"],
-        "systemmay": ["mail"]
-    }
-
-If we had an entry such as:
-
-    Entry {
-        "class": ["person"],
-        "uid": ["bob"],
-        "mail": ["bob@email"]
-    }
-
-This would be invalid: We are missing the "systemmust" name attribute. It's also invalid because uid
-is not present in systemmust or systemmay.
-
-    Entry {
-        "class": ["person"],
-        "name": ["claire"],
-        "mail": ["claire@email"]
-    }
-
-This entry is now valid. We have met the must requirement of name, and we have the optional
-mail ava populated. The following is also valid.
-
-    Entry {
-        "class": ["person"],
-        "name": ["claire"],
-    }
-
-Classes are 'additive' - this means given two classes on an entry, the must/may are unioned, and the
-strongest rule is applied to attribute presence.
-
-Imagine we have also
-
-    Class {
-        "name": "person",
-        "systemmust": ["name"],
-        "systemmay": ["mail"]
-    }
-
-    Class {
-        "name": "emailperson",
-        "systemmust": ["mail"]
-    }
-
-With our entry now, this turns the "may" from person, into a "must" because of the emailperson
-class. On our entry Claire, that means this entry below is now invalid:
-
-    Entry {
-        "class": ["person", "emailperson"],
-        "name": ["claire"],
-    }
-
-Simply adding an ava of mail back to the entry would make it valid once again.
-
-
diff --git a/kanidmd/src/lib/entry.rs b/kanidmd/src/lib/entry.rs
index d3b9a8a6e..9bca082af 100644
--- a/kanidmd/src/lib/entry.rs
+++ b/kanidmd/src/lib/entry.rs
@@ -1,3 +1,29 @@
+//! Entries are the base unit of object storage in the server. This is one of the three foundational
+//! concepts along with [`filter`]s and [`schema`] that everything else builds upon.
+//!
+//! An [`Entry`] is a collection of attribute-value sets. These are sometimes called attribute value
+//! assertions, or avas. The attribute is a "key" and it holds 1 to infinite associated values
+//! with no ordering. An entry has many avas. A pseudo example, minus schema and typing:
+//!
+//! ```text
+//! Entry {
+//!   "name": ["william"],
+//!   "uuid": ["..."],
+//!   "mail": ["maila@example.com", "mailb@example.com"],
+//! }
+//! ```
+//!
+//! There are three rules for entries:
+//! * Must have an ava for UUID containing a single value.
+//! * Any ava with zero values will be removed.
+//! * Avas are stored with no sorting.
+//!
+//! For more, see the [`Entry`] type.
+//!
+//! [`Entry`]: struct.Entry.html
+//! [`filter`]: ../filter/index.html
+//! [`schema`]: ../schema/index.html
+
 // use serde_json::{Error, Value};
 use crate::audit::AuditScope;
 use crate::credential::Credential;
@@ -166,6 +192,33 @@ pub struct EntryReduced {
     uuid: Uuid,
 }
 
+/// Entry is the core data storage type of the server. Almost every aspect of the server is
+/// designed to read, handle and manipulate entries.
+///
+/// Entries store attribute value assertions, or ava. These are sets of key-values.
+///
+/// Entries have a lifecycle within a single operation, and as part of replication.
+/// The lifecycle for operations is defined through state and valid types. Each entry has a pair
+/// of these types at any time. The first is the ava [`schema`] and [`access`] control assertion
+/// state. This is represented by the type `VALID` as one of `EntryValid`, `EntryInvalid` or
+/// `EntryReduced`. Every entry starts as `EntryInvalid`, and when checked by the schema for
+/// correctness, transitions to `EntryValid`. While an entry is `EntryValid` it can not be
+/// altered - you must invalidate it to `EntryInvalid`, then modify, then check again.
+/// An entry that has had access controls applied moves from `EntryValid` to `EntryReduced`,
+/// to show that the avas have reduced to the valid read set of the current [`event`] user.
+///
+/// The second type of `STATE` represents the database commit state and internal db IDs. A
+/// new entry that has never been committed is `EntryNew`, but an entry that has been retrieved
+/// from the database is `EntryCommitted`. This affects the operations you can apply, i.e. modify
+/// or delete.
+///
+/// These types exist to prevent mishandling of entries at compile time, and to ensure they are always
+/// handled with the correct lifecycles and processes.
+///
+/// [`schema`]: ../schema/index.html
+/// [`access`]: ../access/index.html
+/// [`event`]: ../event/index.html
+///
 #[derive(Debug)]
 pub struct Entry<VALID, STATE> {
     valid: VALID,
diff --git a/kanidmd/src/lib/filter.rs b/kanidmd/src/lib/filter.rs
index 6f85bc5eb..6f838aa5d 100644
--- a/kanidmd/src/lib/filter.rs
+++ b/kanidmd/src/lib/filter.rs
@@ -1,6 +1,12 @@
-// This represents a filtering query. This can be done
-// in parallel map/reduce style, or directly on a single
-// entry to assert it matches.
+//! [`Filter`]s are one of the three foundational concepts of the design in kanidm.
+//! They are used in nearly every aspect of the server to provide searching of
+//! datasets, and assertion of entry properties.
+//!
+//! A filter is a logical statement of properties that an [`Entry`] and its
+//! avas must uphold to be considered true.
+//!
+//! [`Filter`]: struct.Filter.html
+//! [`Entry`]: ../entry/struct.Entry.html
 
 use crate::audit::AuditScope;
 use crate::event::{Event, EventOrigin};
@@ -124,6 +130,27 @@ pub struct FilterValidResolved {
     inner: FilterResolved,
 }
 
+/// A `Filter` is a logical set of assertions about the state of an [`Entry`] and
+/// its avas. `Filter`s are built from a set of possible assertions.
+///
+/// * `Pres`ence. An ava of that attribute's name exists, with any value on the [`Entry`].
+/// * `Eq`uality. An ava of the attribute exists and contains this matching value.
+/// * `Sub`string. An ava of the attribute exists and has a substring containing the requested value.
+/// * `Or`. Contains multiple filters and asserts at least one is true.
+/// * `And`. Contains multiple filters and asserts all of them are true.
+/// * `AndNot`. This is different to a "logical not" operation. This asserts that a condition is not
+/// true in the current candidate set. A search of `AndNot` alone will yield no results, but an
+/// `AndNot` in an `And` query will assert that a condition can not hold.
+///
+/// `Filter`s for security reasons are validated by the schema to assert all requested attributes
+/// are valid and exist in the schema so that they can have their indexes correctly used. This avoids
+/// a denial of service attack that may lead to full-table scans.
+///
+/// This `Filter` validation state is in the `STATE` attribute and will be either `FilterInvalid`
+/// or `FilterValid`. The `Filter` must be checked by the schema to move to `FilterValid`. This
+/// helps to prevent errors at compile time to assert `Filters` are securely checked.
+///
+/// [`Entry`]: ../entry/struct.Entry.html
 #[derive(Debug, Clone)]
 pub struct Filter<STATE> {
     state: STATE,
diff --git a/kanidmd/src/lib/schema.rs b/kanidmd/src/lib/schema.rs
index 2bff958f9..765625d73 100644
--- a/kanidmd/src/lib/schema.rs
+++ b/kanidmd/src/lib/schema.rs
@@ -1,3 +1,21 @@
+//! [`Schema`] is one of the foundational concepts of the server. It provides a
+//! set of rules that the avas of [`Entries`] must comply with to be
+//! considered valid for commit to the database. This allows us to provide
+//! requirements and structure as to what an [`Entry`] must have and may contain
+//! which enables many other parts to function.
+//!
+//! To define this structure we define [`Attributes`] that provide rules for how
+//! an ava should be structured. We also define [`Classes`] that define
+//! the rules of which [`Attributes`] may or must exist on an [`Entry`] for it
+//! to be considered valid. An [`Entry`] must have at least 1 to infinite
+//! [`Classes`]. [`Classes`] are additive.
+//!
+//! [`Schema`]: struct.Schema.html
+//! [`Entries`]: ../entry/index.html
+//! [`Entry`]: ../entry/index.html
+//! [`Attributes`]: struct.SchemaAttribute.html
+//! [`Classes`]: struct.SchemaClass.html
+
 use crate::audit::AuditScope;
 use crate::constants::*;
 use crate::entry::{Entry, EntryCommitted, EntryNew, EntryValid};
@@ -23,24 +41,45 @@ lazy_static! {
     static ref PVCLASS_CLASSTYPE: PartialValue = PartialValue::new_class("classtype");
 }
 
+/// Schema stores the set of [`Classes`] and [`Attributes`] that the server will
+/// use to validate [`Entries`], [`Filters`] and [`Modifications`]. Additionally the
+/// schema stores an extracted copy of the current attribute indexing metadata that
+/// is used by the backend during queries.
+///
+/// [`Filters`]: ../filter/index.html
+/// [`Modifications`]: ../modify/index.html
+/// [`Entries`]: ../entry/index.html
+/// [`Attributes`]: struct.SchemaAttribute.html
+/// [`Classes`]: struct.SchemaClass.html
 pub struct Schema {
     classes: BptreeMap<String, SchemaClass>,
     attributes: BptreeMap<String, SchemaAttribute>,
     idxmeta: BptreeMap<String, IndexType>,
 }
 
+/// A writable transaction of the working schema set. You should not change this directly,
+/// the writability is for the server internally to allow reloading of the schema. Changes
+/// you make will be lost when the server re-reads the schema from disk.
 pub struct SchemaWriteTransaction<'a> {
     classes: BptreeMapWriteTxn<'a, String, SchemaClass>,
     attributes: BptreeMapWriteTxn<'a, String, SchemaAttribute>,
     idxmeta: BptreeMapWriteTxn<'a, String, IndexType>,
 }
 
+/// A readonly transaction of the working schema set.
 pub struct SchemaReadTransaction {
     classes: BptreeMapReadTxn<String, SchemaClass>,
     attributes: BptreeMapReadTxn<String, SchemaAttribute>,
     idxmeta: BptreeMapReadTxn<String, IndexType>,
 }
 
+/// An item representing an attribute and the rules that enforce it. These rules enforce if an
+/// attribute on an [`Entry`] may be single or multi value, must be unique amongst all other types
+/// of this attribute, if the attribute should be [`indexed`], and what type of data [`syntax`] it may hold.
+///
+/// [`Entry`]: ../entry/index.html
+/// [`indexed`]: ../value/enum.IndexType.html
+/// [`syntax`]: ../value/enum.SyntaxType.html
 #[derive(Debug, Clone)]
 pub struct SchemaAttribute {
     // Is this ... used?
@@ -403,6 +442,23 @@ impl SchemaAttribute {
     }
 }
 
+/// An item representing a class and the rules for that class. These rules enforce that an
+/// [`Entry`]'s avas conform to a set of requirements, giving structure to an entry about
+/// what avas must or may exist. The kanidm project provides attributes in `systemmust` and
+/// `systemmay`, which can not be altered. An administrator may extend these in the `must`
+/// and `may` attributes.
+///
+/// Classes are additive, meaning that if there are two classes, the `may` rules of both union,
+/// and that if an attribute is `must` on one class, and `may` in another, the `must` rule
+/// takes precedence. It is not possible to combine classes in an incompatible way due to these
+/// rules.
+///
+/// That in mind, an entry that has one of every possible class would probably be nonsensical,
+/// but the addition rules make it easy to construct and understand with concepts like [`access`]
+/// controls or accounts and posix extensions.
+///
+/// [`Entry`]: ../entry/index.html
+/// [`access`]: ../access/index.html
 #[derive(Debug, Clone)]
 pub struct SchemaClass {
     // Is this used?
diff --git a/kanidmd/src/lib/server.rs b/kanidmd/src/lib/server.rs
index 7dd8e6bf9..0267e1aca 100644
--- a/kanidmd/src/lib/server.rs
+++ b/kanidmd/src/lib/server.rs
@@ -1,3 +1,6 @@
+//! `server` contains the query server, which is the main high level construction
+//! to coordinate queries and operations in the server.
+
 // This is really only used for long lived, high level types that need clone
 // that otherwise can't be cloned. Think Mutex.
 // use actix::prelude::*;
@@ -46,6 +49,15 @@ lazy_static! {
 // This is the core of the server. It implements all
 // the search and modify actions, applies access controls
 // and get's everything ready to push back to the fe code
+/// The `QueryServerTransaction` trait provides a set of common read only operations to be
+/// shared between [`QueryServerReadTransaction`] and [`QueryServerWriteTransaction`]s.
+///
+/// These operations tend to be high level constructions, generally different types of searches
+/// that are capable of taking different types of parameters and applying access controls or not,
+/// impersonating accounts, or bypassing these via internal searches.
+///
+/// [`QueryServerReadTransaction`]: struct.QueryServerReadTransaction.html
+/// [`QueryServerWriteTransaction`]: struct.QueryServerWriteTransaction.html
 pub trait QueryServerTransaction {
     type BackendTransactionType: BackendTransaction;
     fn get_be_txn(&self) -> &Self::BackendTransactionType;
@@ -56,6 +68,15 @@ pub trait QueryServerTransaction {
     type AccessControlsTransactionType: AccessControlsTransaction;
     fn get_accesscontrols(&self) -> &Self::AccessControlsTransactionType;
 
+    /// Conduct a search and apply access controls to yield a set of entries that
+    /// have been reduced to the set of user visible avas. Note that if you provide
+    /// a [`SearchEvent`] for the internal user, this query will fail. It is invalid for
+    /// the [`access`] module to attempt to reduce avas for internal searches, and you
+    /// should use [`fn search`] instead.
+    ///
+    /// [`SearchEvent`]: ../event/struct.SearchEvent.html
+    /// [`access`]: ../access/index.html
+    /// [`fn search`]: trait.QueryServerTransaction.html#method.search
     fn search_ext(
         &self,
         au: &mut AuditScope,
@@ -79,6 +100,7 @@ pub trait QueryServerTransaction {
         Ok(entries_filtered)
     }
 
+
     fn search(
         &self,
         au: &mut AuditScope,