From b048115698625f11d1870a3e4af1d663f43c4976 Mon Sep 17 00:00:00 2001 From: Firstyear Date: Mon, 17 Feb 2020 08:09:33 +1030 Subject: [PATCH] 20200216 document internals (#187) Add documentation of internal api --- README.md | 6 +- designs/entries.rst | 38 --------- designs/filter.rst | 163 -------------------------------------- designs/schema.rst | 117 --------------------------- kanidmd/src/lib/entry.rs | 53 +++++++++++++ kanidmd/src/lib/filter.rs | 33 +++++++- kanidmd/src/lib/schema.rs | 56 +++++++++++++ kanidmd/src/lib/server.rs | 22 +++++ 8 files changed, 166 insertions(+), 322 deletions(-) delete mode 100644 designs/entries.rst delete mode 100644 designs/filter.rst delete mode 100644 designs/schema.rst diff --git a/README.md b/README.md index 188940021..c19cae384 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,11 @@ For more see the [kanidm book] ### Designs -See the [designs] folder +See the [designs] folder, and compile the private documentation locally: + +``` +cargo doc --document-private-items --open --no-deps +``` [designs]: https://github.com/kanidm/kanidm/tree/master/designs diff --git a/designs/entries.rst b/designs/entries.rst deleted file mode 100644 index a5c865007..000000000 --- a/designs/entries.rst +++ /dev/null @@ -1,38 +0,0 @@ - -Entries -------- - -Entries are the base unit of data in this server. This is one of the three foundational concepts -along with filters and schema that everything thing else builds upon. - -What is an Entry? ------------------ - -An entry is a collection of attribute-values. These are sometimes called attribute-value-assertions, -attr-value sets. The attribute is a "key", and it holds 1 to infinite values associated. An entry -can have many avas associated, which creates the entry as a whole. An example entry (minus schema): - - Entry { - "name": ["william"], - "mail": ["william@email", "email@william"], - "uuid": ["..."], - } - -There are only a few rules that are true in entries. 
- -* UUID - -All entries *must* have a UUID attribute, and there must ONLY exist a single value. This UUID ava -MUST be unique within the database, regardless of entry state (live, recycled, tombstoned etc). - -* Zero values - -An attribute with zero values, is removed from the entry. - -* Unsorted - -Values within an attribute are "not sorted" in any meaningful way for a client utility (in reality -they are sorted by an undefined internal order for fast lookup/insertion). - - -That's it. diff --git a/designs/filter.rst b/designs/filter.rst deleted file mode 100644 index c403dac63..000000000 --- a/designs/filter.rst +++ /dev/null @@ -1,163 +0,0 @@ - -Filters -------- - -Filters (along with Entries and Schema) is one of the foundational concepts in the -design of KaniDM. They are used in nearly every aspect of the server to provide -checking and searching over entry sets. - -A filter is a set of requirements where the attribute-value pairs of the entry must -conform for the filter to be considered a "match". This has two useful properties: - -* We can apply a filter to a single entry to determine quickly assertions about that entry -hold true. -* We can apply a filter to a set of entries to reduce the set only to the matching entries. - -Filter Construction -------------------- - -Filters are rooted in relational algebra and set mathematics. I am not an expert on either -topic, and have learnt from experience about there design. - -* Presence - -The simplest filter is a "presence" test. It asserts that some attribute, regardless -of it's value exists on the entry. For example, the entries below: - - Entry { - name: william - } - - Entry { - description: test - } - -If we apply "Pres(name)", then we would only see the entry containing "name: william" as a matching -result. - -* Equality - -Equality checks that an attribute and value are present on an entry. 
For example - - Entry { - name: william - } - - Entry { - name: test - } - -If we apply Eq(name, william) only the first entry would match. If the attribute is multivalued, -we only assert that one value in the set is there. For example: - - Entry { - name: william - } - - Entry { - name: test - name: claire - } - -In this case application of Eq(name, claire), would match the second entry as name=claire is present -in the multivalue set. - -* Sub - -Substring checks that the substring exists in an attribute of the entry. This is a specialisation -of equality, where the same value and multivalue handling holds true. - - Entry { - name: william - } - -In this example, Sub(name, liam) would match, but Sub(name, air) would not. - -* Or - -Or contains multiple filters and asserts that provided *any* of them are true, this condition -will hold true. For example: - - Entry { - name: claire - } - -In this the filter Or(Eq(name, claire), Eq(name, william)) will be true, because the Eq(name, claire) -is true, thus the Or condition is true. If nothing inside the Or is true, it returns false. - -* And - -And checks that all inner filter conditions are true, to return true. If any are false, it will -yield false. - - Entry { - name: claire - class: person - } - -For this example, And(Eq(class, person), Eq(name, claire)) would be true, but And(Eq(class, group), -Eq(name, claire)) would be false. - -* AndNot - -AndNot is different to a logical not. - -If we had Not(Eq(name, claire)), then the logical result is "All entries where name is not -claire". However, this is (today...) not very efficient. Instead, we have "AndNot" which asserts -that a condition of a candidate set is not true. So the operation: AndNot(Eq(name, claire)) would -yield and empty set. AndNot is important when you need to check that something is also not true -but without getting all entries where that not holds. 
An example: - - Entry { - name: william - class: person - } - - Entry { - name: claire - class: person - } - -In this case "And(Eq(class, person), AndNot(Eq(name, claire)))". This would find all persons -where their name is also not claire: IE william. However, the following would be empty result. -"AndNot(Eq(name, claire))". This is because there is no candidate set already existing, so there -is nothing to return. - - -Filter Schema Considerations ----------------------------- - -In order to make filters work properly, the server normalises entries on input to allow simpler -comparisons and ordering in the actual search phases. This means that for a filter to operate -it too must be normalised an valid. - -If a filter requests an operation on an attribute we do not know of in schema, the operation -is rejected. This is to prevent a denial of service attack where Eq(NonExist, value) would cause -un-indexed full table scans to be performed consuming server resources. - -In a filter request, the Attribute name in use is normalised according to schema, as it -the search value. For example, Eq(nAmE, Claire) would normalise to Eq(name, claire) as both -attrname and name are UTF8_INSENSITIVE. However, displayName is case sensitive so a search like: -Eq(displayName, Claire) would become Eq(displayname, Claire). Note Claire remains cased. - -This means that instead of having costly routines to normalise entries on each read and search, -we can normalise on entry modify and create, then we only need to ensure filters match and we -can do basic string comparisons as needed. - - -Discussion ----------- - -Is it worth adding a true "not" type, and using that instead? It would be extremely costly on -indexes or filter testing, but would logically be better than AndNot as a filter term. - -Not could be implemented as Not() -> And(Pres(class), AndNot()) which would -yield the equivalent result, but it would consume a very large index component. 
In this case -though, filter optimising would promote Eq > Pres, so we would should be able to skip to a candidate -test, or we access the index and get the right result anyway over fulltable scan. - -Additionally, Not/AndNot could be security risks because they could be combined with And -queries that allow them to bypass the filter-attribute permission check. Is there an example -of using And(Eq, AndNot(Eq)) that could be used to provide information disclosure about -the status of an attribute given a result/non result where the AndNot is false/true? - diff --git a/designs/schema.rst b/designs/schema.rst deleted file mode 100644 index e1f2a2e79..000000000 --- a/designs/schema.rst +++ /dev/null @@ -1,117 +0,0 @@ - -Schema ------- - -Schema is one of the three foundational concepts of the server, along with filters and entries. -Schema defines how attribute values *must* be represented, sorted, indexed and more. It also -defines what attributes could exist on an entry. - -Why Schema? ------------ - -The way that the server is designed, you could extract the backend parts and just have "Entries" -with no schema. That's totally valid if you want! - -However, usually in the world all data maintains some form of structure, even if loose. We want to -have ways to say a database entry represents a person, and what a person requires. - -Attributes ----------- - -In the entry document, I discuss that avas have a single attribute, and 1 to infinite values that -are utf8 case sensitive strings. Which schema attribute types we can constrain these avas on an -entry. - -For example, while the entry may be capable of holding 1 to infinite "name" values, the schema -defines that only one name is valid on the entry. Addition of a second name would be a violation. Of -course, schema also defines "multi-value", our usual 1 to infinite value storage concept. - -Schema can also define that values of the attribute must conform to a syntax. 
For example, name -is a case *insensitive* string. So despite the fact that avas store case-sensitive data, all inputs -to name will be normalised to a lowercase form for faster matching. There are a number of syntax -types built into the server, and we'll add more later. - -Finally, an attribute can be defined as indexed, and in which ways it can be indexed. We often will -want to search for "mail" on a person, so we can define in the schema that mail is indexed by the -backend indexing system. We don't define *how* the index is built - only that some index should exist -for when a query is made. - -Classes -------- - -So while we have attributes that define "what is valid in the avas", classes define "which attributes -can exist on the entry itself". - -A class defines requirements that are "may", "must", "systemmay", "systemmust". The system- variants -exist so that we can ship what we believe are good definitions. The may and must exists so you can -edit and extend our classes with your extra attribute fields (but it may be better just to add -your own class types :) ) - -An attribute in a class marked as "may" is optional on the entry. It can be present as an ava, or -it may not be. - -An attribute in a class marked as "must" is required on the entry. An ava that is valid to the -attribute syntax is required on this entry. - -An attribute that is not "may" or "must" can not be present on this entry. - -Lets imagine we have a class (pseudo example) of "person". We'll make it: - - Class { - "name": "person", - "systemmust": ["name"], - "systemmay": ["mail"] - } - -If we had an entry such as: - - Entry { - "class": ["person"], - "uid": ["bob"], - "mail": ["bob@email"] - } - -This would be invalid: We are missing the "systemmust" name attribute. It's also invalid because uid -is not present in systemmust or systemmay. - - Entry { - "class": ["person"], - "name": ["claire"], - "mail": ["claire@email"] - } - -This entry is now valid. 
We have met the must requirement of name, and we have the optional -mail ava populated. The following is also valid. - - Entry { - "class": ["person"], - "name": ["claire"], - } - -Classes are 'additive' - this means given two classes on an entry, the must/may are unioned, and the -strongest rule is applied to attribute presence. - -Imagine we have also - - Class { - "name": "person", - "systemmust": ["name"], - "systemmay": ["mail"] - } - - Class { - "name": "emailperson", - "systemmust": ["mail"] - } - -With our entry now, this turns the "may" from person, into a "must" because of the emailperson -class. On our entry Claire, that means this entry below is now invalid: - - Entry { - "class": ["person", "emailperson"], - "name": ["claire"], - } - -Simply adding an ava of mail back to the entry would make it valid once again. - - diff --git a/kanidmd/src/lib/entry.rs b/kanidmd/src/lib/entry.rs index d3b9a8a6e..9bca082af 100644 --- a/kanidmd/src/lib/entry.rs +++ b/kanidmd/src/lib/entry.rs @@ -1,3 +1,29 @@ +//! Entries are the base unit of object storage in the server. This is one of the three foundational +//! concepts along with [`filter`]s and [`schema`] that everything else builds upon. +//! +//! An [`Entry`] is a collection of attribute-value sets. These are sometimes called attribute value +//! assertions, or avas. The attribute is a "key" and it holds 1 to infinite associated values +//! with no ordering. An entry has many avas. A pseudo example, minus schema and typing: +//! +//! ```text +//! Entry { +//! "name": ["william"], +//! "uuid": ["..."], +//! "mail": ["maila@example.com", "mailb@example.com"], +//! } +//! ``` +//! +//! There are three rules for entries: +//! * Must have an ava for UUID containing a single value. +//! * Any ava with zero values will be removed. +//! * Avas are stored with no sorting. +//! +//! For more, see the [`Entry`] type. +//! +//! [`Entry`]: struct.Entry.html +//! [`filter`]: ../filter/index.html +//!
[`schema`]: ../schema/index.html + // use serde_json::{Error, Value}; use crate::audit::AuditScope; use crate::credential::Credential; @@ -166,6 +192,33 @@ pub struct EntryReduced { uuid: Uuid, } +/// Entry is the core data storage type of the server. Almost every aspect of the server is +/// designed to read, handle and manipulate entries. +/// +/// Entries store attribute value assertions, or ava. These are sets of key-values. +/// +/// Entries have a lifecycle within a single operation, and as part of replication. +/// The lifecycle for operations is defined through state and valid types. Each entry has a pair +/// of these types at any time. The first is the ava [`schema`] and [`access`] control assertion +/// state. This is represented by the type `VALID` as one of `EntryValid`, `EntryInvalid` or +/// `EntryReduced`. Every entry starts as `EntryInvalid`, and when checked by the schema for +/// correctness, transitions to `EntryValid`. While an entry is `EntryValid` it can not be +/// altered - you must invalidate it to `EntryInvalid`, then modify, then check again. +/// An entry that has had access controls applied moves from `EntryValid` to `EntryReduced`, +/// to show that the avas have been reduced to the valid read set of the current [`event`] user. +/// +/// The second type of `STATE` represents the database commit state and internal db IDs. A +/// new entry that has never been committed is `EntryNew`, but an entry that has been retrieved +/// from the database is `EntryCommitted`. This affects the operations you can apply IE modify +/// or delete. +/// +/// These types exist to prevent at compile time, mishandling of Entries, to ensure they are always +/// handled with the correct lifecycles and processes.
+/// [`schema`]: ../schema/index.html +/// [`access`]: ../access/index.html +/// [`event`]: ../event/index.html +/// #[derive(Debug)] pub struct Entry { valid: VALID, diff --git a/kanidmd/src/lib/filter.rs b/kanidmd/src/lib/filter.rs index 6f85bc5eb..6f838aa5d 100644 --- a/kanidmd/src/lib/filter.rs +++ b/kanidmd/src/lib/filter.rs @@ -1,6 +1,12 @@ -// This represents a filtering query. This can be done -// in parallel map/reduce style, or directly on a single -// entry to assert it matches. +//! [`Filter`]s are one of the three foundational concepts of the design in kanidm. +//! They are used in nearly every aspect of the server to provide searching of +//! datasets, and assertion of entry properties. +//! +//! A filter is a logical statement of properties that an [`Entry`] and its +//! avas must uphold to be considered true. +//! +//! [`Filter`]: struct.Filter.html +//! [`Entry`]: ../entry/struct.Entry.html use crate::audit::AuditScope; use crate::event::{Event, EventOrigin}; @@ -124,6 +130,27 @@ pub struct FilterValidResolved { inner: FilterResolved, } +/// A `Filter` is a logical set of assertions about the state of an [`Entry`] and +/// its avas. `Filter`s are built from a set of possible assertions. +/// +/// * `Pres`ence. An ava of that attribute's name exists, with any value on the [`Entry`]. +/// * `Eq`uality. An ava of the attribute exists and contains this matching value. +/// * `Sub`string. An ava of the attribute exists and has a substring containing the requested value. +/// * `Or`. Contains multiple filters and asserts at least one is true. +/// * `And`. Contains multiple filters and asserts all of them are true. +/// * `AndNot`. This is different to a "logical not" operation. This asserts that a condition is not +/// true in the current candidate set. A search of `AndNot` alone will yield no results, but an +/// `AndNot` in an `And` query will assert that a condition can not hold.
+/// +/// `Filter`s for security reasons are validated by the schema to assert all requested attributes +/// are valid and exist in the schema so that they can have their indexes correctly used. This avoids +/// a denial of service attack that may lead to full-table scans. +/// +/// This `Filter` validation state is in the `STATE` attribute and will be either `FilterInvalid` +/// or `FilterValid`. The `Filter` must be checked by the schema to move to `FilterValid`. This +/// helps to prevent errors at compile time to assert `Filter`s are securely checked. +/// +/// [`Entry`]: ../entry/struct.Entry.html #[derive(Debug, Clone)] pub struct Filter { state: STATE, diff --git a/kanidmd/src/lib/schema.rs b/kanidmd/src/lib/schema.rs index 2bff958f9..765625d73 100644 --- a/kanidmd/src/lib/schema.rs +++ b/kanidmd/src/lib/schema.rs @@ -1,3 +1,21 @@ +//! [`Schema`] is one of the foundational concepts of the server. It provides a +//! set of rules that the avas of [`Entries`] must comply with, to be +//! considered valid for commit to the database. This allows us to provide +//! requirements and structure as to what an [`Entry`] must have and may contain +//! which enables many other parts to function. +//! +//! To define this structure we define [`Attributes`] that provide rules for how +//! an ava should be structured. We also define [`Classes`] that define +//! the rules of which [`Attributes`] may or must exist on an [`Entry`] for it +//! to be considered valid. An [`Entry`] must have from 1 to infinite +//! [`Classes`]. [`Classes`] are additive. +//! +//! [`Schema`]: struct.Schema.html +//! [`Entries`]: ../entry/index.html +//! [`Entry`]: ../entry/index.html +//! [`Attributes`]: struct.SchemaAttribute.html +//! [`Classes`]: struct.SchemaClass.html + use crate::audit::AuditScope; use crate::constants::*; use crate::entry::{Entry, EntryCommitted, EntryNew, EntryValid}; @@ -23,24 +41,45 @@ lazy_static!
{ static ref PVCLASS_CLASSTYPE: PartialValue = PartialValue::new_class("classtype"); } +/// Schema stores the set of [`Classes`] and [`Attributes`] that the server will +/// use to validate [`Entries`], [`Filters`] and [`Modifications`]. Additionally the +/// schema stores an extracted copy of the current attribute indexing metadata that +/// is used by the backend during queries. +/// +/// [`Filters`]: ../filter/index.html +/// [`Modifications`]: ../modify/index.html +/// [`Entries`]: ../entry/index.html +/// [`Attributes`]: struct.SchemaAttribute.html +/// [`Classes`]: struct.SchemaClass.html pub struct Schema { classes: BptreeMap, attributes: BptreeMap, idxmeta: BptreeMap, } +/// A writable transaction of the working schema set. You should not change this directly, +/// the writability is for the server internally to allow reloading of the schema. Changes +/// you make will be lost when the server re-reads the schema from disk. pub struct SchemaWriteTransaction<'a> { classes: BptreeMapWriteTxn<'a, String, SchemaClass>, attributes: BptreeMapWriteTxn<'a, String, SchemaAttribute>, idxmeta: BptreeMapWriteTxn<'a, String, IndexType>, } +/// A readonly transaction of the working schema set. pub struct SchemaReadTransaction { classes: BptreeMapReadTxn, attributes: BptreeMapReadTxn, idxmeta: BptreeMapReadTxn, } +/// An item representing an attribute and the rules that enforce it. These rules enforce if an +/// attribute on an [`Entry`] may be single or multi value, must be unique amongst all other types +/// of this attribute, if the attribute should be [`indexed`], and what type of data [`syntax`] it may hold. +/// +/// [`Entry`]: ../entry/index.html +/// [`indexed`]: ../value/enum.IndexType.html +/// [`syntax`]: ../value/enum.SyntaxType.html #[derive(Debug, Clone)] pub struct SchemaAttribute { // Is this ... used? @@ -403,6 +442,23 @@ impl SchemaAttribute { } } +/// An item representing a class and the rules for that class.
These rules enforce that an +/// [`Entry`]'s avas conform to a set of requirements, giving structure to an entry about +/// what avas must or may exist. The kanidm project provides attributes in `systemmust` and +/// `systemmay`, which can not be altered. An administrator may extend these in the `must` +/// and `may` attributes. +/// +/// Classes are additive, meaning that if there are two classes, the `may` rules of both union, +/// and that if an attribute is `must` on one class, and `may` in another, the `must` rule +/// takes precedence. It is not possible to combine classes in an incompatible way due to these +/// rules. +/// +/// With that in mind, an entry that has one of every possible class would probably be nonsensical, +/// but the addition rules make it easy to construct and understand with concepts like [`access`] +/// controls or accounts and posix extensions. +/// +/// [`Entry`]: ../entry/index.html +/// [`access`]: ../access/index.html #[derive(Debug, Clone)] pub struct SchemaClass { // Is this used? diff --git a/kanidmd/src/lib/server.rs b/kanidmd/src/lib/server.rs index 7dd8e6bf9..0267e1aca 100644 --- a/kanidmd/src/lib/server.rs +++ b/kanidmd/src/lib/server.rs @@ -1,3 +1,6 @@ +//! `server` contains the query server, which is the main high level construction +//! to coordinate queries and operations in the server. + // This is really only used for long lived, high level types that need clone // that otherwise can't be cloned. Think Mutex. // use actix::prelude::*; @@ -46,6 +49,15 @@ lazy_static! { // This is the core of the server. It implements all // the search and modify actions, applies access controls // and get's everything ready to push back to the fe code +/// The `QueryServerTransaction` trait provides a set of common read only operations to be +/// shared between [`QueryServerReadTransaction`] and [`QueryServerWriteTransaction`]s.
+/// +/// These operations tend to be high level constructions, generally different types of searches +/// that are capable of taking different types of parameters and applying access controls or not, +/// impersonating accounts, or bypassing these via internal searches. +/// +/// [`QueryServerReadTransaction`]: struct.QueryServerReadTransaction.html +/// [`QueryServerWriteTransaction`]: struct.QueryServerWriteTransaction.html pub trait QueryServerTransaction { type BackendTransactionType: BackendTransaction; fn get_be_txn(&self) -> &Self::BackendTransactionType; @@ -56,6 +68,15 @@ pub trait QueryServerTransaction { type AccessControlsTransactionType: AccessControlsTransaction; fn get_accesscontrols(&self) -> &Self::AccessControlsTransactionType; + /// Conduct a search and apply access controls to yield a set of entries that + /// have been reduced to the set of user visible avas. Note that if you provide + /// a [`SearchEvent`] for the internal user, this query will fail. It is invalid for + /// the [`access`] module to attempt to reduce avas for internal searches, and you + /// should use [`fn search`] instead. + /// + /// [`SearchEvent`]: ../event/struct.SearchEvent.html + /// [`access`]: ../access/index.html + /// [`fn search`]: trait.QueryServerTransaction.html#method.search fn search_ext( &self, au: &mut AuditScope, @@ -79,6 +100,7 @@ pub trait QueryServerTransaction { Ok(entries_filtered) } + fn search( &self, au: &mut AuditScope,