From 16267e8a29e27c312d360e15a5f08977ef692d89 Mon Sep 17 00:00:00 2001 From: Firstyear Date: Fri, 15 May 2020 12:23:36 +1000 Subject: [PATCH] In progress logging design (#231) --- Cargo.lock | 75 +++-------------- designs/logging.rst | 170 ++++++++++++++++++++++++++++++++++++++ designs/migration.rst | 9 ++ kanidmd/src/lib/server.rs | 3 + 4 files changed, 192 insertions(+), 65 deletions(-) create mode 100644 designs/logging.rst create mode 100644 designs/migration.rst diff --git a/Cargo.lock b/Cargo.lock index d7ab62176..b6c81077b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -227,7 +227,7 @@ dependencies = [ "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)", "parking_lot 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)", - "threadpool 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "threadpool 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -325,14 +325,6 @@ name = "adler32" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "ahash" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "const-random 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "aho-corasick" version = "0.7.10" @@ -376,11 +368,6 @@ dependencies = [ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "autocfg" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "autocfg" version = "1.0.0" @@ -587,7 +574,7 @@ dependencies = [ [[package]] name = "clap" -version = "2.33.0" +version = "2.33.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -605,35 +592,16 @@ dependencies = [ [[package]] name = "concread" -version = "0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "crossbeam-epoch 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-utils 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lru 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", "num 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "parking_lot 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)", "smallvec 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "const-random" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "const-random-macro 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "proc-macro-hack 0.5.15 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "const-random-macro" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", - "proc-macro-hack 0.5.15 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "constant_time_eq" version = "0.1.5" @@ -705,7 +673,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", "cast 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", + "clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)", "criterion-plot 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "csv 1.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1173,15 +1141,6 @@ name = "half" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "hashbrown" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "ahash 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)", - "autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "heck" version = "0.3.1" @@ -1378,7 +1337,7 @@ dependencies = [ "base64 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", "cargo-husky 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", - "concread 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", + "concread 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", "cookie 0.13.3 (registry+https://github.com/rust-lang/crates.io-index)", "criterion 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1549,14 +1508,6 @@ dependencies = [ "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "lru" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "hashbrown 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "lru-cache" version = "0.1.2" @@ -2519,7 +2470,7 @@ name = "structopt" version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", + "clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "structopt-derive 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -2606,7 +2557,7 @@ dependencies = [ [[package]] name = "threadpool" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "num_cpus 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -3108,14 +3059,12 @@ dependencies = [ "checksum actix-web-codegen 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4f00371942083469785f7e28c540164af1913ee7c96a4534acb9cea92c39f057" "checksum actix_derive 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b95aceadaf327f18f0df5962fedc1bde2f870566a0b9f65c89508a3b1f79334c" "checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" -"checksum ahash 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "6f33b5018f120946c1dcf279194f238a9f146725593ead1c08fa47ff22b0b5d3" "checksum aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)" = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada" "checksum arc-swap 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "b585a98a234c46fc563103e9278c9391fde1f4e6850334da895d27edb9580f62" "checksum arrayref 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" "checksum arrayvec 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" "checksum async-trait 0.1.30 (registry+https://github.com/rust-lang/crates.io-index)" = "da71fef07bc806586090247e971229289f64c210a278ee5ae419314eb386b31d" "checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -"checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" "checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" "checksum awc 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d7601d4d1d7ef2335d6597a41b5fe069f6ab799b85f53565ab390e7b7065aac5" "checksum backtrace 0.3.46 (registry+https://github.com/rust-lang/crates.io-index)" = "b1e692897359247cc6bb902933361652380af0f1b7651ae5c5013407f30e109e" @@ -3143,11 +3092,9 @@ dependencies = [ "checksum cc 1.0.52 (registry+https://github.com/rust-lang/crates.io-index)" = "c3d87b23d6a92cd03af510a5ade527033f6aa6fa92161e2d5863a907d4c5e31d" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" "checksum chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)" = "80094f509cf8b5ae86a4966a39b3ff66cd7e2a3e594accec3743ff3fabeab5b2" -"checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" +"checksum clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" -"checksum concread 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "71a3779b266b0477a94e1fc0db50e27f928c1e1f14bf72d4aca1b260f443026b" -"checksum const-random 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "2f1af9ac737b2dd2d577701e59fd09ba34822f6f2ebdb30a7647405d9e55e16a" -"checksum const-random-macro 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "25e4c606eb459dd29f7c57b2e0879f2b6f14ee130918c2b78ccb58a9624e6c7a" +"checksum concread 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "cce49cb5f272b6ea1ceecf0a7cda68ef672dd0669cf07a651453801eed7b0b48" "checksum constant_time_eq 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" "checksum cookie 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "888604f00b3db336d2af898ec3c1d5d0ddf5e6d462220f2ededc33a87ac4bbd5" "checksum cookie 0.13.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0c60ef6d0bbf56ad2674249b6bb74f2c6aeb98b98dd57b5d3e37cace33011d69" @@ -3207,7 +3154,6 @@ dependencies = [ "checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" "checksum h2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "79b7246d7e4b979c03fa093da39cfb3617a96bbeee6310af63991668d7e843ff" "checksum half 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d36fab90f82edc3c747f9d438e06cf0a491055896f2a279638bb5beed6c40177" -"checksum hashbrown 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8e6073d0ca812575946eb5f35ff68dbe519907b25c42530389ff946dc84c6ead" "checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" "checksum hermit-abi 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "61565ff7aaace3525556587bd2dc31d4a07071957be715e63ce7b1eccf51a8f4" "checksum hostname 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867" @@ -3237,7 +3183,6 @@ dependencies = [ "checksum linked-hash-map 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8dd5a6d5999d9907cda8ed67bbd137d3af8085216c2ac62de5be860bd41f304a" "checksum lock_api 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c4da24a77a3d8a6d4862d95f72e6fdb9c09a643ecdb402d754004a557f2bec75" "checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" -"checksum lru 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0609345ddee5badacf857d4f547e0e5a2e987db77085c24cd887f73573a04237" "checksum lru-cache 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c" "checksum match_cfg 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" "checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" @@ -3352,7 +3297,7 @@ dependencies = [ "checksum termcolor 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb6bfa289a4d7c5766392812c0a1f4c1ba45afa1ad47803c11e1f407d846d75f" "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" "checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" -"checksum threadpool 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e8dae184447c15d5a6916d973c642aec485105a13cd238192a6927ae3e077d66" +"checksum threadpool 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" "checksum time 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" "checksum time 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)" = "e1330829d2e6c06771eeae476be12ff1aa9eb5e29ca718a431ab27168efde6c1" "checksum time-macros 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9ae9b6e9f095bc105e183e3cd493d72579be3181ad4004fceb01adbe9eecab2d" diff --git a/designs/logging.rst b/designs/logging.rst new file mode 100644 index 000000000..6945594a0 --- /dev/null +++ b/designs/logging.rst @@ -0,0 +1,170 @@ +Logging Design (Refactor) +------------------------- + +Logging is how the server communicates to developers and administrators about the state +of the service, and how operations are performing and what they are doing. It's important +this is clear in how it communicates. Today (2020-05-12) the log has been written with +development in mind, and has a structure that has as a result, become hard to parse and +understand. This has motivated a rewrite of logging to improve how the servers state +and errors are communicated to users. + +Use Cases +--------- + +* Developer Bug Reports + +A developer should be able to see the internal state of the server, and why any decision +or logic path was taken, so that errors or logic can be analysed post-incident. The +information in the log and the source code should be enough to resolve any issues, as we +may not have LLDB access to any consumers site, or any effective reproducer. + +* Security Audits + +We must be able to see why any security decision was made, such as credential validation, +access control application, or group/claim issuing to a session. This should be connected +to the IP and other identifiers of the caller. + +* Error Analysis + +For an administrator, they must be able to determine why an operation is failing in detail +so they can advise on how a consumer or user could change their behaviour to improve the +situation (beyond the error messages we return). + +* Performance + +Administrators and Developers should be able to analyse fine grained information about the +performance of any operation, and make informed decisions about tuning (such as caches or +or threads), and developers should be able to identify code paths that are under pressure +and could be targets for improvement. + +* Containers/Systemd + +Logs should be emitted on stdout/stderr as this is the easiest interface for existing +log aggregation systems to collect data from. + +Details +------- + +As developers we should indicate what messages are relevant to what use case as part of the +message. Log levels are used in other services, but that allows messages to be missed. Instead +we log every "service" always, but filter them to different locations. + +This leads to the following log categories: + +* Analysis + * Display of all logic branches and why decision points or paths taken + * A unique event ID that associates related log messages +* Performance + * Cache and DB metrics available + * Performance frames of timing of key points + * Structure of the performance frames to understand the execution paths taken. + * Display of query optimisation + * Display of query planning and application +* Failure (server failure) + * Hard Errors +* Warning (admin should take action) + * Possible misconfiguration +* OperationError (user mistake, op mistake etc) + * All error reports and finalised result summaries logged + * The unique event ID is provided in any operation success or failure. +* Security (aka audit) + * Filtering of security sensitive attributes (via debug/display features) + * Display of sufficent information to establish a security picture of connected actions via the user's uuid/session id. + * Tracking of who-changed-what-when-why +* Replication + * TODO + +It can be seen pretty quickly that multiple message types are useful across categories. For +example, the unique event id for all messages, how hard errors affect operation errors +or how an operation error can come from a security denial. + +Logging must also remain a seperate thread and async for performance. + +This means that the best way to declare these logs is a unified log which can be filtered based +on the admins or consumers needs. + +API +--- + +For all types, it's important that we can associate all related events correctly. When the +operation initiates we assign an event-id that is part of the audit trail. + +Statistics +========== + +Stats should be accumulated in a statistics variable so that we can determine possible +tuning and other events related. Useful stats would be: + +* Cache Hits +* Cache Misses +* Cache Inclusions + +* Number of Searches +* Number of Entries Modified + +This would be then logged as a structured line such as: + + { 'entry_cache_miss': 8, 'idl_cache_miss': 8, 'entry_cache_hit': 16', .... } + +This would also then be fed back to the global stats thread for averaging. + +Performance +=========== + +The key metric for performance is time-in-function so it would be good to be able to +build a value like: + + { + 'name': 'do_search', + 'time': x, + 'pct': 100, + called: [ + { + 'name': 'filter2idl', + 'time': x', + called: [], + }, + { + ... + } + ] + } + +This would allow a rich view of how much time went to any function at a high level, as then +further investigation can occur. + +Query Analysis +============== + +To analyse a query we need: + +* The original query +* The optimised version, with index tagging/profiling choices. +* The idl's that were loaded and how the query was applied +* The idl of the final result set. + +Security Events +=============== + +* What access controls were considered? +* Who authenticated and where from? +* Audit of who modified what when why. + +Analysis +======== + +This is generally what is "debug" logging, which is just decision points and verbose +descriptions of what we went where. + +Admin Notification +================== + +This is warnings or errors that the admin should be aware of. + +User Events +=========== + +This must associate what happened for a user + + + diff --git a/designs/migration.rst b/designs/migration.rst new file mode 100644 index 000000000..4a90a0b22 --- /dev/null +++ b/designs/migration.rst @@ -0,0 +1,9 @@ +Migration / Sync from other Software +------------------------------------ + +TBD + +* Password Sync/Import +* Name/Details +* Groups + diff --git a/kanidmd/src/lib/server.rs b/kanidmd/src/lib/server.rs index 5ddf3b260..6e802bc04 100644 --- a/kanidmd/src/lib/server.rs +++ b/kanidmd/src/lib/server.rs @@ -1910,6 +1910,8 @@ impl<'a> QueryServerWriteTransaction<'a> { } fn reload_schema(&mut self, audit: &mut AuditScope) -> Result<(), OperationError> { + audit_log!(audit, "Schema reload started ..."); + // supply entries to the writable schema to reload from. // find all attributes. let filt = filter!(f_eq("class", PVCLASS_ATTRIBUTETYPE.clone())); @@ -1956,6 +1958,7 @@ impl<'a> QueryServerWriteTransaction<'a> { // requirement to have the write query server reference in the parse stage - this // would cause a rust double-borrow if we had AccessControls to try to handle // the entry lists themself. + audit_log!(audit, "ACP reload started ..."); // Update search let filt = filter!(f_and!([