In progress logging design (#231)

This commit is contained in:
Firstyear 2020-05-15 12:23:36 +10:00 committed by GitHub
parent 2670b6ae83
commit 16267e8a29
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 192 additions and 65 deletions

75
Cargo.lock generated
View file

@ -227,7 +227,7 @@ dependencies = [
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
"parking_lot 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)",
"threadpool 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"threadpool 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -325,14 +325,6 @@ name = "adler32"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "ahash"
version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"const-random 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "aho-corasick"
version = "0.7.10"
@ -376,11 +368,6 @@ dependencies = [
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "autocfg"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "autocfg"
version = "1.0.0"
@ -587,7 +574,7 @@ dependencies = [
[[package]]
name = "clap"
version = "2.33.0"
version = "2.33.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -605,35 +592,16 @@ dependencies = [
[[package]]
name = "concread"
version = "0.1.9"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"crossbeam-epoch 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam-utils 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
"lru 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
"num 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"parking_lot 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)",
"smallvec 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "const-random"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"const-random-macro 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
"proc-macro-hack 0.5.15 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "const-random-macro"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)",
"proc-macro-hack 0.5.15 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "constant_time_eq"
version = "0.1.5"
@ -705,7 +673,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
"cast 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)",
"criterion-plot 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
"csv 1.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1173,15 +1141,6 @@ name = "half"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "hashbrown"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ahash 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)",
"autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "heck"
version = "0.3.1"
@ -1378,7 +1337,7 @@ dependencies = [
"base64 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)",
"cargo-husky 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)",
"concread 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"concread 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"cookie 0.13.3 (registry+https://github.com/rust-lang/crates.io-index)",
"criterion 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1549,14 +1508,6 @@ dependencies = [
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lru"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"hashbrown 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lru-cache"
version = "0.1.2"
@ -2519,7 +2470,7 @@ name = "structopt"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt-derive 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -2606,7 +2557,7 @@ dependencies = [
[[package]]
name = "threadpool"
version = "1.8.0"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num_cpus 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -3108,14 +3059,12 @@ dependencies = [
"checksum actix-web-codegen 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4f00371942083469785f7e28c540164af1913ee7c96a4534acb9cea92c39f057"
"checksum actix_derive 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b95aceadaf327f18f0df5962fedc1bde2f870566a0b9f65c89508a3b1f79334c"
"checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
"checksum ahash 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "6f33b5018f120946c1dcf279194f238a9f146725593ead1c08fa47ff22b0b5d3"
"checksum aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)" = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada"
"checksum arc-swap 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "b585a98a234c46fc563103e9278c9391fde1f4e6850334da895d27edb9580f62"
"checksum arrayref 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
"checksum arrayvec 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8"
"checksum async-trait 0.1.30 (registry+https://github.com/rust-lang/crates.io-index)" = "da71fef07bc806586090247e971229289f64c210a278ee5ae419314eb386b31d"
"checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
"checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2"
"checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d"
"checksum awc 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d7601d4d1d7ef2335d6597a41b5fe069f6ab799b85f53565ab390e7b7065aac5"
"checksum backtrace 0.3.46 (registry+https://github.com/rust-lang/crates.io-index)" = "b1e692897359247cc6bb902933361652380af0f1b7651ae5c5013407f30e109e"
@ -3143,11 +3092,9 @@ dependencies = [
"checksum cc 1.0.52 (registry+https://github.com/rust-lang/crates.io-index)" = "c3d87b23d6a92cd03af510a5ade527033f6aa6fa92161e2d5863a907d4c5e31d"
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
"checksum chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)" = "80094f509cf8b5ae86a4966a39b3ff66cd7e2a3e594accec3743ff3fabeab5b2"
"checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
"checksum clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum concread 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "71a3779b266b0477a94e1fc0db50e27f928c1e1f14bf72d4aca1b260f443026b"
"checksum const-random 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "2f1af9ac737b2dd2d577701e59fd09ba34822f6f2ebdb30a7647405d9e55e16a"
"checksum const-random-macro 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "25e4c606eb459dd29f7c57b2e0879f2b6f14ee130918c2b78ccb58a9624e6c7a"
"checksum concread 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "cce49cb5f272b6ea1ceecf0a7cda68ef672dd0669cf07a651453801eed7b0b48"
"checksum constant_time_eq 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
"checksum cookie 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "888604f00b3db336d2af898ec3c1d5d0ddf5e6d462220f2ededc33a87ac4bbd5"
"checksum cookie 0.13.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0c60ef6d0bbf56ad2674249b6bb74f2c6aeb98b98dd57b5d3e37cace33011d69"
@ -3207,7 +3154,6 @@ dependencies = [
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
"checksum h2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "79b7246d7e4b979c03fa093da39cfb3617a96bbeee6310af63991668d7e843ff"
"checksum half 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d36fab90f82edc3c747f9d438e06cf0a491055896f2a279638bb5beed6c40177"
"checksum hashbrown 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8e6073d0ca812575946eb5f35ff68dbe519907b25c42530389ff946dc84c6ead"
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
"checksum hermit-abi 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "61565ff7aaace3525556587bd2dc31d4a07071957be715e63ce7b1eccf51a8f4"
"checksum hostname 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867"
@ -3237,7 +3183,6 @@ dependencies = [
"checksum linked-hash-map 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8dd5a6d5999d9907cda8ed67bbd137d3af8085216c2ac62de5be860bd41f304a"
"checksum lock_api 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c4da24a77a3d8a6d4862d95f72e6fdb9c09a643ecdb402d754004a557f2bec75"
"checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7"
"checksum lru 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0609345ddee5badacf857d4f547e0e5a2e987db77085c24cd887f73573a04237"
"checksum lru-cache 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c"
"checksum match_cfg 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4"
"checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08"
@ -3352,7 +3297,7 @@ dependencies = [
"checksum termcolor 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb6bfa289a4d7c5766392812c0a1f4c1ba45afa1ad47803c11e1f407d846d75f"
"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
"checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
"checksum threadpool 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e8dae184447c15d5a6916d973c642aec485105a13cd238192a6927ae3e077d66"
"checksum threadpool 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa"
"checksum time 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438"
"checksum time 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)" = "e1330829d2e6c06771eeae476be12ff1aa9eb5e29ca718a431ab27168efde6c1"
"checksum time-macros 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9ae9b6e9f095bc105e183e3cd493d72579be3181ad4004fceb01adbe9eecab2d"

170
designs/logging.rst Normal file
View file

@ -0,0 +1,170 @@
Logging Design (Refactor)
-------------------------
Logging is how the server communicates to developers and administrators about the state
of the service, and how operations are performing and what they are doing. It's important
this is clear in how it communicates. Today (2020-05-12) the log has been written with
development in mind, and has a structure that has as a result, become hard to parse and
understand. This has motivated a rewrite of logging to improve how the servers state
and errors are communicated to users.
Use Cases
---------
* Developer Bug Reports
A developer should be able to see the internal state of the server, and why any decision
or logic path was taken, so that errors or logic can be analysed post-incident. The
information in the log and the source code should be enough to resolve any issues, as we
may not have LLDB access to any consumers site, or any effective reproducer.
* Security Audits
We must be able to see why any security decision was made, such as credential validation,
access control application, or group/claim issuing to a session. This should be connected
to the IP and other identifiers of the caller.
* Error Analysis
For an administrator, they must be able to determine why an operation is failing in detail
so they can advise on how a consumer or user could change their behaviour to improve the
situation (beyond the error messages we return).
* Performance
Administrators and Developers should be able to analyse fine grained information about the
performance of any operation, and make informed decisions about tuning (such as caches or
or threads), and developers should be able to identify code paths that are under pressure
and could be targets for improvement.
* Containers/Systemd
Logs should be emitted on stdout/stderr as this is the easiest interface for existing
log aggregation systems to collect data from.
Details
-------
As developers we should indicate what messages are relevant to what use case as part of the
message. Log levels are used in other services, but that allows messages to be missed. Instead
we log every "service" always, but filter them to different locations.
This leads to the following log categories:
* Analysis
* Display of all logic branches and why decision points or paths taken
* A unique event ID that associates related log messages
* Performance
* Cache and DB metrics available
* Performance frames of timing of key points
* Structure of the performance frames to understand the execution paths taken.
* Display of query optimisation
* Display of query planning and application
* Failure (server failure)
* Hard Errors
* Warning (admin should take action)
* Possible misconfiguration
* OperationError (user mistake, op mistake etc)
* All error reports and finalised result summaries logged
* The unique event ID is provided in any operation success or failure.
* Security (aka audit)
* Filtering of security sensitive attributes (via debug/display features)
* Display of sufficent information to establish a security picture of connected actions via the user's uuid/session id.
* Tracking of who-changed-what-when-why
* Replication
* TODO
It can be seen pretty quickly that multiple message types are useful across categories. For
example, the unique event id for all messages, how hard errors affect operation errors
or how an operation error can come from a security denial.
Logging must also remain a seperate thread and async for performance.
This means that the best way to declare these logs is a unified log which can be filtered based
on the admins or consumers needs.
API
---
For all types, it's important that we can associate all related events correctly. When the
operation initiates we assign an event-id that is part of the audit trail.
Statistics
==========
Stats should be accumulated in a statistics variable so that we can determine possible
tuning and other events related. Useful stats would be:
* Cache Hits
* Cache Misses
* Cache Inclusions
* Number of Searches
* Number of Entries Modified
This would be then logged as a structured line such as:
{ 'entry_cache_miss': 8, 'idl_cache_miss': 8, 'entry_cache_hit': 16', .... }
This would also then be fed back to the global stats thread for averaging.
Performance
===========
The key metric for performance is time-in-function so it would be good to be able to
build a value like:
{
'name': 'do_search',
'time': x,
'pct': 100,
called: [
{
'name': 'filter2idl',
'time': x',
called: [],
},
{
...
}
]
}
This would allow a rich view of how much time went to any function at a high level, as then
further investigation can occur.
Query Analysis
==============
To analyse a query we need:
* The original query
* The optimised version, with index tagging/profiling choices.
* The idl's that were loaded and how the query was applied
* The idl of the final result set.
Security Events
===============
* What access controls were considered?
* Who authenticated and where from?
* Audit of who modified what when why.
Analysis
========
This is generally what is "debug" logging, which is just decision points and verbose
descriptions of what we went where.
Admin Notification
==================
This is warnings or errors that the admin should be aware of.
User Events
===========
This must associate what happened for a user

9
designs/migration.rst Normal file
View file

@ -0,0 +1,9 @@
Migration / Sync from other Software
------------------------------------
TBD
* Password Sync/Import
* Name/Details
* Groups

View file

@ -1910,6 +1910,8 @@ impl<'a> QueryServerWriteTransaction<'a> {
}
fn reload_schema(&mut self, audit: &mut AuditScope) -> Result<(), OperationError> {
audit_log!(audit, "Schema reload started ...");
// supply entries to the writable schema to reload from.
// find all attributes.
let filt = filter!(f_eq("class", PVCLASS_ATTRIBUTETYPE.clone()));
@ -1956,6 +1958,7 @@ impl<'a> QueryServerWriteTransaction<'a> {
// requirement to have the write query server reference in the parse stage - this
// would cause a rust double-borrow if we had AccessControls to try to handle
// the entry lists themself.
audit_log!(audit, "ACP reload started ...");
// Update search
let filt = filter!(f_and!([