Moving daemon tracing to OpenTelemetry (#2292)
* sally forth into the great otel unknown
* make the build env identification slightly more durable
* docs updates
* wasm recompile
This commit is contained in:
parent
3bd2cc8a9f
commit
60e5935faa
Cargo.lock
Cargo.toml
Makefile
book/src
examples
libs
scripts/otel
README.md
docker-compose.yml
grafana-datasources.yaml
loki-local-config.yaml
multi_curl.sh
prometheus.yml
startup.sh
tempo.yaml
server
core/src
daemon
lib/src
web_ui
229
Cargo.lock
generated
|
@ -1130,6 +1130,8 @@ dependencies = [
|
|||
"kanidm_proto",
|
||||
"kanidm_utils_users",
|
||||
"kanidmd_core",
|
||||
"opentelemetry",
|
||||
"opentelemetry_api",
|
||||
"reqwest",
|
||||
"sd-notify",
|
||||
"serde",
|
||||
|
@ -1138,6 +1140,7 @@ dependencies = [
|
|||
"tokio",
|
||||
"tokio-util",
|
||||
"toml",
|
||||
"tracing",
|
||||
"whoami",
|
||||
]
|
||||
|
||||
|
@ -1801,6 +1804,16 @@ dependencies = [
|
|||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gethostname"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0176e0459c2e4a1fe232f984bca6890e681076abb9934f6cea7c326f3fc47818"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.11"
|
||||
|
@ -2672,6 +2685,18 @@ dependencies = [
|
|||
"want",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper-timeout"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1"
|
||||
dependencies = [
|
||||
"hyper",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
"tokio-io-timeout",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper-tls"
|
||||
version = "0.5.0"
|
||||
|
@ -3340,7 +3365,6 @@ dependencies = [
|
|||
"serde-wasm-bindgen 0.5.0",
|
||||
"serde_json",
|
||||
"time",
|
||||
"url",
|
||||
"uuid",
|
||||
"wasm-bindgen",
|
||||
"wasm-bindgen-futures",
|
||||
|
@ -3363,7 +3387,6 @@ dependencies = [
|
|||
"serde-wasm-bindgen 0.5.0",
|
||||
"serde_json",
|
||||
"time",
|
||||
"url",
|
||||
"uuid",
|
||||
"wasm-bindgen",
|
||||
"wasm-bindgen-futures",
|
||||
|
@ -3410,7 +3433,6 @@ dependencies = [
|
|||
"serde-wasm-bindgen 0.5.0",
|
||||
"serde_json",
|
||||
"time",
|
||||
"url",
|
||||
"uuid",
|
||||
"wasm-bindgen",
|
||||
"wasm-bindgen-futures",
|
||||
|
@ -4109,6 +4131,109 @@ dependencies = [
|
|||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9591d937bc0e6d2feb6f71a559540ab300ea49955229c347a517a28d27784c54"
|
||||
dependencies = [
|
||||
"opentelemetry_api",
|
||||
"opentelemetry_sdk",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-http"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c7594ec0e11d8e33faf03530a4c49af7064ebba81c1480e01be67d90b356508b"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"http",
|
||||
"opentelemetry_api",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-otlp"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7e5e5a5c4135864099f3faafbe939eb4d7f9b80ebf68a8448da961b32a7c1275"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"futures-core",
|
||||
"http",
|
||||
"opentelemetry-http",
|
||||
"opentelemetry-proto",
|
||||
"opentelemetry-semantic-conventions",
|
||||
"opentelemetry_api",
|
||||
"opentelemetry_sdk",
|
||||
"prost",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tonic",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-proto"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1e3f814aa9f8c905d0ee4bde026afd3b2577a97c10e1699912e3e44f0c4cbeb"
|
||||
dependencies = [
|
||||
"opentelemetry_api",
|
||||
"opentelemetry_sdk",
|
||||
"prost",
|
||||
"tonic",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-semantic-conventions"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73c9f9340ad135068800e7f1b24e9e09ed9e7143f5bf8518ded3d3ec69789269"
|
||||
dependencies = [
|
||||
"opentelemetry",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry_api"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a81f725323db1b1206ca3da8bb19874bbd3f57c3bcd59471bfb04525b265b9b"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-util",
|
||||
"indexmap 1.9.3",
|
||||
"js-sys",
|
||||
"once_cell",
|
||||
"pin-project-lite",
|
||||
"thiserror",
|
||||
"urlencoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry_sdk"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa8e705a0612d48139799fcbaba0d4a90f06277153e43dd2bdc16c6f0edd8026"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"crossbeam-channel",
|
||||
"futures-channel",
|
||||
"futures-executor",
|
||||
"futures-util",
|
||||
"once_cell",
|
||||
"opentelemetry_api",
|
||||
"ordered-float",
|
||||
"percent-encoding",
|
||||
"rand",
|
||||
"regex",
|
||||
"serde_json",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "orca"
|
||||
version = "1.1.0-rc.15-dev"
|
||||
|
@ -4138,6 +4263,15 @@ dependencies = [
|
|||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ordered-float"
|
||||
version = "3.9.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "overload"
|
||||
version = "0.1.1"
|
||||
|
@ -4554,6 +4688,29 @@ dependencies = [
|
|||
"wasm-bindgen-futures",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost"
|
||||
version = "0.11.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"prost-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost-derive"
|
||||
version = "0.11.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"itertools 0.10.5",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "psl-types"
|
||||
version = "2.0.11"
|
||||
|
@ -5289,10 +5446,18 @@ dependencies = [
|
|||
name = "sketching"
|
||||
version = "1.1.0-rc.15-dev"
|
||||
dependencies = [
|
||||
"gethostname",
|
||||
"num_enum",
|
||||
"opentelemetry",
|
||||
"opentelemetry-otlp",
|
||||
"opentelemetry_sdk",
|
||||
"rand",
|
||||
"serde",
|
||||
"tracing",
|
||||
"tracing-forest",
|
||||
"tracing-opentelemetry",
|
||||
"tracing-subscriber",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -5642,6 +5807,16 @@ dependencies = [
|
|||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-io-timeout"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30b74022ada614a1b4834de765f9bb43877f910cc8ce4be40e89042c9223a8bf"
|
||||
dependencies = [
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-macros"
|
||||
version = "2.1.0"
|
||||
|
@ -5727,6 +5902,34 @@ dependencies = [
|
|||
"winnow",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tonic"
|
||||
version = "0.9.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"axum",
|
||||
"base64 0.21.5",
|
||||
"bytes",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"h2",
|
||||
"http",
|
||||
"http-body",
|
||||
"hyper",
|
||||
"hyper-timeout",
|
||||
"percent-encoding",
|
||||
"pin-project",
|
||||
"prost",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tower",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower"
|
||||
version = "0.4.13"
|
||||
|
@ -5735,10 +5938,14 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
|
|||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"indexmap 1.9.3",
|
||||
"pin-project",
|
||||
"pin-project-lite",
|
||||
"rand",
|
||||
"slab",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
|
@ -5841,6 +6048,22 @@ dependencies = [
|
|||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-opentelemetry"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "75327c6b667828ddc28f5e3f169036cb793c3f588d83bf0f262a7f062ffed3c8"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"opentelemetry",
|
||||
"opentelemetry_sdk",
|
||||
"smallvec",
|
||||
"tracing",
|
||||
"tracing-core",
|
||||
"tracing-log",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-serde"
|
||||
version = "0.1.3"
|
||||
|
|
19
Cargo.toml
|
@ -162,6 +162,25 @@ num_enum = "^0.5.11"
|
|||
oauth2_ext = { version = "^4.1.0", package = "oauth2", default-features = false }
|
||||
openssl-sys = "^0.9"
|
||||
openssl = "^0.10.59"
|
||||
|
||||
opentelemetry = { version = "0.20.0" }
|
||||
opentelemetry_api = { version = "0.20.0", features = ["logs", "metrics"] }
|
||||
opentelemetry-otlp = { version = "0.13.0", default-features = false, features = [
|
||||
"serde",
|
||||
"logs",
|
||||
"metrics",
|
||||
"http-proto",
|
||||
"grpc-tonic",
|
||||
] }
|
||||
opentelemetry_sdk = "0.20.0"
|
||||
opentelemetry-stdout = { version = "0.1.0", features = [
|
||||
"logs",
|
||||
"metrics",
|
||||
"trace",
|
||||
] }
|
||||
tonic = "0.10.2"
|
||||
tracing-opentelemetry = "0.21.0"
|
||||
|
||||
paste = "^1.0.14"
|
||||
pkg-config = "^0.3.27"
|
||||
proc-macro2 = "1.0.69"
|
||||
|
|
1
Makefile
|
@ -140,6 +140,7 @@ codespell:
|
|||
--skip='./book/src/images/*' \
|
||||
--skip='./docs/*,./.git' \
|
||||
--skip='*.svg' \
|
||||
--skip='*.br' \
|
||||
--skip='./rlm_python/mods-available/eap' \
|
||||
--skip='./server/web_ui/static/external' \
|
||||
--skip='./server/web_ui/pkg/external' \
|
||||
|
|
159
book/src/developers/designs/logging.md
Normal file
|
@ -0,0 +1,159 @@
|
|||
# Logging
|
||||
|
||||
Logging is how the server communicates to developers and administrators about the state of the
|
||||
service, and how operations are performing and what they are doing. It's important this is clear in
|
||||
how it communicates.
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Developer Bug Reports
|
||||
|
||||
A developer should be able to see the internal state of the server, and why any decision or logic
|
||||
path was taken, so that errors or logic can be analysed post-incident. The information in the log
|
||||
and the source code should be enough to resolve any issues, as we may not have LLDB access to any
|
||||
consumer's site, or any effective reproducer.
|
||||
|
||||
### Security Audits
|
||||
|
||||
We must be able to see why any security decision was made, such as credential validation, access
|
||||
control application, or group/claim issuing to a session. This should be connected to the IP and
|
||||
other identifiers of the caller.
|
||||
|
||||
### Error Analysis
|
||||
|
||||
For an administrator, they must be able to determine why an operation is failing in detail so they
|
||||
can advise on how a consumer or user could change their behaviour to improve the situation (beyond
|
||||
the error messages we return).
|
||||
|
||||
### Performance
|
||||
|
||||
Administrators and Developers should be able to analyse fine grained information about the
|
||||
performance of any operation, and make informed decisions about tuning (such as caches or
|
||||
threads), and developers should be able to identify code paths that are under pressure and could be
|
||||
targets for improvement.
|
||||
|
||||
### Containers/Systemd
|
||||
|
||||
Logs should be emitted on stdout/stderr as this is the easiest interface for existing log
|
||||
aggregation systems to collect data from.
|
||||
|
||||
## Details
|
||||
|
||||
As developers we should indicate what messages are relevant to what use case as part of the message.
|
||||
Log levels are used in other services, but that allows messages to be missed. Instead we always log
|
||||
every "service", but filter them to different locations.
|
||||
|
||||
This leads to the following log categories:
|
||||
|
||||
- Analysis
|
||||
- Display of all logic branches and why decision points or paths were taken
|
||||
- A unique event ID that associates related log messages
|
||||
- Performance
|
||||
- Cache and DB metrics available
|
||||
- Performance frames of timing of key points
|
||||
- Structure of the performance frames to understand the execution paths taken.
|
||||
- Display of query optimisation
|
||||
- Display of query planning and application
|
||||
- Failure (server failure)
|
||||
- Hard Errors
|
||||
- Warning (admin should take action)
|
||||
- Possible misconfiguration
|
||||
- OperationError (user mistake, op mistake etc)
|
||||
- All error reports and finalised result summaries logged
|
||||
- The unique event ID is provided in any operation success or failure.
|
||||
- Security (aka audit)
|
||||
- Filtering of security sensitive attributes (via debug/display features)
|
||||
- Display of sufficient information to establish a security picture of connected actions via the
|
||||
user's uuid/session id.
|
||||
- Tracking of who-changed-what-when-why
|
||||
- Replication
|
||||
- Both replication consumers and providers log when they make runs.
|
||||
- Errors in replication should surface as such.
|
||||
|
||||
It can be seen pretty quickly that multiple message types are useful across categories. For example,
|
||||
the unique event id for all messages, how hard errors affect operation errors or how an operation
|
||||
error can come from a security denial.
|
||||
|
||||
Logging must also remain a separate thread and async for performance.
|
||||
|
||||
This means that the best way to declare these logs is a unified log which can be filtered based on
|
||||
the admin's or consumer's needs.
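
As a small sketch of what that filtering can look like with the `tracing-subscriber` crate used
elsewhere in this change (the directive target here is only illustrative):

```rust
use tracing_subscriber::EnvFilter;

// One unified log, filtered per consumer need: everything at info,
// with the core server module turned up to debug for an admin who needs it.
fn admin_filter() -> EnvFilter {
    EnvFilter::new("info")
        .add_directive("kanidmd_core=debug".parse().expect("valid directive"))
}
```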
|
||||
|
||||
## API
|
||||
|
||||
For all types, it's important that we can associate all related events correctly. When the operation
|
||||
initiates we assign an event-id that is part of the audit trail.
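
As a purely illustrative sketch (the exact field names are not fixed by this design), two messages
from the same operation carry the same event id so they can be correlated:

```json
{ "event_id": "b71b5b0a-6b0e-4d18-8f2f-0e4e3d6c9a11", "category": "security", "msg": "credential validated" }
{ "event_id": "b71b5b0a-6b0e-4d18-8f2f-0e4e3d6c9a11", "category": "operation_error", "msg": "access denied to target entry" }
```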
|
||||
|
||||
### Statistics
|
||||
|
||||
Stats should be accumulated in a statistics variable so that we can determine possible tuning and
|
||||
other related events. Useful stats would be:
|
||||
|
||||
- Cache Hits
|
||||
- Cache Misses
|
||||
- Cache Inclusions
|
||||
|
||||
- Number of Searches
|
||||
- Number of Entries Modified
|
||||
|
||||
This would be then logged as a structured line such as:
|
||||
|
||||
```json
|
||||
{ "entry_cache_miss": 8, "idl_cache_miss": 8, "entry_cache_hit": 16, .... }
|
||||
```
|
||||
|
||||
This would also then be fed back to the global stats thread for averaging.
|
||||
|
||||
### System Performance
|
||||
|
||||
The key metric for performance is time-in-function so it would be good to be able to build a value
|
||||
like:
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "do_search",
|
||||
"time_ns": 130,
|
||||
"pct": 100,
|
||||
"called": [
|
||||
{
"name": "filter2idl",
"time_ns": 23,
|
||||
"called": [],
|
||||
},
|
||||
{
|
||||
...
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
This would allow a rich view of how much time went to any function at a high level, so that further
|
||||
investigation can occur.
|
||||
|
||||
### SQL Query Analysis
|
||||
|
||||
To analyse a query we need:
|
||||
|
||||
- The original query
|
||||
- The optimised version, with index tagging/profiling choices.
|
||||
- The idl's that were loaded and how the query was applied
|
||||
- The idl of the final result set.
|
||||
|
||||
### Security Events
|
||||
|
||||
- What access controls were considered?
|
||||
- Who authenticated and where from?
|
||||
- Audit of who modified what when why.
|
||||
|
||||
### Internal Analysis
|
||||
|
||||
This is generally what is "debug" logging, which is just decision points and verbose descriptions of
|
||||
where we went and what we did.
|
||||
|
||||
### Admin Notification
|
||||
|
||||
This is warnings or errors that the admin should be aware of.
|
||||
|
||||
### User Events
|
||||
|
||||
This must associate what happened for a user
|
|
@ -1,170 +0,0 @@
|
|||
Logging Design (Refactor)
|
||||
-------------------------
|
||||
|
||||
Logging is how the server communicates to developers and administrators about the state
|
||||
of the service, and how operations are performing and what they are doing. It's important
|
||||
this is clear in how it communicates. Today (2020-05-12) the log has been written with
|
||||
development in mind, and has a structure that has as a result, become hard to parse and
|
||||
understand. This has motivated a rewrite of logging to improve how the servers state
|
||||
and errors are communicated to users.
|
||||
|
||||
Use Cases
|
||||
---------
|
||||
|
||||
* Developer Bug Reports
|
||||
|
||||
A developer should be able to see the internal state of the server, and why any decision
|
||||
or logic path was taken, so that errors or logic can be analysed post-incident. The
|
||||
information in the log and the source code should be enough to resolve any issues, as we
|
||||
may not have LLDB access to any consumers site, or any effective reproducer.
|
||||
|
||||
* Security Audits
|
||||
|
||||
We must be able to see why any security decision was made, such as credential validation,
|
||||
access control application, or group/claim issuing to a session. This should be connected
|
||||
to the IP and other identifiers of the caller.
|
||||
|
||||
* Error Analysis
|
||||
|
||||
For an administrator, they must be able to determine why an operation is failing in detail
|
||||
so they can advise on how a consumer or user could change their behaviour to improve the
|
||||
situation (beyond the error messages we return).
|
||||
|
||||
* Performance
|
||||
|
||||
Administrators and Developers should be able to analyse fine grained information about the
|
||||
performance of any operation, and make informed decisions about tuning (such as caches or
|
||||
or threads), and developers should be able to identify code paths that are under pressure
|
||||
and could be targets for improvement.
|
||||
|
||||
* Containers/Systemd
|
||||
|
||||
Logs should be emitted on stdout/stderr as this is the easiest interface for existing
|
||||
log aggregation systems to collect data from.
|
||||
|
||||
Details
|
||||
-------
|
||||
|
||||
As developers we should indicate what messages are relevant to what use case as part of the
|
||||
message. Log levels are used in other services, but that allows messages to be missed. Instead
|
||||
we log every "service" always, but filter them to different locations.
|
||||
|
||||
This leads to the following log categories:
|
||||
|
||||
* Analysis
|
||||
* Display of all logic branches and why decision points or paths taken
|
||||
* A unique event ID that associates related log messages
|
||||
* Performance
|
||||
* Cache and DB metrics available
|
||||
* Performance frames of timing of key points
|
||||
* Structure of the performance frames to understand the execution paths taken.
|
||||
* Display of query optimisation
|
||||
* Display of query planning and application
|
||||
* Failure (server failure)
|
||||
* Hard Errors
|
||||
* Warning (admin should take action)
|
||||
* Possible misconfiguration
|
||||
* OperationError (user mistake, op mistake etc)
|
||||
* All error reports and finalised result summaries logged
|
||||
* The unique event ID is provided in any operation success or failure.
|
||||
* Security (aka audit)
|
||||
* Filtering of security sensitive attributes (via debug/display features)
|
||||
* Display of sufficient information to establish a security picture of connected actions via the user's uuid/session id.
|
||||
* Tracking of who-changed-what-when-why
|
||||
* Replication
|
||||
* TODO
|
||||
|
||||
It can be seen pretty quickly that multiple message types are useful across categories. For
|
||||
example, the unique event id for all messages, how hard errors affect operation errors
|
||||
or how an operation error can come from a security denial.
|
||||
|
||||
Logging must also remain a separate thread and async for performance.
|
||||
|
||||
This means that the best way to declare these logs is a unified log which can be filtered based
|
||||
on the admins or consumers needs.
|
||||
|
||||
API
|
||||
---
|
||||
|
||||
For all types, it's important that we can associate all related events correctly. When the
|
||||
operation initiates we assign an event-id that is part of the audit trail.
|
||||
|
||||
Statistics
|
||||
==========
|
||||
|
||||
Stats should be accumulated in a statistics variable so that we can determine possible
|
||||
tuning and other events related. Useful stats would be:
|
||||
|
||||
* Cache Hits
|
||||
* Cache Misses
|
||||
* Cache Inclusions
|
||||
|
||||
* Number of Searches
|
||||
* Number of Entries Modified
|
||||
|
||||
This would be then logged as a structured line such as:
|
||||
|
||||
{ 'entry_cache_miss': 8, 'idl_cache_miss': 8, 'entry_cache_hit': 16', .... }
|
||||
|
||||
This would also then be fed back to the global stats thread for averaging.
|
||||
|
||||
Performance
|
||||
===========
|
||||
|
||||
The key metric for performance is time-in-function so it would be good to be able to
|
||||
build a value like:
|
||||
|
||||
{
|
||||
'name': 'do_search',
|
||||
'time': x,
|
||||
'pct': 100,
|
||||
called: [
|
||||
{
|
||||
'name': 'filter2idl',
|
||||
'time': x',
|
||||
called: [],
|
||||
},
|
||||
{
|
||||
...
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
This would allow a rich view of how much time went to any function at a high level, as then
|
||||
further investigation can occur.
|
||||
|
||||
Query Analysis
|
||||
==============
|
||||
|
||||
To analyse a query we need:
|
||||
|
||||
* The original query
|
||||
* The optimised version, with index tagging/profiling choices.
|
||||
* The idl's that were loaded and how the query was applied
|
||||
* The idl of the final result set.
|
||||
|
||||
Security Events
|
||||
===============
|
||||
|
||||
* What access controls were considered?
|
||||
* Who authenticated and where from?
|
||||
* Audit of who modified what when why.
|
||||
|
||||
Analysis
|
||||
========
|
||||
|
||||
This is generally what is "debug" logging, which is just decision points and verbose
|
||||
descriptions of what we went where.
|
||||
|
||||
Admin Notification
|
||||
==================
|
||||
|
||||
This is warnings or errors that the admin should be aware of.
|
||||
|
||||
User Events
|
||||
===========
|
||||
|
||||
This must associate what happened for a user
|
||||
|
||||
|
||||
|
|
@ -3,7 +3,7 @@
|
|||
The monitoring design of Kanidm is still very much in its infancy -
|
||||
[take part in the discussion at github.com/kanidm/kanidm/issues/216](https://github.com/kanidm/kanidm/issues/216).
|
||||
|
||||
## kanidmd
|
||||
## kanidmd status endpoint
|
||||
|
||||
kanidmd currently responds to HTTP GET requests at the `/status` endpoint with a JSON object of
|
||||
either "true" or "false". `true` indicates that the platform is responding to requests.
|
||||
|
@ -15,3 +15,27 @@ either "true" or "false". `true` indicates that the platform is responding to re
|
|||
| Additional Headers | x-kanidm-opid |
|
||||
| Content Type | application/json |
|
||||
| Cookies | kanidm-session |
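
For example (the hostname and port here are illustrative; `-k` skips certificate verification if
you're using self-signed test certs):

```shell
curl -k https://idm.example.com:8443/status
# returns: true
```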
|
||||
|
||||
## OpenTelemetry Tracing
|
||||
|
||||
Configure OTLP trace exports by setting `otel_grpc_url` in the server configuration. This will
|
||||
enable [OpenTelemetry traces](https://opentelemetry.io) to be sent for observability use cases.
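
For example, in the server's TOML configuration (this matches the example config shipped in this
change; point it at whatever OTLP gRPC collector you run):

```toml
otel_grpc_url = "http://localhost:4317"
```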
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
#### Max Span Size Exceeded
|
||||
|
||||
On startup, we run some big processes that might hit a "max trace size" in certain configurations.
|
||||
Grafana Tempo defaults to 5MB, which is sensible for most things, but ... 😁
|
||||
|
||||
Grafana Tempo
|
||||
[config to allow larger spans](https://grafana.com/docs/tempo/latest/troubleshooting/response-too-large/):
|
||||
|
||||
```yaml
|
||||
distributor:
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
max_recv_msg_size_mib: 20
|
||||
```
|
||||
|
|
|
@ -32,7 +32,7 @@ This means:
|
|||
|
||||
If you see something like this:
|
||||
|
||||
```
|
||||
```shell
|
||||
➜ curl -v https://idm.example.com:8443
|
||||
* Trying 10.0.0.1:8443...
|
||||
* connect to 10.0.0.1 port 8443 failed: Connection refused
|
||||
|
@ -47,7 +47,7 @@ some reason.
|
|||
If you get errors about certificates, try adding `-k` to skip certificate verification checking and
|
||||
just test connectivity:
|
||||
|
||||
```
|
||||
```shell
|
||||
curl -vk https://idm.example.com:8443/status
|
||||
```
|
||||
|
||||
|
@ -87,3 +87,19 @@ to `1.1`. This can go in the same block as the `proxy_pass` option.
|
|||
```text
|
||||
proxy_http_version 1.1
|
||||
```
|
||||
|
||||
## OpenTelemetry errors
|
||||
|
||||
If you see something like this:
|
||||
|
||||
> `OpenTelemetry trace error occurred. Exporter otlp encountered the following error(s): the grpc server returns error (The system is not in a state required for the operation's execution): , detailed error message: TRACE_TOO_LARGE: max size of trace (5000000) exceeded while adding 86725 bytes to trace a657b63f6ca0415eb70b6734f20f82cf for tenant single-tenant`
|
||||
|
||||
Then you'll need to tweak the maximum trace size in your OTLP receiver. In Grafana Tempo you can add
|
||||
the following keys to your `tempo.yaml`; in this example we're setting it to 20MiB:
|
||||
|
||||
```yaml
|
||||
overrides:
|
||||
defaults:
|
||||
global:
|
||||
max_bytes_per_trace: 20971520 # 20MiB
|
||||
```
|
||||
|
|
|
@ -15,6 +15,8 @@ tls_key = "/tmp/kanidm/key.pem"
|
|||
log_level = "debug"
|
||||
# log_level = "trace"
|
||||
|
||||
otel_grpc_url = "http://localhost:4317"
|
||||
|
||||
domain = "localhost"
|
||||
origin = "https://localhost:8443"
|
||||
trust_x_forward_for = true
|
||||
|
|
|
@ -1,14 +1,28 @@
|
|||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::{env, fs};
|
||||
|
||||
use base64::{engine::general_purpose, Engine as _};
|
||||
|
||||
// We do this here so it's only actually run and checked once.
|
||||
/// Work out where the workspace dir is
|
||||
fn workspace_dir() -> PathBuf {
|
||||
let output = std::process::Command::new(env!("CARGO"))
|
||||
.arg("locate-project")
|
||||
.arg("--workspace")
|
||||
.arg("--message-format=plain")
|
||||
.output()
|
||||
.unwrap()
|
||||
.stdout;
|
||||
let cargo_path = Path::new(std::str::from_utf8(&output).unwrap().trim());
|
||||
cargo_path.parent().unwrap().to_path_buf()
|
||||
}
|
||||
|
||||
// We do this here so it's only actually run and checked once at build time.
|
||||
fn determine_git_rev() -> Option<String> {
|
||||
let path = PathBuf::from("../../");
|
||||
let repo = match gix::open(path) {
|
||||
let repo = match gix::open(workspace_dir()) {
|
||||
Ok(repo) => repo,
|
||||
Err(_) => return None,
|
||||
Err(_) => {
|
||||
return None;
|
||||
}
|
||||
};
|
||||
let mut head = repo.head().ok()?;
|
||||
let commit = head.peel_to_commit_in_place().ok()?;
|
||||
|
|
|
@ -17,12 +17,26 @@ test = false
|
|||
doctest = false
|
||||
|
||||
[dependencies]
|
||||
gethostname = "0.4.3"
|
||||
num_enum = { workspace = true }
|
||||
opentelemetry = { workspace = true, features = ["metrics", "rt-tokio"] }
|
||||
opentelemetry-otlp = { workspace = true, default-features = false, features = [
|
||||
"serde",
|
||||
"logs",
|
||||
"metrics",
|
||||
"http-proto",
|
||||
"grpc-tonic",
|
||||
] }
|
||||
opentelemetry_sdk = { workspace = true }
|
||||
rand = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
tracing = { workspace = true, features = ["attributes"] }
|
||||
tracing-subscriber = { workspace = true, features = ["env-filter"] }
|
||||
tracing-forest = { workspace = true, features = [
|
||||
"uuid",
|
||||
"smallvec",
|
||||
"tokio",
|
||||
"env-filter",
|
||||
] }
|
||||
tracing-opentelemetry = { workspace = true }
|
||||
tracing-subscriber = { workspace = true, features = ["env-filter"] }
|
||||
uuid = { workspace = true, features = ["v4"] }
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
#![deny(warnings)]
|
||||
#![warn(unused_extern_crates)]
|
||||
#![allow(non_snake_case)]
|
||||
use std::str::FromStr;
|
||||
|
||||
use num_enum::{IntoPrimitive, TryFromPrimitive};
|
||||
use serde::Deserialize;
|
||||
use tracing_forest::printer::TestCapturePrinter;
|
||||
use tracing_forest::tag::NoTag;
|
||||
use tracing_forest::util::*;
|
||||
|
@ -9,6 +12,7 @@ use tracing_forest::Tag;
|
|||
use tracing_subscriber::prelude::*;
|
||||
|
||||
pub mod macros;
|
||||
pub mod otel;
|
||||
|
||||
pub use {tracing, tracing_forest, tracing_subscriber};
|
||||
|
||||
|
@ -96,3 +100,47 @@ impl EventTag {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Deserialize, Debug, Default)]
|
||||
pub enum LogLevel {
|
||||
#[default]
|
||||
#[serde(rename = "info")]
|
||||
Info,
|
||||
#[serde(rename = "debug")]
|
||||
Debug,
|
||||
#[serde(rename = "trace")]
|
||||
Trace,
|
||||
}
|
||||
|
||||
impl FromStr for LogLevel {
|
||||
type Err = &'static str;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"info" => Ok(LogLevel::Info),
|
||||
"debug" => Ok(LogLevel::Debug),
|
||||
"trace" => Ok(LogLevel::Trace),
|
||||
_ => Err("Must be one of info, debug, trace"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for LogLevel {
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
LogLevel::Info => "info".to_string(),
|
||||
LogLevel::Debug => "debug".to_string(),
|
||||
LogLevel::Trace => "trace".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LogLevel> for EnvFilter {
|
||||
fn from(value: LogLevel) -> Self {
|
||||
match value {
|
||||
LogLevel::Info => EnvFilter::new("info"),
|
||||
LogLevel::Debug => EnvFilter::new("debug"),
|
||||
LogLevel::Trace => EnvFilter::new("trace"),
|
||||
}
|
||||
}
|
||||
}
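
A small sketch of how this type is meant to be used (both conversions are defined just above):

```rust
use std::str::FromStr;
use tracing_subscriber::EnvFilter;

fn log_level_example() {
    // Parse the configured string, then hand it to the subscriber as a filter.
    let level = sketching::LogLevel::from_str("debug").expect("one of info, debug, trace");
    assert_eq!(level.to_string(), "debug");
    let _filter: EnvFilter = level.into();
}
```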
|
||||
|
|
|
@ -119,3 +119,51 @@ macro_rules! filter_trace {
|
|||
macro_rules! perf_trace {
|
||||
($($arg:tt)*) => { tagged_event!(TRACE, EventTag::PerfTrace, $($arg)*) }
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! event_dynamic_lvl {
|
||||
( $(target: $target:expr,)? $(parent: $parent:expr,)? $lvl:expr, $($tt:tt)* ) => {
|
||||
match $lvl {
|
||||
tracing::Level::ERROR => {
|
||||
tracing::event!(
|
||||
$(target: $target,)?
|
||||
$(parent: $parent,)?
|
||||
tracing::Level::ERROR,
|
||||
$($tt)*
|
||||
);
|
||||
}
|
||||
tracing::Level::WARN => {
|
||||
tracing::event!(
|
||||
$(target: $target,)?
|
||||
$(parent: $parent,)?
|
||||
tracing::Level::WARN,
|
||||
$($tt)*
|
||||
);
|
||||
}
|
||||
tracing::Level::INFO => {
|
||||
tracing::event!(
|
||||
$(target: $target,)?
|
||||
$(parent: $parent,)?
|
||||
tracing::Level::INFO,
|
||||
$($tt)*
|
||||
);
|
||||
}
|
||||
tracing::Level::DEBUG => {
|
||||
tracing::event!(
|
||||
$(target: $target,)?
|
||||
$(parent: $parent,)?
|
||||
tracing::Level::DEBUG,
|
||||
$($tt)*
|
||||
);
|
||||
}
|
||||
tracing::Level::TRACE => {
|
||||
tracing::event!(
|
||||
$(target: $target,)?
|
||||
$(parent: $parent,)?
|
||||
tracing::Level::TRACE,
|
||||
$($tt)*
|
||||
);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
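
For reference, a minimal sketch of how this macro gets used, mirroring the HTTP response middleware
later in this change (the helper function is made up for illustration):

```rust
use tracing::Level;

// Pick the level at runtime based on the outcome, then emit one event.
fn log_response(status_code: u16) {
    let level = if status_code < 400 { Level::INFO } else { Level::WARN };
    sketching::event_dynamic_lvl!(level, status_code = status_code, "response sent");
}
```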
|
||||
|
|
132
libs/sketching/src/otel.rs
Normal file
|
@ -0,0 +1,132 @@
|
|||
use gethostname::gethostname;
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry_otlp::{Protocol, WithExportConfig};
|
||||
use opentelemetry_sdk::trace::{self, Sampler};
|
||||
use opentelemetry_sdk::Resource;
|
||||
use std::time::Duration;
|
||||
use tracing::Subscriber;
|
||||
use tracing_subscriber::Registry;
|
||||
use tracing_subscriber::{prelude::*, EnvFilter};
|
||||
|
||||
pub const MAX_EVENTS_PER_SPAN: u32 = 64 * 1024;
|
||||
pub const MAX_ATTRIBUTES_PER_SPAN: u32 = 128;
|
||||
|
||||
/// if you set the KANIDM_OTEL_GRPC_ENDPOINT env var you'll start the OpenTelemetry pipeline.
|
||||
pub fn get_otlp_endpoint() -> Option<String> {
|
||||
std::env::var("KANIDM_OTEL_GRPC_ENDPOINT").ok()
|
||||
}
|
||||
|
||||
// TODO: this is coming back later
|
||||
// #[allow(dead_code)]
|
||||
// pub fn init_metrics() -> metrics::Result<MeterProvider> {
|
||||
// let export_config = opentelemetry_otlp::ExportConfig {
|
||||
// endpoint: "http://localhost:4318/v1/metrics".to_string(),
|
||||
// ..opentelemetry_otlp::ExportConfig::default()
|
||||
// };
|
||||
// opentelemetry_otlp::new_pipeline()
|
||||
// .metrics(opentelemetry_sdk::runtime::Tokio)
|
||||
// .with_exporter(
|
||||
// opentelemetry_otlp::new_exporter()
|
||||
// .http()
|
||||
// .with_export_config(export_config),
|
||||
// )
|
||||
// .build()
|
||||
// }
|
||||
|
||||
/// This does all the startup things for the logging pipeline
|
||||
pub fn start_logging_pipeline(
|
||||
otlp_endpoint: Option<String>,
|
||||
log_filter: crate::LogLevel,
|
||||
service_name: String,
|
||||
) -> Result<Box<dyn Subscriber + Send + Sync>, String> {
|
||||
let forest_filter: EnvFilter = log_filter.into();
|
||||
|
||||
// TODO: work out how to do metrics things
|
||||
// let meter_provider = init_metrics()
|
||||
// .map_err(|err| eprintln!("failed to start metrics provider: {:?}", err))?;
|
||||
|
||||
match otlp_endpoint {
|
||||
Some(endpoint) => {
|
||||
// adding these filters because when you close out the process the OTLP comms layer is NOISY
|
||||
let forest_filter = forest_filter
|
||||
.add_directive(
|
||||
"tonic=info"
|
||||
.parse()
|
||||
.expect("Failed to set tonic logging to info"),
|
||||
)
|
||||
.add_directive("h2=info".parse().expect("Failed to set h2 logging to info"))
|
||||
.add_directive(
|
||||
"hyper=info"
|
||||
.parse()
|
||||
.expect("Failed to set hyper logging to info"),
|
||||
);
|
||||
let forest_layer = tracing_forest::ForestLayer::default().with_filter(forest_filter);
|
||||
let t_filter: EnvFilter = log_filter.into();
|
||||
|
||||
let tracer = opentelemetry_otlp::new_pipeline().tracing().with_exporter(
|
||||
opentelemetry_otlp::new_exporter()
|
||||
.tonic()
|
||||
.with_endpoint(endpoint)
|
||||
.with_timeout(Duration::from_secs(5))
|
||||
.with_protocol(Protocol::HttpBinary),
|
||||
);
|
||||
|
||||
// this env var gets set at build time, if we can pull it, add it to the metadata
|
||||
let git_rev = match option_env!("KANIDM_KANIDM_PKG_COMMIT_REV") {
|
||||
Some(rev) => format!("-{}", rev),
|
||||
None => "".to_string(),
|
||||
};
|
||||
|
||||
let version = format!("{}{}", env!("CARGO_PKG_VERSION"), git_rev);
|
||||
let hostname = gethostname();
|
||||
let hostname = hostname.to_string_lossy();
|
||||
let hostname = hostname.to_lowercase();
|
||||
|
||||
let tracer = tracer
|
||||
.with_trace_config(
|
||||
trace::config()
|
||||
// we want *everything!*
|
||||
.with_sampler(Sampler::AlwaysOn)
|
||||
.with_max_events_per_span(MAX_EVENTS_PER_SPAN)
|
||||
.with_max_attributes_per_span(MAX_ATTRIBUTES_PER_SPAN)
|
||||
.with_resource(Resource::new(vec![
|
||||
KeyValue::new("service.name", service_name),
|
||||
KeyValue::new("service.version", version),
|
||||
KeyValue::new("host.name", hostname),
|
||||
// TODO: it'd be really nice to be able to set the instance ID here, from the server UUID so we know *which* instance on this host is logging
|
||||
])),
|
||||
)
|
||||
.install_batch(opentelemetry::runtime::Tokio)
|
||||
.map_err(|err| {
|
||||
let err = format!("Failed to start OTLP pipeline: {:?}", err);
|
||||
eprintln!("{}", err);
|
||||
err
|
||||
})?;
|
||||
// Create a tracing layer with the configured tracer;
|
||||
let telemetry = tracing_opentelemetry::layer()
|
||||
.with_tracer(tracer)
|
||||
.with_threads(true)
|
||||
.with_filter(t_filter);
|
||||
|
||||
Ok(Box::new(
|
||||
Registry::default().with(forest_layer).with(telemetry),
|
||||
))
|
||||
}
|
||||
None => {
|
||||
let forest_layer = tracing_forest::ForestLayer::default().with_filter(forest_filter);
|
||||
Ok(Box::new(Registry::default().with(forest_layer)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This helps with cleanly shutting down the tracing/logging providers when done,
|
||||
/// so we don't lose traces.
|
||||
pub struct TracingPipelineGuard {}
|
||||
|
||||
impl Drop for TracingPipelineGuard {
|
||||
fn drop(&mut self) {
|
||||
opentelemetry::global::shutdown_tracer_provider();
|
||||
opentelemetry::global::shutdown_logger_provider();
|
||||
println!("Logging pipeline completed shutdown");
|
||||
}
|
||||
}
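
The real daemon wiring isn't visible on this page (its diff is suppressed further down as too
large), but a minimal sketch of how the guard and pipeline fit together might look like this,
assuming a tokio runtime and that the boxed subscriber is installed globally:

```rust
// Hypothetical sketch only - not the daemon's actual startup code.
#[tokio::main]
async fn main() -> Result<(), String> {
    // Keep the guard alive for the whole program so traces are flushed on exit.
    let _guard = sketching::otel::TracingPipelineGuard {};

    let subscriber = sketching::otel::start_logging_pipeline(
        sketching::otel::get_otlp_endpoint(),
        sketching::LogLevel::Info,
        "kanidmd".to_string(),
    )?;
    tracing::subscriber::set_global_default(subscriber)
        .map_err(|err| format!("Failed to set global subscriber: {:?}", err))?;

    tracing::info!("logging pipeline started");
    Ok(())
}
```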
|
31
scripts/otel/README.md
Normal file
|
@ -0,0 +1,31 @@
|
|||
# OpenTelemetry for Kanidm
|
||||
|
||||
First, start the containers. You can use docker-compose if you know how, or `./startup.sh` is a
|
||||
shortcut. You'll need docker (or similar) and docker-compose (or something that can handle
|
||||
`docker-compose.yml`).
|
||||
|
||||
Once that's stopped scrolling for a bit, run the Kanidm server, setting the `otel_grpc_url` to
|
||||
`http://localhost:4317`
|
||||
|
||||
Then access the
|
||||
[Grafana UI](http://localhost:3000/explore?panes=%7B%22G-2%22:%7B%22datasource%22:%22tempo%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22datasource%22:%7B%22type%22:%22tempo%22,%22uid%22:%22tempo%22%7D,%22queryType%22:%22traceqlSearch%22,%22limit%22:20,%22filters%22:%5B%7B%22id%22:%2219b1a582%22,%22operator%22:%22%3D%22,%22scope%22:%22span%22%7D,%7B%22id%22:%22service-name%22,%22tag%22:%22service.name%22,%22operator%22:%22%3D%22,%22scope%22:%22resource%22,%22value%22:%5B%22kanidmd%22%5D,%22valueType%22:%22string%22%7D%5D%7D%5D,%22range%22:%7B%22from%22:%22now-6h%22,%22to%22:%22now%22%7D%7D%7D&schemaVersion=1&orgId=1)
|
||||
and start clicking on traces 😁
|
||||
|
||||
## Architecture of the docker containers
|
||||
|
||||
```mermaid
|
||||
graph TD;
|
||||
|
||||
K[Kanidmd] --"OTLP tcp/4317"--> T
|
||||
|
||||
U[User] --tcp/3000--> G[Grafana]
|
||||
G --tcp/3200-->T["Tempo (Traces)"]
|
||||
G --tcp/9090-->P["Prometheus (Metrics)"]
|
||||
|
||||
T--cache-->TDV["Tempo Docker Volume"]
|
||||
T--tcp/9000-->M["Minio (S3 Storage)"]
|
||||
|
||||
P--tcp/9000-->M
|
||||
P--cache-->PDV["Prometheus Docker Volume"]
|
||||
M-->DVM["Minio Docker Volume"]
|
||||
```
|
92
scripts/otel/docker-compose.yml
Normal file
|
@ -0,0 +1,92 @@
|
|||
---
|
||||
# It should be *very* clear that this is an insecure, dev-only configuration. Don't run this in production!
|
||||
services:
|
||||
grafana:
|
||||
image: grafana/grafana:10.1.1
|
||||
volumes:
|
||||
- type: bind
|
||||
source: ./grafana-datasources.yaml
|
||||
target: /etc/grafana/provisioning/datasources/datasources.yaml
|
||||
environment:
|
||||
- GF_AUTH_ANONYMOUS_ENABLED=true
|
||||
- GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
|
||||
- GF_AUTH_DISABLE_LOGIN_FORM=true
|
||||
- GF_FEATURE_TOGGLES_ENABLE=traceqlEditor
|
||||
ports:
|
||||
- "3000:3000"
|
||||
tempo:
|
||||
image: grafana/tempo:latest
|
||||
command: [ "-config.file=/etc/tempo.yaml" ]
|
||||
volumes:
|
||||
- type: bind
|
||||
source: ./tempo.yaml
|
||||
target: /etc/tempo.yaml
|
||||
- type: volume
|
||||
source: tempo
|
||||
target: /tmp/tempo
|
||||
ports:
|
||||
# - "14268:14268" # jaeger ingest
|
||||
- "3200:3200" # tempo
|
||||
- "9095:9095" # tempo grpc
|
||||
- "4317:4317" # otlp grpc
|
||||
# - "4318:4318" # otlp http
|
||||
# - "9411:9411" # zipkin
|
||||
# loki:
|
||||
# image: docker.io/grafana/loki:2.9.2
|
||||
# volumes:
|
||||
# - type: bind
|
||||
# source: ./loki-local-config.yaml
|
||||
# target: /etc/loki/local-config.yaml
|
||||
# command: |
|
||||
# -config.file=/etc/loki/local-config.yaml \
|
||||
# -target=all
|
||||
# ports:
|
||||
# - "3100:3100"
|
||||
# - "3101:3101"
|
||||
# - "3102:3102"
|
||||
minio:
|
||||
image: minio/minio
|
||||
entrypoint:
|
||||
- sh
|
||||
- -euc
|
||||
- |
|
||||
mkdir -p /data/loki-data && \
|
||||
mkdir -p /data/loki-ruler && \
|
||||
mkdir -p /data/tempo && \
|
||||
minio server /data
|
||||
environment:
|
||||
- MINIO_ROOT_USER=loki
|
||||
- MINIO_ROOT_PASSWORD=supersecret
|
||||
- MINIO_PROMETHEUS_AUTH_TYPE=public
|
||||
- MINIO_UPDATE=off
|
||||
ports:
|
||||
- 9000
|
||||
volumes:
|
||||
- type: volume
|
||||
source: minio
|
||||
target: /data
|
||||
healthcheck:
|
||||
test: [ "CMD", "curl", "-f", "http://localhost:9000/minio/health/live" ]
|
||||
interval: 15s
|
||||
timeout: 20s
|
||||
retries: 5
|
||||
|
||||
prometheus:
|
||||
hostname: prometheus
|
||||
container_name: prometheus
|
||||
image: prom/prometheus:v2.47.2
|
||||
restart: always
|
||||
ports:
|
||||
- "9090:9090"
|
||||
volumes:
|
||||
- type: bind
|
||||
source: ./prometheus.yml
|
||||
target: /etc/prometheus/prometheus.yml
|
||||
- type: volume
|
||||
source: prometheus
|
||||
target: /prometheus
|
||||
|
||||
volumes:
|
||||
minio:
|
||||
tempo:
|
||||
prometheus:
|
32
scripts/otel/grafana-datasources.yaml
Normal file
|
@ -0,0 +1,32 @@
|
|||
---
|
||||
# It should be *very* clear that this is an insecure, dev-only configuration. Don't run this in production!
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
uid: prometheus
|
||||
access: proxy
|
||||
orgId: 1
|
||||
url: http://prometheus:9090
|
||||
basicAuth: false
|
||||
isDefault: false
|
||||
version: 1
|
||||
editable: false
|
||||
jsonData:
|
||||
httpMethod: GET
|
||||
- name: Tempo
|
||||
type: tempo
|
||||
access: proxy
|
||||
orgId: 1
|
||||
url: http://tempo:3200
|
||||
basicAuth: false
|
||||
isDefault: true
|
||||
version: 1
|
||||
editable: false
|
||||
apiVersion: 1
|
||||
uid: tempo
|
||||
jsonData:
|
||||
httpMethod: GET
|
||||
serviceMap:
|
||||
datasourceUid: prometheus
|
32
scripts/otel/loki-local-config.yaml
Normal file
|
@ -0,0 +1,32 @@
|
|||
---
|
||||
# It should be *very* clear that this is an insecure, dev-only configuration. Don't run this in production!
|
||||
server:
|
||||
http_listen_port: 3100
|
||||
schema_config:
|
||||
configs:
|
||||
- from: 2021-08-01
|
||||
store: tsdb
|
||||
object_store: s3
|
||||
schema: v12
|
||||
index:
|
||||
prefix: index_
|
||||
period: 24h
|
||||
common:
|
||||
path_prefix: /loki
|
||||
replication_factor: 1
|
||||
storage:
|
||||
s3:
|
||||
endpoint: minio:9000
|
||||
insecure: true
|
||||
bucketnames: loki-data
|
||||
access_key_id: loki
|
||||
secret_access_key: supersecret
|
||||
s3forcepathstyle: true
|
||||
ring:
|
||||
instance_addr: 0.0.0.0
|
||||
kvstore:
|
||||
store: memberlist
|
||||
ruler:
|
||||
storage:
|
||||
s3:
|
||||
bucketnames: loki-ruler
|
12
scripts/otel/multi_curl.sh
Executable file
|
@ -0,0 +1,12 @@
|
|||
#!/bin/bash
|
||||
|
||||
# This allows testing a bunch of endpoints in a really dumb way
|
||||
|
||||
COMMAND="curl -ks"
|
||||
|
||||
# 404
|
||||
$COMMAND https://localhost:8443/asdfasfasfsadf > /dev/null 2>&1
|
||||
# auth fail
|
||||
$COMMAND --json '{"hello" : "world" }' https://localhost:8443/v1/auth > /dev/null 2>&1
|
||||
# good
|
||||
$COMMAND https://localhost:8443/status
|
19
scripts/otel/prometheus.yml
Normal file
|
@ -0,0 +1,19 @@
|
|||
global:
|
||||
scrape_interval: 30s # Scrape targets every 30 seconds.
|
||||
|
||||
# Attach these labels to any time series or alerts when communicating with
|
||||
# external systems (federation, remote storage, Alertmanager).
|
||||
# external_labels:
|
||||
# monitor: "codelab-monitor"
|
||||
|
||||
# A scrape configuration containing exactly one endpoint to scrape:
|
||||
# Here it's Prometheus itself.
|
||||
scrape_configs:
|
||||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||
- job_name: "prometheus"
|
||||
|
||||
# Override the global default and scrape targets from this job every 5 seconds.
|
||||
# scrape_interval: 30s
|
||||
|
||||
static_configs:
|
||||
- targets: ["localhost:9090"]
|
8
scripts/otel/startup.sh
Executable file
|
@ -0,0 +1,8 @@
|
|||
#!/bin/bash
|
||||
|
||||
echo "Tearing down"
|
||||
docker-compose down -t0
|
||||
echo "Building up"
|
||||
docker-compose up -d
|
||||
echo "LOG TIME!"
|
||||
docker-compose logs -f
|
69
scripts/otel/tempo.yaml
Normal file
|
@ -0,0 +1,69 @@
|
|||
---
|
||||
# It should be *very* clear that this is an insecure, dev-only configuration. Don't run this in production!
|
||||
|
||||
# config docs https://grafana.com/docs/tempo/latest/configuration/#compactor
|
||||
|
||||
server:
|
||||
http_listen_port: 3200
|
||||
grpc_server_max_recv_msg_size: 20971520 # 20MiB
|
||||
grpc_server_max_send_msg_size: 20971520 # 20MiB
|
||||
query_frontend:
|
||||
search:
|
||||
duration_slo: 5s
|
||||
throughput_bytes_slo: 1.073741824e+09
|
||||
trace_by_id:
|
||||
duration_slo: 5s
|
||||
|
||||
distributor:
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
# http:
|
||||
grpc:
|
||||
max_recv_msg_size_mib: 20
|
||||
|
||||
opencensus:
|
||||
|
||||
# ingester:
|
||||
# max_block_duration: 5m # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally
|
||||
|
||||
compactor:
|
||||
compaction:
|
||||
# Optional. Duration to keep blocks. Default is 14 days (336h).
|
||||
block_retention: 24h
|
||||
|
||||
metrics_generator:
|
||||
registry:
|
||||
external_labels:
|
||||
source: tempo
|
||||
cluster: docker-compose
|
||||
storage:
|
||||
# path: /tmp/tempo/generator/wal
|
||||
remote_write:
|
||||
- url: http://prometheus:9090/api/v1/write
|
||||
send_exemplars: true
|
||||
|
||||
storage:
|
||||
trace:
|
||||
backend: s3 # we're using minio anyway!
|
||||
s3:
|
||||
bucket: tempo
|
||||
endpoint: minio:9000
|
||||
region: minio
|
||||
insecure: true
|
||||
access_key: loki
|
||||
secret_key: supersecret
|
||||
# backend: local
|
||||
# wal:
|
||||
# path: /tmp/tempo/wal # where to store the the wal locally
|
||||
# local:
|
||||
# path: /tmp/tempo/blocks
|
||||
|
||||
overrides:
|
||||
defaults:
|
||||
metrics_generator:
|
||||
processors:
|
||||
- service-graphs
|
||||
- span-metrics # enables metrics generator
|
||||
global:
|
||||
max_bytes_per_trace: 20971520 # 20MiB
|
|
@ -1568,7 +1568,7 @@ impl QueryServerWriteV1 {
|
|||
let res = idms_prox_write
|
||||
.qs_write
|
||||
.purge_tombstones()
|
||||
.and_then(|_| idms_prox_write.commit());
|
||||
.and_then(|_changed| idms_prox_write.commit());
|
||||
|
||||
match res {
|
||||
Ok(()) => {
|
||||
|
@ -1592,7 +1592,14 @@ impl QueryServerWriteV1 {
|
|||
let res = idms_prox_write
|
||||
.qs_write
|
||||
.purge_recycled()
|
||||
.and_then(|_| idms_prox_write.commit());
|
||||
.and_then(|touched| {
|
||||
// don't need to commit a txn with no changes
|
||||
if touched > 0 {
|
||||
idms_prox_write.commit()
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
});
|
||||
|
||||
match res {
|
||||
Ok(()) => {
|
||||
|
|
|
@ -20,7 +20,7 @@ use kanidm_lib_crypto::prelude::X509;
|
|||
use kanidm_lib_crypto::serialise::x509b64;
|
||||
|
||||
use serde::Deserialize;
|
||||
use sketching::tracing_subscriber::EnvFilter;
|
||||
use sketching::LogLevel;
|
||||
use url::Url;
|
||||
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
|
@ -171,6 +171,8 @@ pub struct ServerConfig {
|
|||
#[serde(rename = "replication")]
|
||||
/// Replication configuration, this is a development feature and not yet ready for production use.
|
||||
pub repl_config: Option<ReplicationConfiguration>,
|
||||
/// An optional OpenTelemetry collector (GRPC) url to send trace and log data to, eg http://localhost:4317
|
||||
pub otel_grpc_url: Option<String>,
|
||||
}
|
||||
|
||||
impl ServerConfig {
|
||||
|
@ -233,50 +235,6 @@ impl FromStr for ServerRole {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize, Debug, Default)]
|
||||
pub enum LogLevel {
|
||||
#[default]
|
||||
#[serde(rename = "info")]
|
||||
Info,
|
||||
#[serde(rename = "debug")]
|
||||
Debug,
|
||||
#[serde(rename = "trace")]
|
||||
Trace,
|
||||
}
|
||||
|
||||
impl FromStr for LogLevel {
|
||||
type Err = &'static str;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"info" => Ok(LogLevel::Info),
|
||||
"debug" => Ok(LogLevel::Debug),
|
||||
"trace" => Ok(LogLevel::Trace),
|
||||
_ => Err("Must be one of info, debug, trace"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for LogLevel {
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
LogLevel::Info => "info".to_string(),
|
||||
LogLevel::Debug => "debug".to_string(),
|
||||
LogLevel::Trace => "trace".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LogLevel> for EnvFilter {
|
||||
fn from(value: LogLevel) -> Self {
|
||||
match value {
|
||||
LogLevel::Info => EnvFilter::new("info"),
|
||||
LogLevel::Debug => EnvFilter::new("debug"),
|
||||
LogLevel::Trace => EnvFilter::new("trace"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IntegrationTestConfig {
|
||||
pub admin_user: String,
|
||||
|
@ -434,7 +392,6 @@ impl Configuration {
|
|||
}
|
||||
|
||||
pub fn update_log_level(&mut self, level: &Option<LogLevel>) {
|
||||
let level = level.clone();
|
||||
self.log_level = level.unwrap_or_default();
|
||||
}
|
||||
|
||||
|
|
|
@ -38,7 +38,6 @@ use tokio_openssl::SslStream;
|
|||
|
||||
use futures_util::future::poll_fn;
|
||||
use tokio::net::TcpListener;
|
||||
use tracing::Level;
|
||||
|
||||
use std::io::ErrorKind;
|
||||
use std::path::PathBuf;
|
||||
|
@ -47,7 +46,7 @@ use std::sync::Arc;
|
|||
use std::{net::SocketAddr, str::FromStr};
|
||||
use tokio::sync::broadcast;
|
||||
use tower_http::services::ServeDir;
|
||||
use tower_http::trace::{DefaultOnRequest, TraceLayer};
|
||||
use tower_http::trace::TraceLayer;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::CoreAction;
|
||||
|
@ -288,7 +287,7 @@ pub async fn create_https_server(
|
|||
let trace_layer = TraceLayer::new_for_http()
|
||||
.make_span_with(trace::DefaultMakeSpanKanidmd::new())
|
||||
// setting these to trace because all they do is print "started processing request", and we are already doing that enough!
|
||||
.on_request(DefaultOnRequest::new().level(Level::TRACE));
|
||||
.on_response(trace::DefaultOnResponseKanidmd::new());
|
||||
|
||||
let app = app
|
||||
.merge(static_routes)
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
//! Reimplementation of tower-http's DefaultMakeSpan that only runs at "INFO" level for our own needs.
|
||||
|
||||
use http::Request;
|
||||
use kanidm_proto::constants::KOPID;
|
||||
use sketching::event_dynamic_lvl;
|
||||
use tower_http::LatencyUnit;
|
||||
use tracing::{Level, Span};
|
||||
|
||||
/// The default way Spans will be created for Trace.
|
||||
|
@ -22,7 +25,7 @@ impl Default for DefaultMakeSpanKanidmd {
|
|||
}
|
||||
|
||||
impl<B> tower_http::trace::MakeSpan<B> for DefaultMakeSpanKanidmd {
|
||||
#[instrument(name = "handle_request", skip_all)]
|
||||
#[instrument(name = "handle_request", skip_all, fields(latency, status_code))]
|
||||
fn make_span(&mut self, request: &Request<B>) -> Span {
|
||||
tracing::span!(
|
||||
Level::INFO,
|
||||
|
@ -33,3 +36,64 @@ impl<B> tower_http::trace::MakeSpan<B> for DefaultMakeSpanKanidmd {
|
|||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct DefaultOnResponseKanidmd {
|
||||
#[allow(dead_code)]
|
||||
level: Level,
|
||||
#[allow(dead_code)]
|
||||
latency_unit: LatencyUnit,
|
||||
#[allow(dead_code)]
|
||||
include_headers: bool,
|
||||
}
|
||||
|
||||
impl DefaultOnResponseKanidmd {
|
||||
#[allow(dead_code)]
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for DefaultOnResponseKanidmd {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
level: Level::INFO,
|
||||
latency_unit: LatencyUnit::Millis,
|
||||
include_headers: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<B> tower_http::trace::OnResponse<B> for DefaultOnResponseKanidmd {
|
||||
fn on_response(
|
||||
self,
|
||||
response: &axum::response::Response<B>,
|
||||
latency: std::time::Duration,
|
||||
_span: &Span,
|
||||
) {
|
||||
let kopid = match response.headers().get(KOPID) {
|
||||
Some(val) => val.to_str().unwrap_or("<invalid kopid>"),
|
||||
None => "<unknown>",
|
||||
};
|
||||
let (level, msg) =
|
||||
match response.status().is_success() || response.status().is_informational() {
|
||||
true => (Level::INFO, "response sent"),
|
||||
false => {
|
||||
if response.status().is_redirection() {
|
||||
(Level::INFO, "client redirection sent")
|
||||
} else if response.status().is_client_error() {
|
||||
(Level::WARN, "client error") // it worked, but there was an input error
|
||||
} else {
|
||||
(Level::ERROR, "error handling request") // oh no the server failed
|
||||
}
|
||||
}
|
||||
};
|
||||
event_dynamic_lvl!(
|
||||
level,
|
||||
?latency,
|
||||
status_code = response.status().as_u16(),
|
||||
kopid = kopid,
|
||||
msg
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,6 +33,12 @@ serde = { workspace = true, features = ["derive"] }
|
|||
tokio = { workspace = true, features = ["rt-multi-thread", "macros", "signal"] }
|
||||
tokio-util = { workspace = true, features = ["codec"] }
|
||||
toml = { workspace = true }
|
||||
opentelemetry = { workspace = true, features = ["logs"] }
|
||||
opentelemetry_api = { workspace = true, features = ["logs"] }
|
||||
tracing = { workspace = true, features = [
|
||||
"max_level_trace",
|
||||
"release_max_level_debug",
|
||||
] }
|
||||
|
||||
[target.'cfg(target_os = "linux")'.dependencies]
|
||||
sd-notify.workspace = true
|
||||
|
|
File diff suppressed because it is too large
|
@ -518,7 +518,7 @@ lazy_static! {
|
|||
};
|
||||
}
|
||||
|
||||
/// Make a list of all the non-admin BuiltinGroup's that are created by default, doing it in a standard-ish way so we can use it for testing and stuff
|
||||
/// Make a list of all the non-admin BuiltinGroup's that are created by default, doing it in a standard-ish way so we can use it around the platform
|
||||
pub fn idm_builtin_non_admin_groups() -> Vec<&'static BuiltinGroup> {
|
||||
// Create any system default schema entries.
|
||||
vec![
|
||||
|
|
|
@ -62,7 +62,16 @@ impl Plugin for Domain {
|
|||
}
|
||||
}
|
||||
|
||||
fn generate_domain_cookie_key() -> Value {
|
||||
let mut key = [0; 64];
|
||||
let mut rng = StdRng::from_entropy();
|
||||
rng.fill(&mut key);
|
||||
Value::new_privatebinary(&key)
|
||||
}
|
||||
|
||||
impl Domain {
|
||||
/// Generates the cookie key for the domain.
|
||||
|
||||
fn modify_inner<T: Clone + std::fmt::Debug>(
|
||||
qs: &mut QueryServerWriteTransaction,
|
||||
cand: &mut [Entry<EntryInvalid, T>],
|
||||
|
@ -129,11 +138,7 @@ impl Domain {
|
|||
|
||||
if !e.attribute_pres(Attribute::PrivateCookieKey) {
|
||||
security_info!("regenerating domain cookie key");
|
||||
let mut key = [0; 64];
|
||||
let mut rng = StdRng::from_entropy();
|
||||
rng.fill(&mut key);
|
||||
let v = Value::new_privatebinary(&key);
|
||||
e.add_ava(Attribute::PrivateCookieKey, v);
|
||||
e.add_ava(Attribute::PrivateCookieKey, generate_domain_cookie_key());
|
||||
}
|
||||
|
||||
trace!(?e);
|
||||
|
|
|
@ -155,19 +155,20 @@ impl<'a> QueryServerWriteTransaction<'a> {
|
|||
res
|
||||
}
|
||||
|
||||
#[instrument(level = "debug", skip_all)]
|
||||
/// - If the thing exists:
|
||||
/// - Ensure the set of attributes match and are present
|
||||
/// (but don't delete multivalue or extended attributes in the situation).
|
||||
/// - If not:
|
||||
/// - Create the entry
|
||||
///
|
||||
/// This will leave extra classes and attributes alone!
|
||||
///
|
||||
/// NOTE: `gen_modlist*` IS schema aware and will handle multivalue correctly!
|
||||
pub fn internal_migrate_or_create(
|
||||
&mut self,
|
||||
e: Entry<EntryInit, EntryNew>,
|
||||
) -> Result<(), OperationError> {
|
||||
// if the thing exists, ensure the set of attributes on
|
||||
// Entry A match and are present (but don't delete multivalue, or extended
|
||||
// attributes in the situation.
|
||||
// If not exist, create from Entry B
|
||||
//
|
||||
// This will extra classes an attributes alone!
|
||||
//
|
||||
// NOTE: gen modlist IS schema aware and will handle multivalue
|
||||
// correctly!
|
||||
trace!("internal_migrate_or_create operating on {:?}", e.get_uuid());
|
||||
|
||||
let Some(filt) = e.filter_from_attrs(&[Attribute::Uuid.into()]) else {
|
||||
|
@@ -298,7 +299,7 @@ impl<'a> QueryServerWriteTransaction<'a> {
    /// a future version.
    ///
    /// An extended feature of this is the ability to store multiple TOTP's per entry.
    #[instrument(level = "debug", skip_all)]
    #[instrument(level = "info", skip_all)]
    pub fn migrate_9_to_10(&mut self) -> Result<(), OperationError> {
        admin_warn!("starting 9 to 10 migration.");
        let filter = filter!(f_or!([
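This and the following hunks flip the level on the migrations' #[instrument]
attributes. For reference, #[instrument] wraps the call in a tracing span at the
given level, so once an OpenTelemetry layer is installed these spans show up in
the exported traces. A small illustrative example (the function is made up):

// Illustrative only: the span is named after the function and lives for the
// duration of the call; skip_all keeps the arguments out of the span fields.
#[tracing::instrument(level = "info", skip_all)]
fn migrate_example(entries: &[&str]) {
    tracing::debug!(count = entries.len(), "working through entries");
}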
@@ -318,7 +319,7 @@ impl<'a> QueryServerWriteTransaction<'a> {
    /// are, they are migrated to the passkey type, allowing us to deprecate and remove the older
    /// credential behaviour.
    ///
    #[instrument(level = "debug", skip_all)]
    #[instrument(level = "info", skip_all)]
    pub fn migrate_10_to_11(&mut self) -> Result<(), OperationError> {
        admin_warn!("starting 9 to 10 migration.");
        let filter = filter!(f_pres(Attribute::PrimaryCredential));
@@ -363,9 +364,9 @@ impl<'a> QueryServerWriteTransaction<'a> {

    /// Migrate 11 to 12
    ///
    /// Rewrite api-tokens from session to a dedicated api token type.
    /// Rewrite api-tokens from session to a dedicated API token type.
    ///
    #[instrument(level = "debug", skip_all)]
    #[instrument(level = "info", skip_all)]
    pub fn migrate_11_to_12(&mut self) -> Result<(), OperationError> {
        admin_warn!("starting 11 to 12 migration.");
        // sync_token_session
@@ -421,7 +422,8 @@ impl<'a> QueryServerWriteTransaction<'a> {
        self.internal_apply_writable(mod_candidates)
    }

    #[instrument(level = "debug", skip_all)]
    #[instrument(level = "info", skip_all)]
    /// Deletes the Domain info privatecookiekey to force a regeneration as we changed the format
    pub fn migrate_12_to_13(&mut self) -> Result<(), OperationError> {
        admin_warn!("starting 12 to 13 migration.");
        let filter = filter!(f_and!([
@@ -434,7 +436,8 @@ impl<'a> QueryServerWriteTransaction<'a> {
        // Complete
    }

    #[instrument(level = "debug", skip_all)]
    #[instrument(level = "info", skip_all)]
    /// - Deletes the incorrectly added "member" attribute on dynamic groups
    pub fn migrate_13_to_14(&mut self) -> Result<(), OperationError> {
        admin_warn!("starting 13 to 14 migration.");
        let filter = filter!(f_eq(
@@ -447,18 +450,20 @@ impl<'a> QueryServerWriteTransaction<'a> {
        // Complete
    }

    #[instrument(level = "debug", skip_all)]
    #[instrument(level = "info", skip_all)]
    /// - Deletes the non-existing attribute for idverification private key which triggers it to regen
    pub fn migrate_14_to_15(&mut self) -> Result<(), OperationError> {
        admin_warn!("starting 14 to 15 migration.");
        let filter = filter!(f_eq(Attribute::Class, EntryClass::Person.into()));
        // Delete the non-existing attr for idv private key which triggers
        // it to regen.
        // Delete the non-existing attr for idv private key which triggers it to regen.
        let modlist = ModifyList::new_purge(Attribute::IdVerificationEcKey);
        self.internal_modify(&filter, &modlist)
        // Complete
    }

    #[instrument(level = "debug", skip_all)]
    #[instrument(level = "info", skip_all)]
    /// - updates the system config to include the new session expiry values.
    /// - adds the account policy object to idm_all_accounts
    pub fn migrate_15_to_16(&mut self) -> Result<(), OperationError> {
        admin_warn!("starting 15 to 16 migration.");
@@ -509,7 +514,7 @@ impl<'a> QueryServerWriteTransaction<'a> {
        // Complete
    }

    #[instrument(level = "debug", skip_all)]
    #[instrument(level = "info", skip_all)]
    pub fn initialise_schema_core(&mut self) -> Result<(), OperationError> {
        admin_debug!("initialise_schema_core -> start ...");
        // Load in all the "core" schema, that we already have in "memory".
@@ -532,7 +537,7 @@ impl<'a> QueryServerWriteTransaction<'a> {
        r
    }

    #[instrument(level = "debug", skip_all)]
    #[instrument(level = "info", skip_all)]
    pub fn initialise_schema_idm(&mut self) -> Result<(), OperationError> {
        admin_debug!("initialise_schema_idm -> start ...");
@@ -652,8 +657,8 @@ impl<'a> QueryServerWriteTransaction<'a> {
        r
    }

    // This function is idempotent
    #[instrument(level = "debug", skip_all)]
    #[instrument(level = "info", skip_all)]
    /// This function is idempotent, runs all the startup functionality and checks
    pub fn initialise_idm(&mut self) -> Result<(), OperationError> {
        // First, check the system_info object. This stores some server information
        // and details. It's a pretty const thing. Also check anonymous, important to many
@@ -684,9 +689,7 @@ impl<'a> QueryServerWriteTransaction<'a> {
        debug_assert!(res.is_ok());
        res?;

        let idm_entries = idm_builtin_non_admin_groups();

        let res: Result<(), _> = idm_entries
        let res: Result<(), _> = idm_builtin_non_admin_groups()
            .into_iter()
            .try_for_each(|e| self.internal_migrate_or_create(e.clone().try_into()?));
        if res.is_ok() {
@@ -756,7 +759,8 @@ impl<'a> QueryServerWriteTransaction<'a> {
        res?;

        // Delete entries that no longer need to exist.
        let delete_entries = [UUID_IDM_ACP_OAUTH2_READ_PRIV_V1];
        // TODO: Shouldn't this be a migration?
        let delete_entries: [Uuid; 1] = [UUID_IDM_ACP_OAUTH2_READ_PRIV_V1];

        let res: Result<(), _> = delete_entries
            .into_iter()
@@ -6,7 +6,7 @@ use hashbrown::HashMap;

impl<'a> QueryServerWriteTransaction<'a> {
    #[instrument(level = "debug", skip_all)]
    pub fn purge_tombstones(&mut self) -> Result<(), OperationError> {
    pub fn purge_tombstones(&mut self) -> Result<usize, OperationError> {
        // purge everything that is a tombstone.
        let trim_cid = self.trim_cid().clone();
@@ -17,17 +17,18 @@ impl<'a> QueryServerWriteTransaction<'a> {
                error!(err = ?e, "Tombstone purge operation failed (backend)");
                e
            })
            .map(|_| {
            .map(|res| {
                admin_info!("Tombstone purge operation success");
                res
            })
    }

    #[instrument(level = "debug", skip_all)]
    pub fn purge_recycled(&mut self) -> Result<(), OperationError> {
    pub fn purge_recycled(&mut self) -> Result<usize, OperationError> {
        // Send everything that is recycled to tombstone
        // Search all recycled
        let cid = self.cid.sub_secs(RECYCLEBIN_MAX_AGE).map_err(|e| {
            admin_error!(err = ?e, "Unable to generate search cid");
            admin_error!(err = ?e, "Unable to generate search cid for purge_recycled");
            e
        })?;
        let rc = self.internal_search(filter_all!(f_and!([
@@ -36,8 +37,8 @@ impl<'a> QueryServerWriteTransaction<'a> {
        ])))?;

        if rc.is_empty() {
            admin_info!("No recycled items present - purge operation success");
            return Ok(());
            admin_debug!("No recycled items present - purge operation success");
            return Ok(0);
        }

        // Modify them to strip all avas except uuid
@@ -56,6 +57,9 @@ impl<'a> QueryServerWriteTransaction<'a> {
            .collect();

        let tombstone_cand = tombstone_cand?;
        // it's enough to say "yeah we tried to touch this many" because
        // we're using this to decide if we're going to commit the txn
        let touched = tombstone_cand.len();

        // Backend Modify
        self.be_txn
@@ -66,6 +70,7 @@ impl<'a> QueryServerWriteTransaction<'a> {
            })
            .map(|_| {
                admin_info!("Purge recycled operation success");
                touched
            })
    }
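Both purge functions now report how many entries they touched instead of
returning unit. A hypothetical caller sketch (names are illustrative, not the
actual kanidmd task runner) showing the intent of that count:

// Hypothetical sketch: only signal a commit when the purge modified something.
fn purge_recycled_cycle(
    write_txn: &mut QueryServerWriteTransaction<'_>,
) -> Result<bool, OperationError> {
    let touched = write_txn.purge_recycled()?;
    // An empty purge doesn't need a commit.
    Ok(touched > 0)
}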
@@ -4,6 +4,8 @@ use uuid::Uuid;

use crate::prelude::*;

// TODO: this should *totally* be running the OTEL metrics collector

pub struct StatusRequestEvent {
    pub eventid: Uuid,
}
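The TODO above is not implemented in this change. For reference, a counter going
through the OpenTelemetry metrics API that the daemon now depends on could look
roughly like this; the meter and instrument names are invented for the sketch,
it assumes the 0.20 metrics API, and it only does anything once a global meter
provider has been installed.

// Sketch only: count status requests via the global meter provider. In real
// code the instrument would be created once and reused, not per call.
use opentelemetry_api::{global, KeyValue};

pub fn record_status_request(source: &str) {
    let counter = global::meter("kanidmd")
        .u64_counter("status_requests")
        .init();
    counter.add(1, &[KeyValue::new("source", source.to_string())]);
}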
@@ -26,7 +26,6 @@ serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
serde-wasm-bindgen = { workspace = true }
time = { workspace = true }
url = { workspace = true }
uuid = { workspace = true }
wasm-bindgen = { workspace = true }
wasm-bindgen-futures = { workspace = true }
Binary file not shown.
@@ -27,7 +27,6 @@ serde_json = { workspace = true }
serde-wasm-bindgen = { workspace = true }
wasm-bindgen = { workspace = true }
wasm-bindgen-futures = { workspace = true }
url = { workspace = true }
uuid = { workspace = true }
yew = { workspace = true, features = ["csr"] }
yew-router = { workspace = true }
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN server/web_ui/pkg/external/bootstrap.min.css.map.br (vendored)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -225,7 +225,7 @@ function makeMutClosure(arg0, arg1, dtor, f) {
    return real;
}
function __wbg_adapter_48(arg0, arg1) {
    wasm._dyn_core__ops__function__FnMut_____Output___R_as_wasm_bindgen__closure__WasmClosure___describe__invoke__h09aa096681cc0b01(arg0, arg1);
    wasm._dyn_core__ops__function__FnMut_____Output___R_as_wasm_bindgen__closure__WasmClosure___describe__invoke__ha55f8bc2a1dec3e6(arg0, arg1);
}

let stack_pointer = 128;

@@ -1130,20 +1130,20 @@ function __wbg_get_imports() {
        const ret = wasm.memory;
        return addHeapObject(ret);
    };
    imports.wbg.__wbindgen_closure_wrapper1151 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 587, __wbg_adapter_48);
    imports.wbg.__wbindgen_closure_wrapper1249 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 595, __wbg_adapter_48);
        return addHeapObject(ret);
    };
    imports.wbg.__wbindgen_closure_wrapper3671 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 1710, __wbg_adapter_51);
    imports.wbg.__wbindgen_closure_wrapper3675 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 1711, __wbg_adapter_51);
        return addHeapObject(ret);
    };
    imports.wbg.__wbindgen_closure_wrapper3751 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 1739, __wbg_adapter_54);
    imports.wbg.__wbindgen_closure_wrapper3755 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 1740, __wbg_adapter_54);
        return addHeapObject(ret);
    };
    imports.wbg.__wbindgen_closure_wrapper3835 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 1776, __wbg_adapter_57);
    imports.wbg.__wbindgen_closure_wrapper3839 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 1777, __wbg_adapter_57);
        return addHeapObject(ret);
    };
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -28,7 +28,6 @@ serde_json = { workspace = true }
serde-wasm-bindgen = { workspace = true }
wasm-bindgen = { workspace = true }
wasm-bindgen-futures = { workspace = true }
url = { workspace = true }
uuid = { workspace = true }
yew = { workspace = true, features = ["csr"] }
yew-router = { workspace = true }
@@ -225,7 +225,7 @@ function makeMutClosure(arg0, arg1, dtor, f) {
    return real;
}
function __wbg_adapter_48(arg0, arg1) {
    wasm._dyn_core__ops__function__FnMut_____Output___R_as_wasm_bindgen__closure__WasmClosure___describe__invoke__h09aa096681cc0b01(arg0, arg1);
    wasm._dyn_core__ops__function__FnMut_____Output___R_as_wasm_bindgen__closure__WasmClosure___describe__invoke__ha55f8bc2a1dec3e6(arg0, arg1);
}

let stack_pointer = 128;

@@ -1130,20 +1130,20 @@ function __wbg_get_imports() {
        const ret = wasm.memory;
        return addHeapObject(ret);
    };
    imports.wbg.__wbindgen_closure_wrapper1151 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 587, __wbg_adapter_48);
    imports.wbg.__wbindgen_closure_wrapper1249 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 595, __wbg_adapter_48);
        return addHeapObject(ret);
    };
    imports.wbg.__wbindgen_closure_wrapper3671 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 1710, __wbg_adapter_51);
    imports.wbg.__wbindgen_closure_wrapper3675 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 1711, __wbg_adapter_51);
        return addHeapObject(ret);
    };
    imports.wbg.__wbindgen_closure_wrapper3751 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 1739, __wbg_adapter_54);
    imports.wbg.__wbindgen_closure_wrapper3755 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 1740, __wbg_adapter_54);
        return addHeapObject(ret);
    };
    imports.wbg.__wbindgen_closure_wrapper3835 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 1776, __wbg_adapter_57);
    imports.wbg.__wbindgen_closure_wrapper3839 = function(arg0, arg1, arg2) {
        const ret = makeMutClosure(arg0, arg1, 1777, __wbg_adapter_57);
        return addHeapObject(ret);
    };
Binary file not shown.