From e7d91ed55d15ee702ddc2e9ad1e6077262af7078 Mon Sep 17 00:00:00 2001
From: Firstyear <william@blackhats.net.au>
Date: Sun, 12 Jan 2025 13:53:31 +1000
Subject: [PATCH] 20250110 eo fixes (#3353)

While preparing for everything open, I found a small number of doc/book issues, some logging issues, and some minor performance wins. This pr is just small bits of various polish around the place.
---
 book/src/sync/freeipa.md            | 22 +++++++++++--
 examples/kanidm-ipa-sync            |  5 +--
 server/core/src/https/v1_scim.rs    | 40 +++++++++++++++++-------
 server/lib/src/be/mod.rs            | 17 ++++++++--
 server/lib/src/filter.rs            | 48 +++++++++++++++++++++++++++++
 server/lib/src/idm/scim.rs          | 27 ++++++++++------
 server/lib/src/server/access/mod.rs |  3 ++
 server/lib/src/server/delete.rs     |  3 +-
 8 files changed, 137 insertions(+), 28 deletions(-)

diff --git a/book/src/sync/freeipa.md b/book/src/sync/freeipa.md
index 4da11cbe9..8de1f0517 100644
--- a/book/src/sync/freeipa.md
+++ b/book/src/sync/freeipa.md
@@ -33,7 +33,8 @@ You can find the name of your 389 Directory Server instance with:
 
 ```bash
 # Run on the FreeIPA server
-dsconf --list
+dsctl --list
+> slapd-DEV-KANIDM-COM
 ```
 
 Using this you can show the current status of the retro changelog plugin to see if you need to
@@ -83,6 +84,20 @@ kanidm-ipa-sync [-c /path/to/kanidm/config] -i /path/to/kanidm-ipa-sync -n
 kanidm-ipa-sync -i /etc/kanidm/ipa-sync -n
 ```
 
+As the sync tool is part of the tools container, you can run this with:
+
+```bash
+docker run --rm -i -t \
+  --user uid:gid \
+  -p 12345:12345 \
+  -v /etc/kanidm/config:/etc/kanidm/config:ro \
+  -v /path/to/kanidm.ca.pem:/path/to/kanidm.ca.pem:ro
+  -v /path/to/ipa-ca.pem:/etc/kanidm/ipa-ca.pem:ro \
+  -v /path/to/ipa-sync:/etc/kanidm/ipa-sync:ro \
+  kanidm/tools:latest \
+  kanidm-ipa-sync -i /etc/kanidm/ipa-sync -
+```
+
 ## Running the Sync Tool Automatically
 
 The sync tool can be run on a schedule if you configure the `schedule` parameter, and provide the
@@ -96,11 +111,14 @@ kanidm-ipa-sync -i /etc/kanidm/ipa-sync --schedule
 As the sync tool is part of the tools container, you can run this with:
 
 ```bash
-docker create --name kanidm-ipa-sync \
+docker run --name kanidm-ipa-sync \
   --user uid:gid \
   -p 12345:12345 \
   -v /etc/kanidm/config:/etc/kanidm/config:ro \
+  -v /path/to/kanidm.ca.pem:/path/to/kanidm.ca.pem:ro
+  -v /path/to/ipa-ca.pem:/etc/kanidm/ipa-ca.pem:ro \
   -v /path/to/ipa-sync:/etc/kanidm/ipa-sync:ro \
+  kanidm/tools:latest \
   kanidm-ipa-sync -i /etc/kanidm/ipa-sync --schedule
 ```
 
diff --git a/examples/kanidm-ipa-sync b/examples/kanidm-ipa-sync
index 8e122648f..4a7052510 100644
--- a/examples/kanidm-ipa-sync
+++ b/examples/kanidm-ipa-sync
@@ -17,8 +17,9 @@ sync_token = "eyJhb..."
 # server in the IPA topology rather than via a load balancer or dns srv records. This
 # is to prevent replication conflicts and issues due to how 389-ds content sync works.
 ipa_uri = "ldaps://specific-server.ipa.dev.kanidm.com"
-# Path to the IPA CA certificate in PEM format.
-ipa_ca = "/path/to/kanidm-ipa-ca.pem"
+# Path to the IPA CA certificate in PEM format. This can be found on an IPA server
+# in the file `/etc/ipa/ca.crt`
+ipa_ca = "/path/to/ipa-ca.pem"
 # The DN of an account with content sync rights. By default cn=Directory Manager has
 # this access.
 ipa_sync_dn = "cn=Directory Manager"
diff --git a/server/core/src/https/v1_scim.rs b/server/core/src/https/v1_scim.rs
index 8b38fa18a..35ba8da92 100644
--- a/server/core/src/https/v1_scim.rs
+++ b/server/core/src/https/v1_scim.rs
@@ -7,8 +7,8 @@ use super::v1::{
 };
 use super::ServerState;
 use crate::https::extractors::VerifiedClientInformation;
-use axum::extract::{Path, Query, State};
-use axum::response::Html;
+use axum::extract::{rejection::JsonRejection, DefaultBodyLimit, Path, Query, State};
+use axum::response::{Html, IntoResponse, Response};
 use axum::routing::{get, post};
 use axum::{Extension, Json, Router};
 use kanidm_proto::scim_v1::{
@@ -17,6 +17,8 @@ use kanidm_proto::scim_v1::{
 use kanidm_proto::v1::Entry as ProtoEntry;
 use kanidmd_lib::prelude::*;
 
+const DEFAULT_SCIM_SYNC_BYTES: usize = 1024 * 1024 * 32;
+
 #[utoipa::path(
     get,
     path = "/v1/sync_account",
@@ -271,14 +273,25 @@ async fn scim_sync_post(
     State(state): State<ServerState>,
     Extension(kopid): Extension<KOpId>,
     VerifiedClientInformation(client_auth_info): VerifiedClientInformation,
-    Json(changes): Json<ScimSyncRequest>,
-) -> Result<Json<()>, WebError> {
-    state
-        .qe_w_ref
-        .handle_scim_sync_apply(client_auth_info, changes, kopid.eventid)
-        .await
-        .map(Json::from)
-        .map_err(WebError::from)
+    payload: Result<Json<ScimSyncRequest>, JsonRejection>,
+) -> Response {
+    match payload {
+        Ok(Json(changes)) => {
+            let res = state
+                .qe_w_ref
+                .handle_scim_sync_apply(client_auth_info, changes, kopid.eventid)
+                .await;
+
+            match res {
+                Ok(data) => Json::from(data).into_response(),
+                Err(err) => WebError::from(err).into_response(),
+            }
+        }
+        Err(rejection) => {
+            error!(?rejection, "Unable to process JSON");
+            rejection.into_response()
+        }
+    }
 }
 
 #[utoipa::path(
@@ -473,6 +486,11 @@ pub fn route_setup() -> Router<ServerState> {
         //
         //                            POST                   Send a sync update
         //
-        .route("/scim/v1/Sync", post(scim_sync_post).get(scim_sync_get))
+        .route(
+            "/scim/v1/Sync",
+            post(scim_sync_post)
+                .layer(DefaultBodyLimit::max(DEFAULT_SCIM_SYNC_BYTES))
+                .get(scim_sync_get),
+        )
         .route("/scim/v1/Sink", get(scim_sink_get)) // skip_route_check
 }
diff --git a/server/lib/src/be/mod.rs b/server/lib/src/be/mod.rs
index d51a12268..dfc23a2e5 100644
--- a/server/lib/src/be/mod.rs
+++ b/server/lib/src/be/mod.rs
@@ -94,11 +94,22 @@ impl Limits {
     }
 }
 
+/// The result of a key value request containing the list of entry IDs that
+/// match the filter/query condition.
 #[derive(Debug, Clone)]
 pub enum IdList {
+    /// The value is not indexed, and must be assumed that all entries may match.
     AllIds,
-    PartialThreshold(IDLBitRange),
+    /// The index is "fuzzy" like a bloom filter (perhaps superset is a better description) -
+    /// it containes all elements that do match, but may have extra elements that don't.
+    /// This requires the caller to perform a filter test to assert that all
+    /// returned entries match all assertions within the filter.
     Partial(IDLBitRange),
+    /// The set was indexed and is below the filter test threshold. This is because it's
+    /// now faster to test with the filter than to continue to access indexes at this point.
+    /// Like a partial set, this is a super set of the entries that match the query.
+    PartialThreshold(IDLBitRange),
+    /// The value is indexed and accurately represents the set of entries that precisely match.
     Indexed(IDLBitRange),
 }
 
@@ -640,7 +651,7 @@ pub trait BackendTransaction {
         let (idl, fplan) = trace_span!("be::search -> filter2idl")
             .in_scope(|| self.filter2idl(filt.to_inner(), FILTER_SEARCH_TEST_THRESHOLD))?;
 
-        debug!(search_filter_executed_plan = ?fplan);
+        debug!(search_filter_executed_plan = %fplan);
 
         match &idl {
             IdList::AllIds => {
@@ -736,7 +747,7 @@ pub trait BackendTransaction {
         let (idl, fplan) = trace_span!("be::exists -> filter2idl")
             .in_scope(|| self.filter2idl(filt.to_inner(), FILTER_EXISTS_TEST_THRESHOLD))?;
 
-        debug!(exist_filter_executed_plan = ?fplan);
+        debug!(exist_filter_executed_plan = %fplan);
 
         // Apply limits to the IdList.
         match &idl {
diff --git a/server/lib/src/filter.rs b/server/lib/src/filter.rs
index 2894d66dc..cfb09de53 100644
--- a/server/lib/src/filter.rs
+++ b/server/lib/src/filter.rs
@@ -358,6 +358,54 @@ pub enum FilterPlan {
     InclusionIndexed(Vec<FilterPlan>),
 }
 
+// This difference in this is that we want to show unindexed elements more prominently
+// in the execution.
+
+fn fmt_filterplan_set(f: &mut fmt::Formatter<'_>, name: &str, plan: &[FilterPlan]) -> fmt::Result {
+    write!(f, "{name}(")?;
+    for item in plan {
+        write!(f, "{}, ", item)?;
+    }
+    write!(f, ")")
+}
+
+impl fmt::Display for FilterPlan {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Invalid => write!(f, "Invalid"),
+            Self::EqIndexed(attr, _) => write!(f, "EqIndexed({attr})"),
+            Self::EqCorrupt(attr) => write!(f, "EqCorrupt({attr})"),
+            Self::EqUnindexed(attr) => write!(f, "EqUnindexed({attr})"),
+
+            Self::SubIndexed(attr, _) => write!(f, "SubIndexed({attr})"),
+            Self::SubCorrupt(attr) => write!(f, "SubCorrupt({attr})"),
+            Self::SubUnindexed(attr) => write!(f, "SubUnindexed({attr})"),
+
+            Self::PresIndexed(attr) => write!(f, "PresIndexed({attr})"),
+            Self::PresCorrupt(attr) => write!(f, "PresCorrupt({attr})"),
+            Self::PresUnindexed(attr) => write!(f, "PresUnindexed({attr})"),
+
+            Self::LessThanUnindexed(attr) => write!(f, "LessThanUnindexed({attr})"),
+
+            Self::OrUnindexed(plan) => fmt_filterplan_set(f, "OrUnindexed", plan),
+            Self::OrIndexed(plan) => write!(f, "OrIndexed(len={})", plan.len()),
+            Self::OrPartial(plan) => fmt_filterplan_set(f, "OrPartial", plan),
+            Self::OrPartialThreshold(plan) => fmt_filterplan_set(f, "OrPartialThreshold", plan),
+
+            Self::AndEmptyCand(plan) => write!(f, "AndEmptyCand(len={})", plan.len()),
+            Self::AndUnindexed(plan) => fmt_filterplan_set(f, "AndUnindexed", plan),
+            Self::AndIndexed(plan) => write!(f, "AndIndexed(len={})", plan.len()),
+            Self::AndPartial(plan) => fmt_filterplan_set(f, "AndPartial", plan),
+            Self::AndPartialThreshold(plan) => fmt_filterplan_set(f, "AndPartialThreshold", plan),
+
+            Self::AndNot(plan) => write!(f, "AndNot({plan})"),
+
+            Self::InclusionInvalid(plan) => fmt_filterplan_set(f, "InclusionInvalid", plan),
+            Self::InclusionIndexed(plan) => write!(f, "InclusionIndexed(len={})", plan.len()),
+        }
+    }
+}
+
 /// A `Filter` is a logical set of assertions about the state of an [`Entry`] and
 /// it's avas. `Filter`s are built from a set of possible assertions.
 ///
diff --git a/server/lib/src/idm/scim.rs b/server/lib/src/idm/scim.rs
index 65eb3eeb8..f65f8dd16 100644
--- a/server/lib/src/idm/scim.rs
+++ b/server/lib/src/idm/scim.rs
@@ -1,6 +1,9 @@
 use std::time::Duration;
 
-use base64::{engine::general_purpose::STANDARD, Engine as _};
+use base64::{
+    engine::general_purpose::{STANDARD, URL_SAFE},
+    Engine as _,
+};
 
 use compact_jwt::{Jws, JwsCompact, JwsEs256Signer, JwsSigner};
 use kanidm_proto::internal::{ApiTokenPurpose, ScimSyncToken};
@@ -532,10 +535,14 @@ impl IdmServerProxyWriteTransaction<'_> {
         self.scim_sync_apply_phase_4(&changes.retain, sync_uuid)?;
 
         // Final house keeping. Commit the new sync state.
-        self.scim_sync_apply_phase_5(sync_uuid, &changes.to_state)
+        self.scim_sync_apply_phase_5(sync_uuid, &changes.to_state)?;
+
+        info!("success");
+
+        Ok(())
     }
 
-    #[instrument(level = "debug", skip_all)]
+    #[instrument(level = "info", skip_all)]
     fn scim_sync_apply_phase_1<'b>(
         &mut self,
         sse: &'b ScimSyncUpdateEvent,
@@ -630,7 +637,7 @@ impl IdmServerProxyWriteTransaction<'_> {
         Ok((sync_uuid, sync_authority_set, change_entries, sync_refresh))
     }
 
-    #[instrument(level = "debug", skip_all)]
+    #[instrument(level = "info", skip_all)]
     pub(crate) fn scim_sync_apply_phase_2(
         &mut self,
         change_entries: &BTreeMap<Uuid, &ScimEntry>,
@@ -749,7 +756,7 @@ impl IdmServerProxyWriteTransaction<'_> {
         Ok(())
     }
 
-    #[instrument(level = "debug", skip_all)]
+    #[instrument(level = "info", skip_all)]
     pub(crate) fn scim_sync_apply_phase_refresh_cleanup(
         &mut self,
         change_entries: &BTreeMap<Uuid, &ScimEntry>,
@@ -922,7 +929,9 @@ impl IdmServerProxyWriteTransaction<'_> {
                         })
                         .and_then(|secret| match secret {
                             ScimAttr::String(value) => {
-                                STANDARD.decode(value.as_str())
+                                URL_SAFE.decode(value.as_str()).or_else(
+                                    |_| STANDARD.decode(value.as_str())
+                                )
                                     .map_err(|_| {
                                         error!("Invalid secret attribute - must be base64 string");
                                         OperationError::InvalidAttribute(format!(
@@ -1247,7 +1256,7 @@ impl IdmServerProxyWriteTransaction<'_> {
         Ok(ModifyList::new_list(mods))
     }
 
-    #[instrument(level = "debug", skip_all)]
+    #[instrument(level = "info", skip_all)]
     pub(crate) fn scim_sync_apply_phase_3(
         &mut self,
         change_entries: &BTreeMap<Uuid, &ScimEntry>,
@@ -1333,7 +1342,7 @@ impl IdmServerProxyWriteTransaction<'_> {
             })
     }
 
-    #[instrument(level = "debug", skip_all)]
+    #[instrument(level = "info", skip_all)]
     pub(crate) fn scim_sync_apply_phase_4(
         &mut self,
         retain: &ScimSyncRetentionMode,
@@ -1439,7 +1448,7 @@ impl IdmServerProxyWriteTransaction<'_> {
         }
     }
 
-    #[instrument(level = "debug", skip_all)]
+    #[instrument(level = "info", skip_all)]
     pub(crate) fn scim_sync_apply_phase_5(
         &mut self,
         sync_uuid: Uuid,
diff --git a/server/lib/src/server/access/mod.rs b/server/lib/src/server/access/mod.rs
index e57222b40..358992127 100644
--- a/server/lib/src/server/access/mod.rs
+++ b/server/lib/src/server/access/mod.rs
@@ -508,6 +508,8 @@ pub trait AccessControlsTransaction<'a> {
         let sync_agmts = self.get_sync_agreements();
 
         let r = entries.iter().all(|e| {
+            debug!(entry_id = %e.get_display_id());
+
             match apply_modify_access(&me.ident, related_acp.as_slice(), sync_agmts, e) {
                 ModifyResult::Denied => false,
                 ModifyResult::Grant => true,
@@ -634,6 +636,7 @@ pub trait AccessControlsTransaction<'a> {
             debug!(?requested_pres, "Requested present set");
             debug!(?requested_rem, "Requested remove set");
             debug!(?requested_classes, "Requested class set");
+            debug!(entry_id = %e.get_display_id());
 
             let sync_agmts = self.get_sync_agreements();
 
diff --git a/server/lib/src/server/delete.rs b/server/lib/src/server/delete.rs
index 09164a75b..e434ac07b 100644
--- a/server/lib/src/server/delete.rs
+++ b/server/lib/src/server/delete.rs
@@ -39,7 +39,8 @@ impl QueryServerWriteTransaction<'_> {
 
         // Is the candidate set empty?
         if pre_candidates.is_empty() {
-            warn!(filter = ?de.filter, "delete: no candidates match filter");
+            warn!("delete: no candidates match filter");
+            debug!(delete_filter = ?de.filter);
             return Err(OperationError::NoMatchingEntries);
         };