Fix for incorrect domain migration rollbacks (#2482)

This commit is contained in:
Firstyear 2024-02-07 13:11:55 +10:00 committed by GitHub
parent 9050188b29
commit cdbaefe23d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 82 additions and 28 deletions

View file

@ -12,8 +12,8 @@ tls_client_ca = "/tmp/kanidm/client_ca"
# NOTE: this is overridden by environment variables at runtime # NOTE: this is overridden by environment variables at runtime
# Defaults to "info" # Defaults to "info"
# #
# log_level = "info" log_level = "info"
log_level = "debug" # log_level = "debug"
# log_level = "trace" # log_level = "trace"
# otel_grpc_url = "http://localhost:4317" # otel_grpc_url = "http://localhost:4317"

View file

@ -301,6 +301,7 @@ pub enum OperationError {
// Migration // Migration
MG0001InvalidReMigrationLevel, MG0001InvalidReMigrationLevel,
MG0002RaiseDomainLevelExceedsMaximum, MG0002RaiseDomainLevelExceedsMaximum,
MG0003ServerPhaseInvalidForMigration,
} }
impl PartialEq for OperationError { impl PartialEq for OperationError {

View file

@ -750,7 +750,7 @@ lazy_static! {
Attribute::Description, Attribute::Description,
Value::new_utf8s("System (local) info and metadata object.") Value::new_utf8s("System (local) info and metadata object.")
), ),
(Attribute::Version, Value::Uint32(18)) (Attribute::Version, Value::Uint32(19))
); );
pub static ref E_DOMAIN_INFO_V1: EntryInitNew = entry_init!( pub static ref E_DOMAIN_INFO_V1: EntryInitNew = entry_init!(

View file

@ -19,7 +19,7 @@ pub use crate::constants::values::*;
use std::time::Duration; use std::time::Duration;
// Increment this as we add new schema types and values!!! // Increment this as we add new schema types and values!!!
pub const SYSTEM_INDEX_VERSION: i64 = 30; pub const SYSTEM_INDEX_VERSION: i64 = 31;
/* /*
* domain functional levels * domain functional levels

View file

@ -5,6 +5,7 @@ use kanidm_proto::v1::Filter as ProtoFilter;
use crate::filter::FilterInvalid; use crate::filter::FilterInvalid;
use crate::prelude::*; use crate::prelude::*;
use crate::server::ServerPhase;
#[derive(Clone, Default)] #[derive(Clone, Default)]
pub struct DynGroupCache { pub struct DynGroupCache {
@ -35,6 +36,11 @@ impl DynGroup {
} }
*/ */
if qs.get_phase() < ServerPhase::SchemaReady {
trace!("Server is not ready to load dyngroups");
return Ok(());
}
// Search all the new groups first. // Search all the new groups first.
let filt = filter!(FC::Or( let filt = filter!(FC::Or(
n_dyn_groups n_dyn_groups

View file

@ -38,25 +38,6 @@ impl QueryServer {
.initialise_schema_core() .initialise_schema_core()
.and_then(|_| write_txn.reload())?; .and_then(|_| write_txn.reload())?;
write_txn
.initialise_schema_idm()
.and_then(|_| write_txn.reload())?;
// reindex and set to version + 1, this way when we bump the version
// we are essetially pushing this version id back up to step write_1
write_txn
.upgrade_reindex(SYSTEM_INDEX_VERSION + 1)
.and_then(|_| write_txn.reload())?;
// Force the schema to reload - this is so that any changes to index slope
// analysis that was performed during the reindex are now reflected correctly
// in the in-memory schema cache.
//
// A side effect of these reloads is that other plugins or elements that reload
// on schema change are now setup.
write_txn.set_phase(ServerPhase::SchemaReady);
write_txn.force_schema_reload();
write_txn.reload()?; write_txn.reload()?;
// Now, based on the system version apply migrations. You may ask "should you not // Now, based on the system version apply migrations. You may ask "should you not
@ -118,6 +99,10 @@ impl QueryServer {
} }
if system_info_version < 17 { if system_info_version < 17 {
write_txn.initialise_schema_idm()?;
write_txn.reload()?;
write_txn.migrate_16_to_17()?; write_txn.migrate_16_to_17()?;
} }
@ -130,9 +115,13 @@ impl QueryServer {
// the domain migrations kick in again. // the domain migrations kick in again.
write_txn.initialise_idm()?; write_txn.initialise_idm()?;
} }
if system_info_version < 19 {
write_txn.migrate_18_to_19()?;
}
} }
// Reload if anything in migrations requires it. // Reload if anything in the (older) system migrations requires it.
write_txn.reload()?; write_txn.reload()?;
// This is what tells us if the domain entry existed before or not. // This is what tells us if the domain entry existed before or not.
@ -144,11 +133,23 @@ impl QueryServer {
info!(?db_domain_version, "Before setting internal domain info"); info!(?db_domain_version, "Before setting internal domain info");
// Migrations complete. Init idm will now set the system config version and minimum domain // No domain info was present, so neither was the rest of the IDM. We need to bootstrap
// the base-schema here.
if db_domain_version == 0 {
write_txn.initialise_schema_idm()?;
write_txn.reload()?;
}
// Indicate the schema is now ready, which allows dyngroups to work.
write_txn.set_phase(ServerPhase::SchemaReady);
// Init idm will now set the system config version and minimum domain
// level if none was present // level if none was present
write_txn.initialise_domain_info()?; write_txn.initialise_domain_info()?;
// No domain info was present, so neither was the rest of the IDM. // No domain info was present, so neither was the rest of the IDM. We need to bootstrap
// the base entries here.
if db_domain_version == 0 { if db_domain_version == 0 {
write_txn.initialise_idm()?; write_txn.initialise_idm()?;
} }
@ -186,6 +187,20 @@ impl QueryServer {
// Reload if anything in migrations requires it - this triggers the domain migrations. // Reload if anything in migrations requires it - this triggers the domain migrations.
write_txn.reload()?; write_txn.reload()?;
// reindex and set to version + 1, this way when we bump the version
// we are essetially pushing this version id back up to step write_1
write_txn
.upgrade_reindex(SYSTEM_INDEX_VERSION + 1)
.and_then(|_| write_txn.reload())?;
// Force the schema to reload - this is so that any changes to index slope
// analysis that was performed during the reindex are now reflected correctly
// in the in-memory schema cache.
//
// A side effect of these reloads is that other plugins or elements that reload
// on schema change are now setup.
write_txn.force_schema_reload();
// We are ready to run // We are ready to run
write_txn.set_phase(ServerPhase::Running); write_txn.set_phase(ServerPhase::Running);
@ -676,6 +691,34 @@ impl<'a> QueryServerWriteTransaction<'a> {
Ok(()) Ok(())
} }
#[instrument(level = "info", skip_all)]
/// Automate fix for #2470 - force the domain version to be lowered, to allow
/// it to re-raise and force re-run migrations. This is because we accidentally
/// were "overwriting" the changes from domain migrations on startup due to
/// a logic error. At this point in the startup, the server phase is lower than
/// domain info ready, so the change won't immediately trigger remigrations. Rather
/// it will force them later in the startup.
pub fn migrate_18_to_19(&mut self) -> Result<(), OperationError> {
admin_warn!("starting 18 to 19 migration.");
debug_assert!(*self.phase < ServerPhase::DomainInfoReady);
if *self.phase >= ServerPhase::DomainInfoReady {
error!("Unable to perform system migration as server phase is greater or equal to domain info ready");
return Err(OperationError::MG0003ServerPhaseInvalidForMigration);
};
self.internal_modify_uuid(
UUID_DOMAIN_INFO,
&ModifyList::new_purge_and_set(Attribute::Version, Value::new_uint32(DOMAIN_LEVEL_2)),
)
.map(|()| {
warn!(
"Domain level has been temporarily lowered to {}",
DOMAIN_LEVEL_2
);
})
}
#[instrument(level = "info", skip_all)] #[instrument(level = "info", skip_all)]
/// This migration will /// This migration will
/// * Trigger a "once off" mfa account policy rule on all persons. /// * Trigger a "once off" mfa account policy rule on all persons.

View file

@ -53,8 +53,8 @@ const RESOLVE_FILTER_CACHE_LOCAL: usize = 0;
pub type ResolveFilterCacheReadTxn<'a> = pub type ResolveFilterCacheReadTxn<'a> =
ARCacheReadTxn<'a, (IdentityId, Filter<FilterValid>), Filter<FilterValidResolved>, ()>; ARCacheReadTxn<'a, (IdentityId, Filter<FilterValid>), Filter<FilterValidResolved>, ()>;
#[derive(Debug, Clone, PartialOrd, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialOrd, PartialEq, Eq)]
enum ServerPhase { pub(crate) enum ServerPhase {
Bootstrap, Bootstrap,
SchemaReady, SchemaReady,
DomainInfoReady, DomainInfoReady,
@ -1825,6 +1825,10 @@ impl<'a> QueryServerWriteTransaction<'a> {
*self.phase = phase *self.phase = phase
} }
pub(crate) fn get_phase(&mut self) -> ServerPhase {
*self.phase
}
pub(crate) fn reload(&mut self) -> Result<(), OperationError> { pub(crate) fn reload(&mut self) -> Result<(), OperationError> {
// First, check if the domain version has changed. // First, check if the domain version has changed.
if self.changed_domain { if self.changed_domain {