From b4f99c8e7abeb5763b3bd259cd7b3e162e67726d Mon Sep 17 00:00:00 2001
From: cuberoot74088
Date: Sat, 31 Jul 2021 07:13:46 +0000
Subject: [PATCH] Implement Online Backups (#25) (#536)

---
 Cargo.lock                              |  21 ++++
 examples/server.toml                    |  23 ++++-
 kanidm_book/src/administrivia.md        |  18 ++--
 kanidm_book/src/server_configuration.md |  21 +++-
 kanidmd/Cargo.toml                      |   1 +
 kanidmd/src/lib/actors/v1_read.rs       | 129 +++++++++++++++++++++++-
 kanidmd/src/lib/config.rs               |  39 +++++++
 kanidmd/src/lib/core/mod.rs             |   9 ++
 kanidmd/src/lib/event.rs                |  21 ++++
 kanidmd/src/lib/interval.rs             |  91 ++++++++++++++++-
 kanidmd/src/server/main.rs              |   4 +-
 11 files changed, 357 insertions(+), 20 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index d5362f258..f10b799da 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1820,6 +1820,7 @@ dependencies = [
  "rpassword",
  "rusqlite",
  "rustc_version 0.4.0",
+ "saffron",
  "serde",
  "serde_cbor",
  "serde_derive",
@@ -2176,6 +2177,16 @@ dependencies = [
  "version_check 0.1.5",
 ]
 
+[[package]]
+name = "nom"
+version = "5.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af"
+dependencies = [
+ "memchr",
+ "version_check 0.9.3",
+]
+
 [[package]]
 name = "nom"
 version = "6.1.2"
@@ -2920,6 +2931,16 @@ version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
 
+[[package]]
+name = "saffron"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "03fb9a628596fc7590eb7edbf7b0613287be78df107f5f97b118aad59fb2eea9"
+dependencies = [
+ "chrono",
+ "nom 5.1.2",
+]
+
 [[package]]
 name = "same-file"
 version = "1.0.6"
diff --git a/examples/server.toml b/examples/server.toml
index 6c9b01541..894bf793d 100644
--- a/examples/server.toml
+++ b/examples/server.toml
@@ -36,15 +36,28 @@ db_path = "/var/lib/kanidm/kanidm.db"
 #   origin = "https://idm.example.com"
 origin = "https://idm.example.com:8443"
 #
+#
+# [online_backup]
+#   The path to the output folder for online backups
+# path = "/var/lib/kanidm/backups/"
+#   The schedule to run online backups - see https://crontab.guru/
+#   every day at 22:00 UTC (default)
+# schedule = "00 22 * * *"
+#   four times a day at 3 minutes past the hour, every 6 hours
+# schedule = "03 */6 * * *"
+#   Number of backups to keep (default 7)
+# versions = 7
+#
+#
 # The role of this server. This affects features available and how replication may interact.
 # Valid roles are:
-# - write_replica
+# - WriteReplica
 #   This server provides all functionality of Kanidm. It allows authentication, writes, and
 #   the web user interface to be served.
-# - write_replica_no_ui
+# - WriteReplicaNoUI
 #   This server is the same as a write_replica, but does NOT offer the web user interface.
-# - read_only_replica
+# - ReadOnlyReplica
 #   This server will not accept writes initiated by clients. It supports authentication and reads,
 #   and must have a replication agreement as a source of its data.
-# Defaults to "write_replica".
-# role = "write_replica"
\ No newline at end of file
+# Defaults to "WriteReplica".
+# role = "WriteReplica"
\ No newline at end of file
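
Editor's note: as a minimal, self-contained sketch (not part of the patch) of how an `[online_backup]` section like the one documented above deserializes, here is a stand-in that mirrors the `OnlineBackup` struct this patch adds in `kanidmd/src/lib/config.rs`. The struct, helper names, and `ServerConfig` wrapper here are local illustrations only; `serde` (with the derive feature) and `toml` 0.5 are already kanidmd dependencies.

```rust
use serde::Deserialize;

#[derive(Deserialize, Debug)]
struct OnlineBackup {
    path: String,
    // serde falls back to these helpers when the key is absent from server.toml.
    #[serde(default = "default_schedule")]
    schedule: String,
    #[serde(default = "default_versions")]
    versions: usize,
}

fn default_schedule() -> String {
    "00 22 * * *".to_string()
}

fn default_versions() -> usize {
    7
}

#[derive(Deserialize, Debug)]
struct ServerConfig {
    online_backup: Option<OnlineBackup>,
}

fn main() {
    // Only `path` is set, so `schedule` and `versions` take their defaults:
    // "00 22 * * *" and 7.
    let cfg: ServerConfig = toml::from_str(
        r#"
        [online_backup]
        path = "/var/lib/kanidm/backups/"
        "#,
    )
    .expect("invalid server.toml");
    println!("{:?}", cfg.online_backup);
}
```

Leaving the `[online_backup]` table out entirely yields `None`, which is how the server later decides whether to start the backup task at all.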
diff --git a/kanidm_book/src/administrivia.md b/kanidm_book/src/administrivia.md
index a1e74d527..a538a9cd9 100644
--- a/kanidm_book/src/administrivia.md
+++ b/kanidm_book/src/administrivia.md
@@ -41,6 +41,13 @@ This is a simple backup of the data volume.
     # Backup your docker's volume folder
     docker start <container name>
 
+## Method 3
+
+Automatic backups can be generated online by a `kanidmd server` instance
+by including the `[online_backup]` section in the `server.toml`.
+This allows you to run regular backups, defined by a cron schedule, and to maintain
+a fixed number of backup versions. An example is located in [examples/server.toml](../../examples/server.toml).
+
 # Rename the domain
 
 There are some cases where you may need to rename the domain. You should have configured
@@ -66,7 +73,6 @@ you can then rename the domain with the commands as follows:
         -n idm.new.domain.name
     docker start <container name>
 
-
 # Reindexing after schema extension
 
 In some (rare) cases you may need to reindex.
@@ -96,12 +102,12 @@ Generally, reindexing is a rare action and should not normally be required.
 
 # Vacuum
 
-[Vacuuming](https://www.sqlite.org/lang_vacuum.html) is the process of reclaiming un-used pages 
-from the sqlite freelists, as well as performing some data reordering tasks that may make some 
-queries more efficient . It is recommended that you vacuum after a reindex is performed or 
+[Vacuuming](https://www.sqlite.org/lang_vacuum.html) is the process of reclaiming unused pages
+from the sqlite freelists, as well as performing some data reordering tasks that may make some
+queries more efficient. It is recommended that you vacuum after a reindex is performed or
 when you wish to reclaim space in the database file.
 
-Vacuum is also able to change the pagesize of the database. After changing db\_fs\_type (which affects
+Vacuum is also able to change the pagesize of the database. After changing `db_fs_type` (which affects
 pagesize) in server.toml, you must run a vacuum for this to take effect.
 
     docker stop <container name>
@@ -114,7 +120,7 @@
 The server ships with a number of verification utilities to ensure that data is consistent such
 as referential integrity or memberof.
 
-Note that verification really is a last resort - the server does *a lot* to prevent and self-heal
+Note that verification really is a last resort - the server does _a lot_ to prevent and self-heal
 from errors at run time, so you should rarely if ever require this utility. This utility was
 developed to guarantee consistency during development!
diff --git a/kanidm_book/src/server_configuration.md b/kanidm_book/src/server_configuration.md
index 4a865740d..13ea6750d 100644
--- a/kanidm_book/src/server_configuration.md
+++ b/kanidm_book/src/server_configuration.md
@@ -4,11 +4,11 @@ You will also need a config file in the volume named `server.toml` (Within the c
 
     # The webserver bind address. Will use HTTPS if tls_* is provided.
     # Defaults to "127.0.0.1:8443"
-    bindaddress = "127.0.0.1:8443"
+    bindaddress = "[::]:8443"
     #
     # The read-only ldap server bind address. The server will use LDAPS if tls_* is provided.
     # Defaults to "" (disabled)
-    # ldapbindaddress = "127.0.0.1:3636"
+    # ldapbindaddress = "[::]:3636"
     #
     # The path to the kanidm database.
     db_path = "/data/kanidm.db"
@@ -40,6 +40,19 @@ You will also need a config file in the volume named `server.toml` (Within the c
     #   origin = "https://idm.example.com"
     origin = "https://idm.example.com:8443"
     #
+    #
+    # [online_backup]
+    #   The path to the output folder for online backups
+    # path = "/var/lib/kanidm/backups/"
+    #   The schedule to run online backups - see https://crontab.guru/
+    #   every day at 22:00 UTC (default)
+    # schedule = "00 22 * * *"
+    #   four times a day at 3 minutes past the hour, every 6 hours
+    # schedule = "03 */6 * * *"
+    #   Number of backups to keep (default 7)
+    # versions = 7
+    #
+    #
     # The role of this server. This affects features available and how replication may interact.
     # Valid roles are:
     # - WriteReplica
@@ -60,11 +73,11 @@ Then you can setup the initial admin account and initialise the database into yo
     docker run --rm -i -t -v kanidmd:/data kanidm/server:latest /sbin/kanidmd recover_account -c /data/server.toml -n admin
 
 You then want to set your domain name so that security principal names (spn's) are generated correctly.
-This domain name *must* match the url/origin of the server that you plan to use to interact with
+This domain name _must_ match the url/origin of the server that you plan to use to interact with
 so that other features work correctly. It is possible to change this domain name later.
 
     docker run --rm -i -t -v kanidmd:/data kanidm/server:latest /sbin/kanidmd domain_name_change -c /data/server.toml -n idm.example.com
 
 Now we can run the server so that it can accept connections. This defaults to using `-c /data/server.toml`
 
-    docker run -p 8443:8443 -v kanidmd:/data kanidm/server:latest
\ No newline at end of file
+    docker run -p 8443:8443 -v kanidmd:/data kanidm/server:latest
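
Editor's note: to make the documented schedule strings concrete, here is a small sketch (illustration only) of how they are interpreted by the saffron 0.1 API that this patch uses in `kanidmd/src/lib/interval.rs` further down: parse the expression, describe it, and reject expressions that can never fire.

```rust
use saffron::parse::{CronExpr, English};
use saffron::Cron;

fn main() {
    // The default documented above: every day at 22:00 UTC.
    let schedule = "00 22 * * *";

    let cron_expr: CronExpr = match schedule.parse() {
        Ok(c) => c,
        Err(e) => {
            eprintln!("Online backup schedule parse error: {}", e);
            return;
        }
    };

    // Prints a human-readable description of the schedule.
    println!("{}", cron_expr.describe(English::default()));

    // Some expressions (e.g. the 30th of February) parse fine but match
    // no date; the server rejects them with exactly this check.
    assert!(Cron::new(cron_expr).any());
}
```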
diff --git a/kanidmd/Cargo.toml b/kanidmd/Cargo.toml
index 3e2b2afef..20b746df4 100644
--- a/kanidmd/Cargo.toml
+++ b/kanidmd/Cargo.toml
@@ -38,6 +38,7 @@
 rand = "0.8"
 toml = "0.5"
 chrono = "0.4"
+saffron = "0.1.0"
 regex = "1"
 lazy_static = "1.2.0"
 
diff --git a/kanidmd/src/lib/actors/v1_read.rs b/kanidmd/src/lib/actors/v1_read.rs
index a161c7431..9a95a6663 100644
--- a/kanidmd/src/lib/actors/v1_read.rs
+++ b/kanidmd/src/lib/actors/v1_read.rs
@@ -1,10 +1,15 @@
 use tokio::sync::mpsc::UnboundedSender as Sender;
 
+use chrono::{DateTime, SecondsFormat, Utc};
 use std::sync::Arc;
 
 use crate::prelude::*;
 
-use crate::event::{AuthEvent, AuthResult, SearchEvent, SearchResult, WhoamiResult};
+use crate::be::BackendTransaction;
+
+use crate::event::{
+    AuthEvent, AuthResult, OnlineBackupEvent, SearchEvent, SearchResult, WhoamiResult,
+};
 use crate::idm::event::{
     CredentialStatusEvent, RadiusAuthTokenEvent, ReadBackupCodeEvent, UnixGroupTokenEvent,
     UnixUserAuthEvent, UnixUserTokenEvent,
@@ -26,6 +31,9 @@ use kanidm_proto::v1::{
     WhoamiResponse,
 };
 
+use regex::Regex;
+use std::fs;
+use std::path::{Path, PathBuf};
 use uuid::Uuid;
 
 use ldap3_server::simple::*;
@@ -167,6 +175,125 @@ impl QueryServerReadV1 {
         res
     }
 
+    pub async fn handle_online_backup(
+        &self,
+        msg: OnlineBackupEvent,
+        outpath: &str,
+        versions: usize,
+    ) {
+        let mut audit = AuditScope::new("online backup", msg.eventid, self.log_level);
+
+        ltrace!(audit, "Begin online backup event {:?}", msg.eventid);
+
+        let now: DateTime<Utc> = Utc::now();
+        let timestamp = now.to_rfc3339_opts(SecondsFormat::Secs, true);
+        let dest_file = format!("{}/backup-{}.json", outpath, timestamp);
+
+        match Path::new(&dest_file).exists() {
+            true => {
+                error!(
+                    "Online backup file {} already exists, will not overwrite it.",
+                    dest_file
+                );
+            }
+            false => {
+                let idms_prox_read = self.idms.proxy_read_async().await;
+                lperf_op_segment!(
+                    &mut audit,
+                    "actors::v1_read::handle<OnlineBackupEvent>",
+                    || {
+                        let res = idms_prox_read
+                            .qs_read
+                            .get_be_txn()
+                            .backup(&mut audit, &dest_file);
+
+                        match &res {
+                            Ok(()) => {
+                                info!("Online backup created {} successfully", dest_file);
+                            }
+                            Err(e) => {
+                                error!("Online backup failed to create {}: {:?}", dest_file, e);
+                            }
+                        }
+
+                        ladmin_info!(audit, "online backup result: {:?}", res);
+                    }
+                );
+            }
+        }
+
+        // Cleanup: enforce the maximum number of backup versions to keep.
+        let mut backup_file_list: Vec<PathBuf> = Vec::new();
+        // Pattern to find automatically generated backup files.
+        let re = Regex::new(r"^backup-\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\.json$")
+            .expect("Failed to parse regexp for online backup files.");
+
+        // Get a list of backup files.
+        match fs::read_dir(outpath) {
+            Ok(rd) => {
+                for entry in rd {
+                    // Get the PathBuf.
+                    let pb = entry.unwrap().path();
+
+                    // Skip everything that is not a file.
+                    if !pb.is_file() {
+                        continue;
+                    }
+
+                    // Get the file name part of /some/dir/<file name>.
+                    let file_name = pb.file_name().unwrap().to_str().unwrap();
+                    // Check for an online backup file.
+                    if re.is_match(file_name) {
+                        backup_file_list.push(pb.clone());
+                    }
+                }
+            }
+            Err(e) => {
+                error!("Online backup cleanup error read dir {}: {}", outpath, e);
+            }
+        }
+
+        // Sort the list so items are ordered old to new.
+        backup_file_list.sort();
+
+        // Versions: OLD 10.9.8.7.6.5.4.3.2.1 NEW
+        //               |----delete----|keep|
+        // 10 items, we want to keep the latest 3
+
+        // If we have more files than we want to keep, we do some cleanup.
+        if backup_file_list.len() > versions {
+            let x = backup_file_list.len() - versions;
+            info!(
+                "Online backup cleanup found {} versions, should keep {}, will remove {}",
+                backup_file_list.len(),
+                versions,
+                x
+            );
+            backup_file_list.truncate(x);
+
+            // Remove the files.
+            for file in backup_file_list {
+                debug!("Online backup cleanup: removing {:?}", &file);
+                match fs::remove_file(&file) {
+                    Ok(_) => {}
+                    Err(e) => {
+                        error!(
+                            "Online backup cleanup failed to remove file {:?}: {:?}",
+                            file, e
+                        )
+                    }
+                };
+            }
+        } else {
+            debug!("Online backup cleanup had no files to remove");
+        };
+
+        // At the end of the event we send it for logging.
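+        self.log.send(audit).unwrap_or_else(|_| {
+            error!("CRITICAL: UNABLE TO COMMIT LOGS");
+        });
+    }
+
     pub async fn handle_whoami(
         &self,
         uat: Option<String>,

Editor's note: a quick standalone check (not in the patch) of the rotation logic above. The timestamp format written by `handle_online_backup` must stay in sync with the cleanup regex, and `sort` + `truncate(len - versions)` leaves exactly the oldest files in the deletion list. The filenames below are fabricated for illustration.

```rust
use regex::Regex;

fn main() {
    // Same pattern as the cleanup code above.
    let re = Regex::new(r"^backup-\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\.json$").unwrap();

    // RFC 3339 with SecondsFormat::Secs and use_z = true, as produced by
    // now.to_rfc3339_opts(SecondsFormat::Secs, true) in handle_online_backup.
    let mut files: Vec<String> = vec![
        "backup-2021-07-29T22:00:00Z.json".to_string(),
        "backup-2021-07-31T22:00:00Z.json".to_string(),
        "backup-2021-07-30T22:00:00Z.json".to_string(),
    ];
    assert!(files.iter().all(|f| re.is_match(f)));

    // Zero-padded timestamps mean lexicographic order == chronological order,
    // so a plain sort puts the oldest files first.
    let versions = 2;
    files.sort();
    // Keep the newest `versions`; everything before the cut is deleted.
    files.truncate(files.len() - versions);
    assert_eq!(files, vec!["backup-2021-07-29T22:00:00Z.json".to_string()]);
}
```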
diff --git a/kanidmd/src/lib/config.rs b/kanidmd/src/lib/config.rs
index c3add0423..77684b98b 100644
--- a/kanidmd/src/lib/config.rs
+++ b/kanidmd/src/lib/config.rs
@@ -14,6 +14,23 @@ pub struct IntegrationTestConfig {
     pub admin_password: String,
 }
 
+#[derive(Serialize, Deserialize, Debug)]
+pub struct OnlineBackup {
+    pub path: String,
+    #[serde(default = "default_online_backup_schedule")]
+    pub schedule: String,
+    #[serde(default = "default_online_backup_versions")]
+    pub versions: usize,
+}
+
+fn default_online_backup_schedule() -> String {
+    "00 22 * * *".to_string()
+}
+
+fn default_online_backup_versions() -> usize {
+    7
+}
+
 #[derive(Serialize, Deserialize, Debug)]
 pub struct TlsConfiguration {
     pub chain: String,
@@ -71,6 +88,7 @@ pub struct Configuration {
     pub cookie_key: [u8; 32],
     pub integration_test_config: Option<Box<IntegrationTestConfig>>,
     pub log_level: Option<u32>,
+    pub online_backup: Option<OnlineBackup>,
     pub origin: String,
     pub role: ServerRole,
 }
@@ -95,6 +113,10 @@ impl fmt::Display for Configuration {
             Some(u) => write!(f, "with log_level: {:x}, ", u),
             None => write!(f, "with log_level: default, "),
         })
+        .and_then(|_| match &self.online_backup {
+            Some(_) => write!(f, "with online_backup: enabled, "),
+            None => write!(f, "with online_backup: disabled, "),
+        })
         .and_then(|_| write!(f, "role: {}, ", self.role.to_string()))
         .and_then(|_| {
             write!(
@@ -124,6 +146,7 @@
             cookie_key: [0; 32],
             integration_test_config: None,
             log_level: None,
+            online_backup: None,
             origin: "https://idm.example.com".to_string(),
             role: ServerRole::WriteReplica,
         };
@@ -136,6 +159,22 @@
         self.log_level = log_level;
     }
 
+    pub fn update_online_backup(&mut self, cfg: &Option<OnlineBackup>) {
+        match cfg {
+            None => {}
+            Some(cfg) => {
+                let path = cfg.path.to_string();
+                let schedule = cfg.schedule.to_string();
+                let versions = cfg.versions;
+                self.online_backup = Some(OnlineBackup {
+                    path,
+                    schedule,
+                    versions,
+                })
+            }
+        }
+    }
+
     pub fn update_db_path(&mut self, p: &str) {
         self.db_path = p.to_string();
     }
diff --git a/kanidmd/src/lib/core/mod.rs b/kanidmd/src/lib/core/mod.rs
index 40871090c..e032090c3 100644
--- a/kanidmd/src/lib/core/mod.rs
+++ b/kanidmd/src/lib/core/mod.rs
@@ -702,6 +702,15 @@ pub async fn create_server_core(config: Configuration) -> Result<(), ()> {
     // Setup timed events associated to the write thread
     IntervalActor::start(server_write_ref);
+    // Setup timed events associated to the read thread
+    match &config.online_backup {
+        Some(cfg) => {
+            IntervalActor::start_online_backup(server_read_ref, &cfg)?;
+        }
+        None => {
+            debug!("Online backup not requested, skipping");
+        }
+    };
 
     // If we have been requested to init LDAP, configure it now.
     match &config.ldapaddress {
diff --git a/kanidmd/src/lib/event.rs b/kanidmd/src/lib/event.rs
index 2bd414bde..5778fb415 100644
--- a/kanidmd/src/lib/event.rs
+++ b/kanidmd/src/lib/event.rs
@@ -910,6 +910,27 @@ impl PurgeRecycledEvent {
     }
 }
 
+#[derive(Debug)]
+pub struct OnlineBackupEvent {
+    pub ident: Identity,
+    pub eventid: Uuid,
+}
+
+impl Default for OnlineBackupEvent {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl OnlineBackupEvent {
+    pub fn new() -> Self {
+        OnlineBackupEvent {
+            ident: Identity::from_internal(),
+            eventid: Uuid::new_v4(),
+        }
+    }
+}
+
 #[derive(Debug)]
 pub struct ReviveRecycledEvent {
     pub ident: Identity,
diff --git a/kanidmd/src/lib/interval.rs b/kanidmd/src/lib/interval.rs
index 85c13a9a4..8653b209d 100644
--- a/kanidmd/src/lib/interval.rs
+++ b/kanidmd/src/lib/interval.rs
@@ -1,11 +1,19 @@
 //! This contains scheduled tasks/interval tasks that are run inside of the server on a schedule
 //! as background operations.
 
+use crate::actors::v1_read::QueryServerReadV1;
 use crate::actors::v1_write::QueryServerWriteV1;
-use crate::constants::PURGE_FREQUENCY;
-use crate::event::{PurgeRecycledEvent, PurgeTombstoneEvent};
 
-use tokio::time::{interval, Duration};
+use crate::config::OnlineBackup;
+use crate::constants::PURGE_FREQUENCY;
+use crate::event::{OnlineBackupEvent, PurgeRecycledEvent, PurgeTombstoneEvent};
+
+use chrono::Utc;
+use saffron::parse::{CronExpr, English};
+use saffron::Cron;
+use std::fs;
+use std::path::Path;
+use tokio::time::{interval, sleep, Duration};
 
 pub struct IntervalActor;
 
@@ -24,4 +32,81 @@ impl IntervalActor {
             }
         });
     }
+
+    pub fn start_online_backup(
+        server: &'static QueryServerReadV1,
+        cfg: &OnlineBackup,
+    ) -> Result<(), ()> {
+        let outpath = cfg.path.to_owned();
+        let schedule = cfg.schedule.to_owned();
+        let versions = cfg.versions;
+
+        // Cron expression handling
+        let cron_expr = schedule.as_str().parse::<CronExpr>().map_err(|e| {
+            error!("Online backup schedule parse error: {}", e);
+        })?;
+
+        info!(
+            "Online backup schedule parsed as: {}",
+            cron_expr.describe(English::default())
+        );
+
+        if !Cron::new(cron_expr.clone()).any() {
+            error!(
+                "Online backup schedule error: '{}' will not match any date.",
+                schedule
+            );
+            return Err(());
+        }
+
+        // Output path handling
+        let op = Path::new(&outpath);
+
+        // Does the path exist, and is it a directory?
+        if !op.exists() {
+            info!(
+                "Online backup output folder '{}' does not exist, trying to create it.",
+                outpath
+            );
+            fs::create_dir_all(&outpath).map_err(|e| {
+                error!(
+                    "Online backup failed to create output directory '{}': {}",
+                    outpath.clone(),
+                    e
+                )
+            })?;
+        }
+
+        if !op.is_dir() {
+            error!("Online backup output '{}' is not a directory or we are missing permissions to access it.", outpath);
+            return Err(());
+        }
+
+        tokio::spawn(async move {
+            let ct = Utc::now();
+            let cron = Cron::new(cron_expr.clone());
+
+            let cron_iter = cron.clone().iter_after(ct);
+            for next_time in cron_iter {
+                // We add 1 second to the `wait_time` in order to get "even" timestamps,
+                // for example: 1 + 17:05:59Z --> 17:06:00Z
+                let wait_seconds = 1 + (next_time - Utc::now()).num_seconds() as u64;
+                info!(
+                    "Online backup next run on {}, wait_time = {}s",
+                    next_time, wait_seconds
+                );
+
+                sleep(Duration::from_secs(wait_seconds)).await;
+                server
+                    .handle_online_backup(
+                        OnlineBackupEvent::new(),
+                        outpath.clone().as_str(),
+                        versions,
+                    )
+                    .await;
+            }
+        });
+
+        Ok(())
+    }
 }
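
Editor's note: one subtlety in the scheduling loop above is the `1 +` in the wait computation. `(next_time - Utc::now()).num_seconds()` truncates the fractional second toward zero, so sleeping exactly that long would wake just before the scheduled second. A sketch of the arithmetic (illustration only, with fabricated timestamps):

```rust
use chrono::{TimeZone, Utc};

fn main() {
    // Suppose the next cron match is 17:06:00Z and it is now 17:05:59.4Z.
    let next_time = Utc.ymd(2021, 7, 31).and_hms(17, 6, 0);
    let now = Utc.ymd(2021, 7, 31).and_hms_milli(17, 5, 59, 400);

    // num_seconds() truncates the remaining 0.6s to 0, so without the +1
    // the task would wake at ~17:05:59.4Z, before the scheduled time.
    let wait_seconds = 1 + (next_time - now).num_seconds() as u64;
    assert_eq!(wait_seconds, 1);
    // Sleeping 1s from 17:05:59.4Z wakes at ~17:06:00.4Z, which formats to
    // the "even" timestamp 17:06:00Z described in the comment above.
}
```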
diff --git a/kanidmd/src/server/main.rs b/kanidmd/src/server/main.rs
index 96e4b21fe..c6066e7e0 100644
--- a/kanidmd/src/server/main.rs
+++ b/kanidmd/src/server/main.rs
@@ -26,7 +26,7 @@ use std::path::PathBuf;
 use std::str::FromStr;
 
 use kanidm::audit::LogLevel;
-use kanidm::config::{Configuration, ServerRole};
+use kanidm::config::{Configuration, OnlineBackup, ServerRole};
 use kanidm::core::{
     backup_server_core, create_server_core, dbscan_get_id2entry_core, dbscan_list_id2entry_core,
     dbscan_list_index_analysis_core, dbscan_list_index_core, dbscan_list_indexes_core,
@@ -50,6 +50,7 @@ struct ServerConfig {
     pub tls_chain: Option<String>,
     pub tls_key: Option<String>,
     pub log_level: Option<String>,
+    pub online_backup: Option<OnlineBackup>,
     pub origin: String,
     #[serde(default)]
     pub role: ServerRole,
@@ -223,6 +224,7 @@ async fn main() {
     config.update_tls(&sconfig.tls_chain, &sconfig.tls_key);
     config.update_bind(&sconfig.bindaddress);
     config.update_ldapbind(&sconfig.ldapbindaddress);
+    config.update_online_backup(&sconfig.online_backup);
 
     if let Some(i_str) = &(sconfig.tls_chain) {
         let i_path = PathBuf::from(i_str.as_str());