Implement Online Backups (#25) (#536)

cuberoot74088 authored on 2021-07-31 07:13:46 +00:00; committed by GitHub
parent 7b60d9d03d
commit b4f99c8e7a
11 changed files with 357 additions and 20 deletions

Cargo.lock (generated)

@@ -1820,6 +1820,7 @@ dependencies = [
"rpassword",
"rusqlite",
"rustc_version 0.4.0",
"saffron",
"serde",
"serde_cbor",
"serde_derive",
@@ -2176,6 +2177,16 @@ dependencies = [
"version_check 0.1.5",
]
[[package]]
name = "nom"
version = "5.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af"
dependencies = [
"memchr",
"version_check 0.9.3",
]
[[package]]
name = "nom"
version = "6.1.2"
@@ -2920,6 +2931,16 @@ version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
[[package]]
name = "saffron"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03fb9a628596fc7590eb7edbf7b0613287be78df107f5f97b118aad59fb2eea9"
dependencies = [
"chrono",
"nom 5.1.2",
]
[[package]]
name = "same-file"
version = "1.0.6"


@@ -36,15 +36,28 @@ db_path = "/var/lib/kanidm/kanidm.db"
# origin = "https://idm.example.com"
origin = "https://idm.example.com:8443"
#
#
# [online_backup]
# The path to the output folder for online backups
# path = "/var/lib/kanidm/backups/"
# The schedule to run online backups - see https://crontab.guru/
# every day at 22:00 UTC (default)
# schedule = "00 22 * * *"
# four times a day, at 3 minutes past the hour, every 6 hours
# schedule = "03 */6 * * *"
# Number of backups to keep (default 7)
# versions = 7
#
#
# The role of this server. This affects features available and how replication may interact.
# Valid roles are:
# - WriteReplica
# This server provides all functionality of Kanidm. It allows authentication, writes, and
# the web user interface to be served.
# - WriteReplicaNoUI
# This server is the same as a write_replica, but does NOT offer the web user interface.
# - ReadOnlyReplica
# This server will not accept writes initiated by clients. It supports authentication and reads,
# and must have a replication agreement as a source of its data.
# Defaults to "WriteReplica".
# role = "WriteReplica"


@@ -41,6 +41,13 @@ This is a simple backup of the data volume.
# Backup your docker's volume folder
docker start <container name>
## Method 3

Automatic backups can be generated online by a running `kanidmd` server instance
by including the `[online_backup]` section in `server.toml`.
This allows you to run regular backups on a cron schedule and to control how many
backup versions are kept. An example is located in [examples/server.toml](../../examples/server.toml).
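A minimal enabled section might look like the following sketch (the values shown are the defaults documented in the example file; adjust the path and schedule for your deployment):

```toml
[online_backup]
# Directory that online backup files are written to
path = "/var/lib/kanidm/backups/"
# Cron schedule - every day at 22:00 UTC
schedule = "00 22 * * *"
# Number of backup files to keep
versions = 7
```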
# Rename the domain
There are some cases where you may need to rename the domain. You should have configured
@@ -66,7 +73,6 @@ you can then rename the domain with the commands as follows:
-n idm.new.domain.name
docker start <container name>
# Reindexing after schema extension
In some (rare) cases you may need to reindex.
@@ -101,7 +107,7 @@ from the sqlite freelists, as well as performing some data reordering tasks that
queries more efficient. It is recommended that you vacuum after a reindex is performed or
when you wish to reclaim space in the database file.
Vacuum is also able to change the pagesize of the database. After changing `db_fs_type` (which affects
pagesize) in server.toml, you must run a vacuum for this to take effect.
docker stop <container name>
@@ -114,7 +120,7 @@ pagesize) in server.toml, you must run a vacuum for this to take effect.
The server ships with a number of verification utilities to ensure that data is consistent such
as referential integrity or memberof.
Note that verification really is a last resort - the server does _a lot_ to prevent and self-heal
from errors at run time, so you should rarely if ever require this utility. This utility was
developed to guarantee consistency during development!


@@ -4,11 +4,11 @@ You will also need a config file in the volume named `server.toml` (Within the c
# The webserver bind address. Will use HTTPS if tls_* is provided.
# Defaults to "127.0.0.1:8443"
bindaddress = "[::]:8443"
#
# The read-only ldap server bind address. The server will use LDAPS if tls_* is provided.
# Defaults to "" (disabled)
# ldapbindaddress = "[::]:3636"
#
# The path to the kanidm database.
db_path = "/data/kanidm.db"
@@ -40,6 +40,19 @@ You will also need a config file in the volume named `server.toml` (Within the c
# origin = "https://idm.example.com"
origin = "https://idm.example.com:8443"
#
#
# [online_backup]
# The path to the output folder for online backups
# path = "/var/lib/kanidm/backups/"
# The schedule to run online backups - see https://crontab.guru/
# every day at 22:00 UTC (default)
# schedule = "00 22 * * *"
# four times a day, at 3 minutes past the hour, every 6 hours
# schedule = "03 */6 * * *"
# Number of backups to keep (default 7)
# versions = 7
#
#
# The role of this server. This affects features available and how replication may interact.
# Valid roles are:
# - WriteReplica
@@ -60,7 +73,7 @@ Then you can setup the initial admin account and initialise the database into yo
docker run --rm -i -t -v kanidmd:/data kanidm/server:latest /sbin/kanidmd recover_account -c /data/server.toml -n admin
You then want to set your domain name so that security principal names (spn's) are generated correctly.
This domain name _must_ match the url/origin of the server that you plan to use to interact with
so that other features work correctly. It is possible to change this domain name later.
docker run --rm -i -t -v kanidmd:/data kanidm/server:latest /sbin/kanidmd domain_name_change -c /data/server.toml -n idm.example.com


@@ -38,6 +38,7 @@ rand = "0.8"
toml = "0.5"
chrono = "0.4"
saffron = "0.1.0"
regex = "1"
lazy_static = "1.2.0"


@@ -1,10 +1,15 @@
use tokio::sync::mpsc::UnboundedSender as Sender;
use chrono::{DateTime, SecondsFormat, Utc};
use std::sync::Arc;
use crate::prelude::*;
use crate::be::BackendTransaction;
use crate::event::{
    AuthEvent, AuthResult, OnlineBackupEvent, SearchEvent, SearchResult, WhoamiResult,
};
use crate::idm::event::{
    CredentialStatusEvent, RadiusAuthTokenEvent, ReadBackupCodeEvent, UnixGroupTokenEvent,
    UnixUserAuthEvent, UnixUserTokenEvent,
@@ -26,6 +31,9 @@ use kanidm_proto::v1::{
    WhoamiResponse,
};
use regex::Regex;
use std::fs;
use std::path::{Path, PathBuf};
use uuid::Uuid;
use ldap3_server::simple::*;
@@ -167,6 +175,125 @@ impl QueryServerReadV1 {
        res
    }
    pub async fn handle_online_backup(
        &self,
        msg: OnlineBackupEvent,
        outpath: &str,
        versions: usize,
    ) {
        let mut audit = AuditScope::new("online backup", msg.eventid, self.log_level);
        ltrace!(audit, "Begin online backup event {:?}", msg.eventid);

        let now: DateTime<Utc> = Utc::now();
        let timestamp = now.to_rfc3339_opts(SecondsFormat::Secs, true);
        let dest_file = format!("{}/backup-{}.json", outpath, timestamp);

        match Path::new(&dest_file).exists() {
            true => {
                error!(
                    "Online backup file {} already exists, will not overwrite it.",
                    dest_file
                );
            }
            false => {
                let idms_prox_read = self.idms.proxy_read_async().await;
                lperf_op_segment!(
                    &mut audit,
                    "actors::v1_read::handle<OnlineBackupEvent>",
                    || {
                        let res = idms_prox_read
                            .qs_read
                            .get_be_txn()
                            .backup(&mut audit, &dest_file);
                        match &res {
                            Ok(()) => {
                                info!("Online backup created {} successfully", dest_file);
                            }
                            Err(e) => {
                                error!("Online backup failed to create {}: {:?}", dest_file, e);
                            }
                        }
                        ladmin_info!(audit, "online backup result: {:?}", res);
                    }
                );
            }
        }

        // cleanup of maximum backup versions to keep
        let mut backup_file_list: Vec<PathBuf> = Vec::new();
        // pattern to find automatically generated backup files
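        // e.g. "backup-2021-07-31T22:00:00Z.json", the format written into dest_file above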
        let re = Regex::new(r"^backup-\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\.json$")
            .expect("Failed to parse regexp for online backup files.");

        // get a list of backup files
        match fs::read_dir(outpath) {
            Ok(rd) => {
                for entry in rd {
                    // get PathBuf
                    let pb = entry.unwrap().path();
                    // skip everything that is not a file
                    if !pb.is_file() {
                        continue;
                    }
                    // get the <file_name> part of /some/dir/<file_name>
                    let file_name = pb.file_name().unwrap().to_str().unwrap();
                    // check for an online backup file
                    if re.is_match(file_name) {
                        backup_file_list.push(pb.clone());
                    }
                }
            }
            Err(e) => {
                error!("Online backup cleanup error read dir {}: {}", outpath, e);
            }
        }

        // sort it to have items listed old to new
        backup_file_list.sort();

        // Versions: OLD 10.9.8.7.6.5.4.3.2.1 NEW
        //              |----delete----|keep|
        // 10 items, we want to keep the latest 3
        // if we have more files than we want to keep, we do some cleanup
        if backup_file_list.len() > versions {
            let x = backup_file_list.len() - versions;
            info!(
                "Online backup cleanup found {} versions, should keep {}, will remove {}",
                backup_file_list.len(),
                versions,
                x
            );
            backup_file_list.truncate(x);

            // removing files
            for file in backup_file_list {
                debug!("Online backup cleanup: removing {:?}", &file);
                match fs::remove_file(&file) {
                    Ok(_) => {}
                    Err(e) => {
                        error!(
                            "Online backup cleanup failed to remove file {:?}: {:?}",
                            file, e
                        )
                    }
                };
            }
        } else {
            debug!("Online backup cleanup had no files to remove");
        };

        // At the end of the event we send it for logging.
        self.log.send(audit).unwrap_or_else(|_| {
            error!("CRITICAL: UNABLE TO COMMIT LOGS");
        });
    }
    pub async fn handle_whoami(
        &self,
        uat: Option<String>,


@@ -14,6 +14,23 @@ pub struct IntegrationTestConfig {
    pub admin_password: String,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct OnlineBackup {
    pub path: String,
    #[serde(default = "default_online_backup_schedule")]
    pub schedule: String,
    #[serde(default = "default_online_backup_versions")]
    pub versions: usize,
}

fn default_online_backup_schedule() -> String {
    "00 22 * * *".to_string()
}

fn default_online_backup_versions() -> usize {
    7
}
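A quick sketch (not part of this commit) of how those serde defaults behave: only `path` is required in the TOML, while `schedule` and `versions` fall back to the functions above. It assumes the `toml` crate from the Cargo.toml hunk earlier is usable from this module, and the test module name is made up:

```rust
#[cfg(test)]
mod online_backup_config_tests {
    use super::OnlineBackup;

    #[test]
    fn defaults_apply_when_fields_are_omitted() {
        // Only `path` is given; schedule and versions come from
        // default_online_backup_schedule / default_online_backup_versions.
        let cfg: OnlineBackup = toml::from_str(r#"path = "/var/lib/kanidm/backups/""#)
            .expect("online_backup snippet should parse");
        assert_eq!(cfg.schedule, "00 22 * * *");
        assert_eq!(cfg.versions, 7);
    }
}
```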
#[derive(Serialize, Deserialize, Debug)]
pub struct TlsConfiguration {
    pub chain: String,
@@ -71,6 +88,7 @@ pub struct Configuration {
    pub cookie_key: [u8; 32],
    pub integration_test_config: Option<Box<IntegrationTestConfig>>,
    pub log_level: Option<u32>,
    pub online_backup: Option<OnlineBackup>,
    pub origin: String,
    pub role: ServerRole,
}
@@ -95,6 +113,10 @@ impl fmt::Display for Configuration {
            Some(u) => write!(f, "with log_level: {:x}, ", u),
            None => write!(f, "with log_level: default, "),
        })
        .and_then(|_| match &self.online_backup {
            Some(_) => write!(f, "with online_backup: enabled, "),
            None => write!(f, "with online_backup: disabled, "),
        })
        .and_then(|_| write!(f, "role: {}, ", self.role.to_string()))
        .and_then(|_| {
            write!(
@@ -124,6 +146,7 @@ impl Configuration {
            cookie_key: [0; 32],
            integration_test_config: None,
            log_level: None,
            online_backup: None,
            origin: "https://idm.example.com".to_string(),
            role: ServerRole::WriteReplica,
        };
@@ -136,6 +159,22 @@
        self.log_level = log_level;
    }
    pub fn update_online_backup(&mut self, cfg: &Option<OnlineBackup>) {
        match cfg {
            None => {}
            Some(cfg) => {
                let path = cfg.path.to_string();
                let schedule = cfg.schedule.to_string();
                let versions = cfg.versions;
                self.online_backup = Some(OnlineBackup {
                    path,
                    schedule,
                    versions,
                })
            }
        }
    }
    pub fn update_db_path(&mut self, p: &str) {
        self.db_path = p.to_string();
    }


@@ -702,6 +702,15 @@ pub async fn create_server_core(config: Configuration) -> Result<(), ()> {
    // Setup timed events associated to the write thread
    IntervalActor::start(server_write_ref);
    // Setup timed events associated to the read thread
    match &config.online_backup {
        Some(cfg) => {
            IntervalActor::start_online_backup(server_read_ref, &cfg)?;
        }
        None => {
            debug!("Online backup not requested, skipping");
        }
    };
    // If we have been requested to init LDAP, configure it now.
    match &config.ldapaddress {


@@ -910,6 +910,27 @@ impl PurgeRecycledEvent {
    }
}
#[derive(Debug)]
pub struct OnlineBackupEvent {
    pub ident: Identity,
    pub eventid: Uuid,
}

impl Default for OnlineBackupEvent {
    fn default() -> Self {
        Self::new()
    }
}

impl OnlineBackupEvent {
    pub fn new() -> Self {
        OnlineBackupEvent {
            ident: Identity::from_internal(),
            eventid: Uuid::new_v4(),
        }
    }
}
#[derive(Debug)]
pub struct ReviveRecycledEvent {
    pub ident: Identity,


@@ -1,11 +1,19 @@
//! This contains scheduled tasks/interval tasks that are run inside of the server on a schedule
//! as background operations.

use crate::actors::v1_read::QueryServerReadV1;
use crate::actors::v1_write::QueryServerWriteV1;
use crate::config::OnlineBackup;
use crate::constants::PURGE_FREQUENCY;
use crate::event::{OnlineBackupEvent, PurgeRecycledEvent, PurgeTombstoneEvent};
use chrono::Utc;
use saffron::parse::{CronExpr, English};
use saffron::Cron;
use std::fs;
use std::path::Path;
use tokio::time::{interval, sleep, Duration};

pub struct IntervalActor;
@@ -24,4 +32,81 @@
            }
        });
    }
    pub fn start_online_backup(
        server: &'static QueryServerReadV1,
        cfg: &OnlineBackup,
    ) -> Result<(), ()> {
        let outpath = cfg.path.to_owned();
        let schedule = cfg.schedule.to_owned();
        let versions = cfg.versions;

        // Cron expression handling
        let cron_expr = schedule.as_str().parse::<CronExpr>().map_err(|e| {
            error!("Online backup schedule parse error: {}", e);
        })?;

        info!(
            "Online backup schedule parsed as: {}",
            cron_expr.describe(English::default())
        );

        if !Cron::new(cron_expr.clone()).any() {
            error!(
                "Online backup schedule error: '{}' will not match any date.",
                schedule
            );
            return Err(());
        }

        // Output path handling
        let op = Path::new(&outpath);

        // does the path exist, and is it a directory?
        if !op.exists() {
            info!(
                "Online backup output folder '{}' does not exist, trying to create it.",
                outpath
            );
            fs::create_dir_all(&outpath).map_err(|e| {
                error!(
                    "Online backup failed to create output directory '{}': {}",
                    outpath.clone(),
                    e
                )
            })?;
        }

        if !op.is_dir() {
            error!("Online backup output '{}' is not a directory or we are missing permissions to access it.", outpath);
            return Err(());
        }

        tokio::spawn(async move {
            let ct = Utc::now();
            let cron = Cron::new(cron_expr.clone());

            let cron_iter = cron.clone().iter_after(ct);
            for next_time in cron_iter {
                // We add 1 second to the wait time in order to get "even" timestamps,
                // for example: 1 + 17:05:59Z --> 17:06:00Z
                let wait_seconds = 1 + (next_time - Utc::now()).num_seconds() as u64;
                info!(
                    "Online backup next run on {}, wait_time = {}s",
                    next_time, wait_seconds
                );

                sleep(Duration::from_secs(wait_seconds)).await;
                server
                    .handle_online_backup(
                        OnlineBackupEvent::new(),
                        outpath.clone().as_str(),
                        versions,
                    )
                    .await;
            }
        });
        Ok(())
    }
}
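To make the scheduling logic above easier to follow, here is a small self-contained sketch (not part of the commit; the helper function and `main` are illustrative) that uses the same saffron and chrono calls to work out how long the task sleeps before its next backup run:

```rust
use chrono::Utc;
use saffron::parse::{CronExpr, English};
use saffron::Cron;

// Illustrative helper mirroring start_online_backup's wait calculation.
fn seconds_until_next_backup(schedule: &str) -> Option<u64> {
    // Parse the cron expression, e.g. "00 22 * * *" (every day at 22:00 UTC).
    let cron_expr = schedule.parse::<CronExpr>().ok()?;
    println!("schedule: {}", cron_expr.describe(English::default()));

    // Take the first matching instant after "now".
    let cron = Cron::new(cron_expr);
    let next_time = cron.iter_after(Utc::now()).next()?;

    // +1 second so the backup timestamp lands on an "even" boundary,
    // e.g. 17:05:59Z -> 17:06:00Z.
    Some(1 + (next_time - Utc::now()).num_seconds() as u64)
}

fn main() {
    match seconds_until_next_backup("00 22 * * *") {
        Some(s) => println!("next online backup in {}s", s),
        None => eprintln!("schedule did not parse or never matches"),
    }
}
```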


@@ -26,7 +26,7 @@ use std::path::PathBuf;
use std::str::FromStr;
use kanidm::audit::LogLevel;
use kanidm::config::{Configuration, OnlineBackup, ServerRole};
use kanidm::core::{
    backup_server_core, create_server_core, dbscan_get_id2entry_core, dbscan_list_id2entry_core,
    dbscan_list_index_analysis_core, dbscan_list_index_core, dbscan_list_indexes_core,
@@ -50,6 +50,7 @@ struct ServerConfig {
    pub tls_chain: Option<String>,
    pub tls_key: Option<String>,
    pub log_level: Option<String>,
    pub online_backup: Option<OnlineBackup>,
    pub origin: String,
    #[serde(default)]
    pub role: ServerRole,
@@ -223,6 +224,7 @@ async fn main() {
    config.update_tls(&sconfig.tls_chain, &sconfig.tls_key);
    config.update_bind(&sconfig.bindaddress);
    config.update_ldapbind(&sconfig.ldapbindaddress);
    config.update_online_backup(&sconfig.online_backup);
    if let Some(i_str) = &(sconfig.tls_chain) {
        let i_path = PathBuf::from(i_str.as_str());