Implement Online Backups (#25) (#536)

cuberoot74088 2021-07-31 07:13:46 +00:00 committed by GitHub
parent 7b60d9d03d
commit b4f99c8e7a
11 changed files with 357 additions and 20 deletions

Cargo.lock (generated)

@ -1820,6 +1820,7 @@ dependencies = [
"rpassword",
"rusqlite",
"rustc_version 0.4.0",
"saffron",
"serde",
"serde_cbor",
"serde_derive",
@ -2176,6 +2177,16 @@ dependencies = [
"version_check 0.1.5",
]
[[package]]
name = "nom"
version = "5.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af"
dependencies = [
"memchr",
"version_check 0.9.3",
]
[[package]]
name = "nom"
version = "6.1.2"
@ -2920,6 +2931,16 @@ version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
[[package]]
name = "saffron"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03fb9a628596fc7590eb7edbf7b0613287be78df107f5f97b118aad59fb2eea9"
dependencies = [
"chrono",
"nom 5.1.2",
]
[[package]]
name = "same-file"
version = "1.0.6"


@ -36,15 +36,28 @@ db_path = "/var/lib/kanidm/kanidm.db"
# origin = "https://idm.example.com"
origin = "https://idm.example.com:8443"
#
#
# [online_backup]
# The path to the output folder for online backups
# path = "/var/lib/kanidm/backups/"
# The schedule to run online backups - see https://crontab.guru/
# every day at 22:00 UTC (default)
# schedule = "00 22 * * *"
# four times a day, at 3 minutes past every 6th hour
# schedule = "03 */6 * * *"
# Number of backups to keep (default 7)
# versions = 7
#
#
# The role of this server. This affects features available and how replication may interact.
# Valid roles are:
# - write_replica
# - WriteReplica
# This server provides all functionality of Kanidm. It allows authentication, writes, and
# the web user interface to be served.
# - write_replica_no_ui
# - WriteReplicaNoUI
# This server is the same as a write_replica, but does NOT offer the web user interface.
# - read_only_replica
# - ReadOnlyReplica
# This server will not accept writes initiated by clients. It supports authentication and reads,
# and must have a replication agreement as a source of its data.
# Defaults to "write_replica".
# role = "write_replica"
# Defaults to "WriteReplica".
# role = "WriteReplica"


@ -41,6 +41,13 @@ This is a simple backup of the data volume.
# Backup your docker's volume folder
docker start <container name>
## Method 3
Automatic backups can be generated online by a running `kanidmd server` instance
by including the `[online_backup]` section in `server.toml`.
This runs regular backups on a cron-defined schedule and keeps only the configured
number of backup versions. An example is located in [examples/server.toml](../../examples/server.toml).
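For example, the following section (using the defaults shown in the example file) takes a backup
every day at 22:00 UTC and keeps the seven most recent versions:

[online_backup]
path = "/var/lib/kanidm/backups/"
schedule = "00 22 * * *"
versions = 7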
# Rename the domain
There are some cases where you may need to rename the domain. You should have configured
@ -66,7 +73,6 @@ you can then rename the domain with the commands as follows:
-n idm.new.domain.name
docker start <container name>
# Reindexing after schema extension
In some (rare) cases you may need to reindex.
@ -101,7 +107,7 @@ from the sqlite freelists, as well as performing some data reordering tasks that
queries more efficient. It is recommended that you vacuum after a reindex is performed or
when you wish to reclaim space in the database file.
Vacuum is also able to change the pagesize of the database. After changing db\_fs\_type (which affects
Vacuum is also able to change the pagesize of the database. After changing `db_fs_type` (which affects
pagesize) in server.toml, you must run a vacuum for this to take effect.
docker stop <container name>
@ -114,7 +120,7 @@ pagesize) in server.toml, you must run a vacuum for this to take effect.
The server ships with a number of verification utilities to ensure that data is consistent such
as referential integrity or memberof.
Note that verification really is a last resort - the server does *a lot* to prevent and self-heal
Note that verification really is a last resort - the server does _a lot_ to prevent and self-heal
from errors at run time, so you should rarely if ever require this utility. This utility was
developed to guarantee consistency during development!


@ -4,11 +4,11 @@ You will also need a config file in the volume named `server.toml` (Within the c
# The webserver bind address. Will use HTTPS if tls_* is provided.
# Defaults to "127.0.0.1:8443"
bindaddress = "127.0.0.1:8443"
bindaddress = "[::]:8443"
#
# The read-only ldap server bind address. The server will use LDAPS if tls_* is provided.
# Defaults to "" (disabled)
# ldapbindaddress = "127.0.0.1:3636"
# ldapbindaddress = "[::]:3636"
#
# The path to the kanidm database.
db_path = "/data/kanidm.db"
@ -40,6 +40,19 @@ You will also need a config file in the volume named `server.toml` (Within the c
# origin = "https://idm.example.com"
origin = "https://idm.example.com:8443"
#
#
# [online_backup]
# The path to the output folder for online backups
# path = "/var/lib/kanidm/backups/"
# The schedule to run online backups - see https://crontab.guru/
# every day at 22:00 UTC (default)
# schedule = "00 22 * * *"
# four times a day, at 3 minutes past every 6th hour
# schedule = "03 */6 * * *"
# Number of backups to keep (default 7)
# versions = 7
#
#
# The role of this server. This affects features available and how replication may interact.
# Valid roles are:
# - WriteReplica
@ -60,7 +73,7 @@ Then you can setup the initial admin account and initialise the database into yo
docker run --rm -i -t -v kanidmd:/data kanidm/server:latest /sbin/kanidmd recover_account -c /data/server.toml -n admin
You then want to set your domain name so that security principal names (SPNs) are generated correctly.
This domain name *must* match the url/origin of the server that you plan to use to interact with
This domain name _must_ match the url/origin of the server that you plan to use to interact with
so that other features work correctly. It is possible to change this domain name later.
docker run --rm -i -t -v kanidmd:/data kanidm/server:latest /sbin/kanidmd domain_name_change -c /data/server.toml -n idm.example.com


@ -38,6 +38,7 @@ rand = "0.8"
toml = "0.5"
chrono = "0.4"
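# saffron provides cron expression parsing for the online backup schedule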
saffron = "0.1.0"
regex = "1"
lazy_static = "1.2.0"


@ -1,10 +1,15 @@
use tokio::sync::mpsc::UnboundedSender as Sender;
use chrono::{DateTime, SecondsFormat, Utc};
use std::sync::Arc;
use crate::prelude::*;
use crate::event::{AuthEvent, AuthResult, SearchEvent, SearchResult, WhoamiResult};
use crate::be::BackendTransaction;
use crate::event::{
AuthEvent, AuthResult, OnlineBackupEvent, SearchEvent, SearchResult, WhoamiResult,
};
use crate::idm::event::{
CredentialStatusEvent, RadiusAuthTokenEvent, ReadBackupCodeEvent, UnixGroupTokenEvent,
UnixUserAuthEvent, UnixUserTokenEvent,
@ -26,6 +31,9 @@ use kanidm_proto::v1::{
WhoamiResponse,
};
use regex::Regex;
use std::fs;
use std::path::{Path, PathBuf};
use uuid::Uuid;
use ldap3_server::simple::*;
@ -167,6 +175,125 @@ impl QueryServerReadV1 {
res
}
pub async fn handle_online_backup(
&self,
msg: OnlineBackupEvent,
outpath: &str,
versions: usize,
) {
let mut audit = AuditScope::new("online backup", msg.eventid, self.log_level);
ltrace!(audit, "Begin online backup event {:?}", msg.eventid);
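// Backups are written as backup-<RFC 3339 UTC timestamp>.json, e.g. backup-2021-07-31T07:13:46Z.json;
// the cleanup regex below relies on this naming scheme.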
let now: DateTime<Utc> = Utc::now();
let timestamp = now.to_rfc3339_opts(SecondsFormat::Secs, true);
let dest_file = format!("{}/backup-{}.json", outpath, timestamp);
match Path::new(&dest_file).exists() {
true => {
error!(
"Online backup file {} already exists, will not owerwrite it.",
dest_file
);
}
false => {
let idms_prox_read = self.idms.proxy_read_async().await;
lperf_op_segment!(
&mut audit,
"actors::v1_read::handle<OnlineBackupEvent>",
|| {
let res = idms_prox_read
.qs_read
.get_be_txn()
.backup(&mut audit, &dest_file);
match &res {
Ok(()) => {
info!("Online backup created {} successfully", dest_file);
}
Err(e) => {
error!("Online backup failed to create {}: {:?}", dest_file, e);
}
}
ladmin_info!(audit, "online backup result: {:?}", res);
}
);
}
}
// cleanup of maximum backup versions to keep
let mut backup_file_list: Vec<PathBuf> = Vec::new();
// pattern to find automatically generated backup files
let re = Regex::new(r"^backup-\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\.json$")
.expect("Failed to parse regexp for online backup files.");
// get a list of backup files
match fs::read_dir(outpath) {
Ok(rd) => {
for entry in rd {
// get PathBuf
let pb = entry.unwrap().path();
// skip everything that is not a file
if !pb.is_file() {
continue;
}
// get the <file_name> component of /some/dir/<file_name>
let file_name = pb.file_name().unwrap().to_str().unwrap();
// check for an online backup file
if re.is_match(file_name) {
backup_file_list.push(pb.clone());
}
}
}
Err(e) => {
error!("Online backup cleanup error read dir {}: {}", outpath, e);
}
}
// sort it to have items listed old to new
backup_file_list.sort();
// Example: with 10 files sorted old -> new and versions = 3, the 7 oldest are removed:
//   OLD 10 9 8 7 6 5 4 3 2 1 NEW
//       |-----delete-----|keep|
// If we have more files than we want to keep, we do some cleanup.
if backup_file_list.len() > versions {
let x = backup_file_list.len() - versions;
info!(
"Online backup cleanup found {} versions, should keep {}, will remove {}",
backup_file_list.len(),
versions,
x
);
backup_file_list.truncate(x);
// removing files
for file in backup_file_list {
debug!("Online backup cleanup: removing {:?}", &file);
match fs::remove_file(&file) {
Ok(_) => {}
Err(e) => {
error!(
"Online backup cleanup failed to remove file {:?}: {:?}",
file, e
)
}
};
}
} else {
debug!("Online backup cleanup had no files to remove");
};
// At the end of the event we send it for logging.
self.log.send(audit).unwrap_or_else(|_| {
error!("CRITICAL: UNABLE TO COMMIT LOGS");
});
}
pub async fn handle_whoami(
&self,
uat: Option<String>,


@ -14,6 +14,23 @@ pub struct IntegrationTestConfig {
pub admin_password: String,
}
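/// Settings read from the optional `[online_backup]` section of server.toml.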
#[derive(Serialize, Deserialize, Debug)]
pub struct OnlineBackup {
pub path: String,
#[serde(default = "default_online_backup_schedule")]
pub schedule: String,
#[serde(default = "default_online_backup_versions")]
pub versions: usize,
}
fn default_online_backup_schedule() -> String {
"00 22 * * *".to_string()
}
fn default_online_backup_versions() -> usize {
7
}
#[derive(Serialize, Deserialize, Debug)]
pub struct TlsConfiguration {
pub chain: String,
@ -71,6 +88,7 @@ pub struct Configuration {
pub cookie_key: [u8; 32],
pub integration_test_config: Option<Box<IntegrationTestConfig>>,
pub log_level: Option<u32>,
pub online_backup: Option<OnlineBackup>,
pub origin: String,
pub role: ServerRole,
}
@ -95,6 +113,10 @@ impl fmt::Display for Configuration {
Some(u) => write!(f, "with log_level: {:x}, ", u),
None => write!(f, "with log_level: default, "),
})
.and_then(|_| match &self.online_backup {
Some(_) => write!(f, "with online_backup: enabled, "),
None => write!(f, "with online_backup: disabled, "),
})
.and_then(|_| write!(f, "role: {}, ", self.role.to_string()))
.and_then(|_| {
write!(
@ -124,6 +146,7 @@ impl Configuration {
cookie_key: [0; 32],
integration_test_config: None,
log_level: None,
online_backup: None,
origin: "https://idm.example.com".to_string(),
role: ServerRole::WriteReplica,
};
@ -136,6 +159,22 @@ impl Configuration {
self.log_level = log_level;
}
pub fn update_online_backup(&mut self, cfg: &Option<OnlineBackup>) {
match cfg {
None => {}
Some(cfg) => {
let path = cfg.path.to_string();
let schedule = cfg.schedule.to_string();
let versions = cfg.versions;
self.online_backup = Some(OnlineBackup {
path,
schedule,
versions,
})
}
}
}
pub fn update_db_path(&mut self, p: &str) {
self.db_path = p.to_string();
}


@ -702,6 +702,15 @@ pub async fn create_server_core(config: Configuration) -> Result<(), ()> {
// Setup timed events associated to the write thread
IntervalActor::start(server_write_ref);
// Setup timed events associated to the read thread
match &config.online_backup {
Some(cfg) => {
IntervalActor::start_online_backup(server_read_ref, &cfg)?;
}
None => {
debug!("Online backup not requested, skipping");
}
};
// If we have been requested to init LDAP, configure it now.
match &config.ldapaddress {


@ -910,6 +910,27 @@ impl PurgeRecycledEvent {
}
}
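// Fired on the configured online backup schedule by the interval actor and handled by
// QueryServerReadV1::handle_online_backup.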
#[derive(Debug)]
pub struct OnlineBackupEvent {
pub ident: Identity,
pub eventid: Uuid,
}
impl Default for OnlineBackupEvent {
fn default() -> Self {
Self::new()
}
}
impl OnlineBackupEvent {
pub fn new() -> Self {
OnlineBackupEvent {
ident: Identity::from_internal(),
eventid: Uuid::new_v4(),
}
}
}
#[derive(Debug)]
pub struct ReviveRecycledEvent {
pub ident: Identity,


@ -1,11 +1,19 @@
//! This contains scheduled tasks/interval tasks that are run inside of the server on a schedule
//! as background operations.
use crate::actors::v1_read::QueryServerReadV1;
use crate::actors::v1_write::QueryServerWriteV1;
use crate::constants::PURGE_FREQUENCY;
use crate::event::{PurgeRecycledEvent, PurgeTombstoneEvent};
use tokio::time::{interval, Duration};
use crate::config::OnlineBackup;
use crate::constants::PURGE_FREQUENCY;
use crate::event::{OnlineBackupEvent, PurgeRecycledEvent, PurgeTombstoneEvent};
use chrono::Utc;
use saffron::parse::{CronExpr, English};
use saffron::Cron;
use std::fs;
use std::path::Path;
use tokio::time::{interval, sleep, Duration};
pub struct IntervalActor;
@ -24,4 +32,81 @@ impl IntervalActor {
}
});
}
pub fn start_online_backup(
server: &'static QueryServerReadV1,
cfg: &OnlineBackup,
) -> Result<(), ()> {
let outpath = cfg.path.to_owned();
let schedule = cfg.schedule.to_owned();
let versions = cfg.versions;
// Cron expression handling
let cron_expr = schedule.as_str().parse::<CronExpr>().map_err(|e| {
error!("Online backup schedule parse error: {}", e);
})?;
info!(
"Online backup schedule parsed as: {}",
cron_expr.describe(English::default())
);
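// Reject schedules that will never match any date (saffron reports this via Cron::any()).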
if !Cron::new(cron_expr.clone()).any() {
error!(
"Online backup schedule error: '{}' will not match any date.",
schedule
);
return Err(());
}
// Output path handling
let op = Path::new(&outpath);
// does the path exist and is a directory?
if !op.exists() {
info!(
"Online backup output folder '{}' does not exist, trying to create it.",
outpath
);
fs::create_dir_all(&outpath).map_err(|e| {
error!(
"Online backup failed to create output directory '{}': {}",
outpath.clone(),
e
)
})?;
}
if !op.is_dir() {
error!("Online backup output '{}' is not a directory or we are missing permissions to access it.", outpath);
return Err(());
}
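// Run the schedule on a background task: sleep until the next matching time,
// then ask the read actor to take a backup.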
tokio::spawn(async move {
let ct = Utc::now();
let cron = Cron::new(cron_expr.clone());
let cron_iter = cron.clone().iter_after(ct);
for next_time in cron_iter {
// We add 1 second to the wait time in order to get "even" timestamps,
// for example: 1 + 17:05:59Z --> 17:06:00Z
let wait_seconds = 1 + (next_time - Utc::now()).num_seconds() as u64;
info!(
"Online backup next run on {}, wait_time = {}s",
next_time, wait_seconds
);
sleep(Duration::from_secs(wait_seconds)).await;
server
.handle_online_backup(
OnlineBackupEvent::new(),
outpath.clone().as_str(),
versions,
)
.await;
}
});
Ok(())
}
}


@ -26,7 +26,7 @@ use std::path::PathBuf;
use std::str::FromStr;
use kanidm::audit::LogLevel;
use kanidm::config::{Configuration, ServerRole};
use kanidm::config::{Configuration, OnlineBackup, ServerRole};
use kanidm::core::{
backup_server_core, create_server_core, dbscan_get_id2entry_core, dbscan_list_id2entry_core,
dbscan_list_index_analysis_core, dbscan_list_index_core, dbscan_list_indexes_core,
@ -50,6 +50,7 @@ struct ServerConfig {
pub tls_chain: Option<String>,
pub tls_key: Option<String>,
pub log_level: Option<String>,
pub online_backup: Option<OnlineBackup>,
pub origin: String,
#[serde(default)]
pub role: ServerRole,
@ -223,6 +224,7 @@ async fn main() {
config.update_tls(&sconfig.tls_chain, &sconfig.tls_key);
config.update_bind(&sconfig.bindaddress);
config.update_ldapbind(&sconfig.ldapbindaddress);
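// Pass the optional [online_backup] section of server.toml through to the server configuration.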
config.update_online_backup(&sconfig.online_backup);
if let Some(i_str) = &(sconfig.tls_chain) {
let i_path = PathBuf::from(i_str.as_str());