Mirror of https://github.com/kanidm/kanidm.git (synced 2025-02-23 20:47:01 +01:00)

Add statistical analysis to indexes (#505)

parent 4c6a28b7ad
commit 040e9fd352
@@ -18,3 +18,5 @@
 ## Acknowledgements

 * M. Gerstner
+* Perri Boulton
@@ -2,16 +2,22 @@ use crate::audit::AuditScope;
 use crate::be::idl_sqlite::{
     IdlSqlite, IdlSqliteReadTransaction, IdlSqliteTransaction, IdlSqliteWriteTransaction,
 };
-use crate::be::idxkey::{IdlCacheKey, IdlCacheKeyRef, IdlCacheKeyToRef};
+use crate::be::idxkey::{
+    IdlCacheKey, IdlCacheKeyRef, IdlCacheKeyToRef, IdxKey, IdxKeyRef, IdxKeyToRef, IdxSlope,
+};
 use crate::be::{BackendConfig, IdList, IdRawEntry};
 use crate::entry::{Entry, EntryCommitted, EntrySealed};
 use crate::value::IndexType;
 use crate::value::Value;

 use concread::arcache::{ARCache, ARCacheReadTxn, ARCacheWriteTxn};
 use concread::cowcell::*;
 use idlset::{v2::IDLBitRange, AndNot};
 use kanidm_proto::v1::{ConsistencyError, OperationError};

 use hashbrown::HashMap;
 use std::collections::BTreeSet;
 use std::convert::TryInto;
 use std::ops::DerefMut;
 use std::time::Duration;
 use uuid::Uuid;
@@ -800,6 +806,262 @@ impl<'a> IdlArcSqliteWriteTransaction<'a> {
        })
    }

    pub fn is_idx_slopeyness_generated(
        &self,
        audit: &mut AuditScope,
    ) -> Result<bool, OperationError> {
        self.db.is_idx_slopeyness_generated(audit)
    }

    pub fn get_idx_slope(
        &self,
        audit: &mut AuditScope,
        ikey: &IdxKey,
    ) -> Result<Option<IdxSlope>, OperationError> {
        self.db.get_idx_slope(audit, ikey)
    }

    /// Index Slope Analysis. For the purpose of external modules you can consider this as a
    /// module that generates "weights" for each index that we have. Smaller values are faster
    /// indexes - larger values are more costly ones. This is not intended to yield perfect
    /// weights. The intent is to separate the obviously more effective indexes rather than
    /// to min-max the fine tuning of these. Consider name=foo vs class=*. name=foo will always
    /// be better than class=*, but comparing name=foo to spn=foo is "much of a muchness" since
    /// both are really fast.
    pub fn analyse_idx_slopes(&mut self, audit: &mut AuditScope) -> Result<(), OperationError> {
        /*
         * Inside of this analysis there are two major factors we need to understand:
         *
         * * What is the variation of idl lengths within an index?
         * * How many keys are stored in this index?
         *
         * Since we have the filter2idl threshold, we want to find "what is the smallest
         * and most unique index asap so we can exit faster". This allows us to avoid
         * loading larger, more costly indexes that either have large idls, high variation,
         * or few keys, and are likely to miss and have to go out to disk.
         *
         * A few methods were proposed, but thanks to advice from Perri Boulton (psychology
         * researcher with a background in statistics), we were able to devise a reasonable
         * approach.
         *
         * These are commented in line to help understand the process.
         */

        /*
         * Step 1 - we have an index like "idx_eq_member". It has data that looks somewhat
         * like:
         *
         *  key    | idl
         *  -------+----------------
         *  uuid_a | [1, 2, 3, ...]
         *  -------+----------------
         *  uuid_b | [4, 5, 6, ...]
         *
         * We need to collect this into a single vec of "how long is each idl". Since we have
         * each idl in the vec, the length of the vec is also the number of keys in the set.
         * This yields for us:
         *
         *  idx_eq_member: [4.0, 5.0, ...]
         *  where each f64 value is the float representation of the length of the idl.
         *
         * We then assemble these into a map so we have each idxkey and its associated list
         * of idl lens.
         */

        let mut data: HashMap<IdxKey, Vec<f64>> = HashMap::new();
        self.idl_cache.iter_dirty().for_each(|(k, maybe_idl)| {
            if let Some(idl) = maybe_idl {
                let idl_len: u32 = idl.len().try_into().unwrap_or(u32::MAX);
                // Convert to something we can use.
                let idl_len = f64::from(idl_len);

                let kref = IdxKeyRef::new(&k.a, &k.i);
                if idl_len > 0.0 {
                    // It's worth looking at. Anything len 0 will be removed.
                    if let Some(lens) = data.get_mut(&kref as &dyn IdxKeyToRef) {
                        lens.push(idl_len)
                    } else {
                        data.insert(kref.to_key(), vec![idl_len]);
                    }
                }
            }
        });

        /*
         * So now for each of our sets:
         *
         *  idx_eq_member: [4.0, 5.0, ...]
         *  idx_eq_name  : [1.0, 1.0, 1.0, ...]
         *
         * To get the variability, we calculate the normal distribution of the set of values,
         * and then using this variance we use the 1st deviation (~85%) value to assert that
         * 85% or more of the values in this set will be "equal or less" than this length.
         *
         * So given say:
         *  [1.0, 1.0, 1.0, 1.0]
         * We know that the sd_1 will be 1.0. Given:
         *  [1.0, 1.0, 2.0, 3.0]
         * We know that it will be ~2.57 (mean 1.75 + sd of 0.82).
         *
         * The other factor is number of keys. This is thankfully easy! We have that from
         * vec.len().
         *
         * We can now calculate the index slope. Why is it a slope you ask? Because we
         * plot the data out on a graph, with "variability" on the y axis, and number of
         * keys on the x.
         *
         * Let's plot the data we just added.
         *
         *    |
         *  4 +
         *    |
         *  3 +
         *    |
         *  2 +        * eq_member
         *    |
         *  1 +  * eq_name
         *    |
         *    +--+--+--+--+--
         *       1  2  3  4
         *
         * Now, if we were to connect a line from (0,0) to each point we get a line with an angle.
         *
         *    |
         *  4 +
         *    |
         *  3 +
         *    |
         *  2 +        * eq_member
         *    |
         *  1 +  * eq_name
         *    |/---------/
         *    +--+--+--+--+--
         *       1  2  3  4
         *
         *    |
         *  4 +
         *    |
         *  3 +
         *    |
         *  2 +        * eq_member
         *    |      /--/
         *  1 + /--/ * eq_name
         *    |/--/
         *    +--+--+--+--+--
         *       1  2  3  4
         *
         * (Look, it's ascii art, don't judge.)
         *
         * The point is that eq_member is "steeper" and eq_name is "shallower". This is what we
         * call the "slopeyness", aka the jank of the line - or more precisely, the angle.
         *
         * Now we need a way to numerically compare these lines. Since the points could be
         * anywhere on our graph:
         *
         *    |
         *  4 +  *
         *    |
         *  3 +        *
         *    |
         *  2 +     *
         *    |
         *  1 +           *
         *    |
         *    +--+--+--+--+--
         *       1  2  3  4
         *
         * While we can see what's obviously best here, a computer has to know it. So we now
         * assume that these points construct a triangle, going through (0,0), (x, 0) and (x, y).
         *
         *            Λ│
         *           ╱ │
         *          ╱  │
         *         ╱   │
         *        ╱    │
         *       ╱     │
         *      ╱      │
         *     ╱       │ sd_1
         *    ╱        │
         *   ╱         │
         *  ───────────┼
         *     nkeys
         *
         * Since this is right angled we can use arctan to work out the degrees of the line. This
         * gives us a value from 1.0 to 90.0 (we clamp to a minimum of 1.0, because we use 0 as
         * "None" in the NonZeroU8 type in filter.rs, which allows the niche optimisation).
         *
         * The problem is that we have to go from float to u8 - this means we lose decimal
         * precision in the conversion. To lessen this, we scale the angle up (by 2.8) to give
         * some extra weight to each angle, minimising this loss, and then we convert.
         *
         * And there we have it! A slope factor for the index! A way to compare these sets quickly
         * at query optimisation time to minimise index access.
         */
        let slopes: HashMap<_, _> = data
            .into_iter()
            .filter_map(|(k, lens)| {
                let slope_factor = Self::calculate_sd_slope(lens);
                if slope_factor == 0 || slope_factor == IdxSlope::MAX {
                    None
                } else {
                    Some((k, slope_factor))
                }
            })
            .collect();
        ltrace!(audit, "Generated slopes -> {:?}", slopes);
        // Write the data down
        self.db.store_idx_slope_analysis(audit, &slopes)
    }

    fn calculate_sd_slope(data: Vec<f64>) -> IdxSlope {
        let (n_keys, sd_1) = if data.len() >= 2 {
            // We can only do SD on sets of 2 or more.
            let l: u32 = data.len().try_into().unwrap_or(u32::MAX);
            let c = f64::from(l);
            let mean = data.iter().take(u32::MAX as usize).sum::<f64>() / c;
            let variance: f64 = data
                .iter()
                .take(u32::MAX as usize)
                .map(|len| {
                    let delta = mean - len;
                    delta * delta
                })
                .sum::<f64>()
                / (c - 1.0);

            let sd = variance.sqrt();

            // This is saying ~85% of values will be at least this len or less.
            let sd_1 = mean + sd;
            (c, sd_1)
        } else if data.len() == 1 {
            (1.0, data[0])
        } else {
            // Can't resolve.
            return IdxSlope::MAX;
        };

        // Now we know sd_1 and the number of keys. We can use this as a triangle to work out
        // the angle along the hypotenuse. We use this angle - or slope - to show which
        // elements have the smallest sd_1 and most keys available. Then because this
        // is bound between 0.0 -> 90.0, we "unfurl" this by multiplying by 2.8, which
        // gives us a little more precision when we drop the decimal point.
        let sf = (sd_1 / n_keys).atan().to_degrees() * 2.8;

        // Now these are fractions, and we can't use those in u8, so we clamp the min/max values
        // that we expect to be yielded.
        let sf = sf.clamp(1.0, 254.0);
        if !sf.is_finite() {
            IdxSlope::MAX
        } else {
            // SAFETY
            // `sf` is clamped between 1.0 and 254.0 above, ensuring it is
            // always in range.
            unsafe { sf.to_int_unchecked::<IdxSlope>() }
        }
    }

    pub fn create_name2uuid(&self, audit: &mut AuditScope) -> Result<(), OperationError> {
        self.db.create_name2uuid(audit)
    }
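To make the arithmetic above concrete, here is a minimal standalone sketch of the same sd_1 + arctan computation (the `slope` helper is illustrative, not part of the commit, and assumes two or more keys):

```rust
// Minimal sketch of the slope calculation described above.
// `idl_lens` holds one idl length per key of a single index.
fn slope(idl_lens: &[f64]) -> u8 {
    let n_keys = idl_lens.len() as f64;
    let mean = idl_lens.iter().sum::<f64>() / n_keys;
    let variance =
        idl_lens.iter().map(|l| (mean - l).powi(2)).sum::<f64>() / (n_keys - 1.0);
    // ~85% of idls in this index are sd_1 long or shorter.
    let sd_1 = mean + variance.sqrt();
    // Angle of the (n_keys, sd_1) triangle, scaled by 2.8 before truncation.
    let sf = (sd_1 / n_keys).atan().to_degrees() * 2.8;
    sf.clamp(1.0, 254.0) as u8
}

fn main() {
    // Three unique keys, one id each (like name=...): shallow, cheap.
    assert_eq!(slope(&[1.0, 1.0, 1.0]), 51);
    // Two keys holding 2 and 1 ids: steeper, more costly.
    assert_eq!(slope(&[2.0, 1.0]), 133);
}
```

The two asserted values match what the new backend test (further down in this commit) expects for the `name` and `tb` indexes after a reindex.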
@@ -1,7 +1,8 @@
 use crate::audit::AuditScope;
-use crate::be::{BackendConfig, IdList, IdRawEntry};
+use crate::be::{BackendConfig, IdList, IdRawEntry, IdxKey, IdxSlope};
 use crate::entry::{Entry, EntryCommitted, EntrySealed};
 use crate::value::{IndexType, Value};
 use hashbrown::HashMap;
 use idlset::v2::IDLBitRange;
 use kanidm_proto::v1::{ConsistencyError, OperationError};
 use r2d2::Pool;
@@ -195,13 +196,7 @@ pub trait IdlSqliteTransaction {
        }
    }

-    fn exists_idx(
-        &self,
-        audit: &mut AuditScope,
-        attr: &str,
-        itype: &IndexType,
-    ) -> Result<bool, OperationError> {
-        let tname = format!("idx_{}_{}", itype.as_idx_str(), attr);
+    fn exists_table(&self, audit: &mut AuditScope, tname: &str) -> Result<bool, OperationError> {
        let mut stmt = self
            .get_conn()
            .prepare("SELECT COUNT(name) from sqlite_master where name = :tname")
@@ -210,7 +205,7 @@ pub trait IdlSqliteTransaction {
                OperationError::SqliteError
            })?;
        let i: Option<i64> = stmt
-            .query_row(&[(":tname", &tname)], |row| row.get(0))
+            .query_row(&[(":tname", tname)], |row| row.get(0))
            .map_err(|e| {
                ladmin_error!(audit, "SQLite Error {:?}", e);
                OperationError::SqliteError
@@ -223,6 +218,16 @@ pub trait IdlSqliteTransaction {
        }
    }

    fn exists_idx(
        &self,
        audit: &mut AuditScope,
        attr: &str,
        itype: &IndexType,
    ) -> Result<bool, OperationError> {
        let tname = format!("idx_{}_{}", itype.as_idx_str(), attr);
        self.exists_table(audit, &tname)
    }

    fn get_idl(
        &self,
        audit: &mut AuditScope,
@@ -1067,6 +1072,94 @@ impl IdlSqliteWriteTransaction {
        })
    }

    pub fn store_idx_slope_analysis(
        &self,
        audit: &mut AuditScope,
        slopes: &HashMap<IdxKey, IdxSlope>,
    ) -> Result<(), OperationError> {
        self.conn
            .execute(
                "CREATE TABLE IF NOT EXISTS idxslope_analysis (
                    id TEXT PRIMARY KEY,
                    slope INTEGER
                )",
                [],
            )
            .map(|_| ())
            .map_err(|e| {
                ladmin_error!(audit, "SQLite Error {:?}", e);
                OperationError::SqliteError
            })?;

        // Remove any data if it exists.
        self.conn
            .execute("DELETE FROM idxslope_analysis", [])
            .map(|_| ())
            .map_err(|e| {
                ladmin_error!(audit, "SQLite Error {:?}", e);
                OperationError::SqliteError
            })?;

        slopes.iter().try_for_each(|(k, v)| {
            let key = format!("idx_{}_{}", k.itype.as_idx_str(), k.attr);
            self.conn
                .execute(
                    "INSERT OR REPLACE INTO idxslope_analysis (id, slope) VALUES(:id, :slope)",
                    named_params! {
                        ":id": &key,
                        ":slope": &v,
                    },
                )
                .map(|_| ())
                .map_err(|e| {
                    eprintln!("CRITICAL: rusqlite error {:?}", e);
                    OperationError::SqliteError
                })
        })
    }

    pub fn is_idx_slopeyness_generated(
        &self,
        audit: &mut AuditScope,
    ) -> Result<bool, OperationError> {
        self.exists_table(audit, "idxslope_analysis")
    }

    pub fn get_idx_slope(
        &self,
        audit: &mut AuditScope,
        ikey: &IdxKey,
    ) -> Result<Option<IdxSlope>, OperationError> {
        let analysis_exists = self.exists_table(audit, "idxslope_analysis")?;
        if !analysis_exists {
            return Ok(None);
        }

        // Then we have the table and it should have things in it, let's put
        // it all together.
        let key = format!("idx_{}_{}", ikey.itype.as_idx_str(), ikey.attr);

        let mut stmt = self
            .get_conn()
            .prepare("SELECT slope FROM idxslope_analysis WHERE id = :id")
            .map_err(|e| {
                ladmin_error!(audit, "SQLite Error {:?}", e);
                OperationError::SqliteError
            })?;

        let slope: Option<IdxSlope> = stmt
            .query_row(&[(":id", &key)], |row| row.get(0))
            // We don't mind if it doesn't exist
            .optional()
            .map_err(|e| {
                ladmin_error!(audit, "SQLite Error {:?}", e);
                OperationError::SqliteError
            })?;
        ltrace!(audit, "Got slope for index name {} -> {:?}", key, slope);

        Ok(slope)
    }

    pub unsafe fn purge_id2entry(&self, audit: &mut AuditScope) -> Result<(), OperationError> {
        ltrace!(audit, "purge id2entry ...");
        self.conn
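For readers unfamiliar with the rusqlite calls used here, a self-contained sketch of the same persistence pattern (a hypothetical standalone program; the table shape and statements mirror the code above, while the stored value is illustrative):

```rust
use rusqlite::{named_params, Connection, OptionalExtension};

fn main() -> rusqlite::Result<()> {
    let conn = Connection::open_in_memory()?;
    conn.execute(
        "CREATE TABLE IF NOT EXISTS idxslope_analysis (id TEXT PRIMARY KEY, slope INTEGER)",
        [],
    )?;
    // Store a slope for the equality index on "name".
    conn.execute(
        "INSERT OR REPLACE INTO idxslope_analysis (id, slope) VALUES(:id, :slope)",
        named_params! { ":id": "idx_eq_name", ":slope": 51u8 },
    )?;
    // Look it up again; a missing row maps to None, as get_idx_slope does.
    let slope: Option<u8> = conn
        .query_row(
            "SELECT slope FROM idxslope_analysis WHERE id = :id",
            named_params! { ":id": "idx_eq_name" },
            |row| row.get(0),
        )
        .optional()?;
    assert_eq!(slope, Some(51));
    Ok(())
}
```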
@@ -1303,7 +1396,7 @@ impl IdlSqliteWriteTransaction {
                dbv_id2entry
            );
        }
-        // * if v3 -> complete.
+        // * if v3 -> create name2uuid, uuid2spn, uuid2rdn.
        if dbv_id2entry == 3 {
            self.create_name2uuid(audit)
                .and_then(|_| self.create_uuid2spn(audit))
@@ -4,6 +4,8 @@ use std::borrow::Borrow;
 use std::cmp::Ordering;
 use std::hash::{Hash, Hasher};

+pub type IdxSlope = u8;
+
 // Huge props to https://github.com/sunshowers/borrow-complex-key-example/blob/master/src/lib.rs

 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -12,6 +14,15 @@ pub struct IdxKey {
    pub itype: IndexType,
}

impl IdxKey {
    pub fn new(attr: &str, itype: IndexType) -> Self {
        IdxKey {
            attr: attr.into(),
            itype,
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct IdxKeyRef<'a> {
    pub attr: &'a str,
@@ -22,6 +33,13 @@ impl<'a> IdxKeyRef<'a> {
    pub fn new(attr: &'a str, itype: &'a IndexType) -> Self {
        IdxKeyRef { attr, itype }
    }

    pub fn to_key(&self) -> IdxKey {
        IdxKey {
            attr: self.attr.into(),
            itype: self.itype.clone(),
        }
    }
}

pub trait IdxKeyToRef {
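The `to_key` addition rounds out the borrowed-key pattern credited above: an `IdxKeyRef` can probe a map keyed by owned `IdxKey`s without allocating, and is only upgraded to an owned key on insert. A simplified, self-contained sketch of the pattern (`Key`, `KeyRef`, and `ToRef` are illustrative stand-ins for `IdxKey`, `IdxKeyRef`, and `IdxKeyToRef`, keyed on a single field for brevity):

```rust
use std::borrow::Borrow;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};

#[derive(PartialEq, Eq, Hash)]
struct Key {
    attr: String,
}
#[derive(PartialEq, Eq, Hash)]
struct KeyRef<'a> {
    attr: &'a str,
}

trait ToRef {
    fn keyref(&self) -> KeyRef<'_>;
}
impl ToRef for Key {
    fn keyref(&self) -> KeyRef<'_> {
        KeyRef { attr: &self.attr }
    }
}
impl<'a> ToRef for KeyRef<'a> {
    fn keyref(&self) -> KeyRef<'_> {
        KeyRef { attr: self.attr }
    }
}

// Let the map borrow an owned Key as &dyn ToRef for lookups.
impl<'a> Borrow<dyn ToRef + 'a> for Key {
    fn borrow(&self) -> &(dyn ToRef + 'a) {
        self
    }
}
impl<'a> PartialEq for (dyn ToRef + 'a) {
    fn eq(&self, other: &Self) -> bool {
        self.keyref() == other.keyref()
    }
}
impl<'a> Eq for (dyn ToRef + 'a) {}
impl<'a> Hash for (dyn ToRef + 'a) {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.keyref().hash(state)
    }
}

fn main() {
    let mut slopes: HashMap<Key, u8> = HashMap::new();
    slopes.insert(Key { attr: "name".into() }, 1);
    // Borrowed probe: no owned Key (no allocation) is needed.
    let kref = KeyRef { attr: "name" };
    assert_eq!(slopes.get(&kref as &dyn ToRef), Some(&1));
}
```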
@@ -7,7 +7,8 @@
 use std::fs;

 use crate::value::IndexType;
-use hashbrown::HashSet as Set;
+use hashbrown::HashMap as Map;
+use hashbrown::HashSet;
 use std::cell::UnsafeCell;
 use std::sync::Arc;
@@ -32,7 +33,7 @@ mod idl_arc_sqlite;
 mod idl_sqlite;
 pub(crate) mod idxkey;

-pub(crate) use self::idxkey::{IdxKey, IdxKeyRef, IdxKeyToRef};
+pub(crate) use self::idxkey::{IdxKey, IdxKeyRef, IdxKeyToRef, IdxSlope};

 use crate::be::idl_arc_sqlite::{
     IdlArcSqlite, IdlArcSqliteReadTransaction, IdlArcSqliteTransaction,
@@ -61,11 +62,11 @@ pub struct IdRawEntry {

 #[derive(Debug, Clone)]
 pub struct IdxMeta {
-    pub idxkeys: Set<IdxKey>,
+    pub idxkeys: Map<IdxKey, IdxSlope>,
 }

 impl IdxMeta {
-    pub fn new(idxkeys: Set<IdxKey>) -> Self {
+    pub fn new(idxkeys: Map<IdxKey, IdxSlope>) -> Self {
         IdxMeta { idxkeys }
     }
 }
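The practical effect of this change: index metadata now carries a per-key cost estimate rather than bare membership. A simplified standalone sketch (the types are stand-ins for the crate's, and the slope values echo the defaults discussed further down):

```rust
use std::collections::HashMap;

type IdxSlope = u8;

#[derive(PartialEq, Eq, Hash)]
struct IdxKey {
    attr: &'static str,
    itype: &'static str,
}

struct IdxMeta {
    idxkeys: HashMap<IdxKey, IdxSlope>,
}

fn main() {
    let mut idxkeys = HashMap::new();
    // Low slope = fast, selective index; high slope = broad, costly one.
    idxkeys.insert(IdxKey { attr: "name", itype: "eq" }, 1);
    idxkeys.insert(IdxKey { attr: "class", itype: "eq" }, 180);
    let meta = IdxMeta { idxkeys };
    assert_eq!(meta.idxkeys.get(&IdxKey { attr: "name", itype: "eq" }), Some(&1));
}
```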
@@ -158,7 +159,7 @@ pub trait BackendTransaction {
    ) -> Result<(IdList, FilterPlan), OperationError> {
        Ok(match filt {
            FilterResolved::Eq(attr, value, idx) => {
-                if *idx {
+                if idx.is_some() {
                    // Get the idx_key
                    let idx_key = value.get_idx_eq_key();
                    // Get the idl for this
@@ -178,7 +179,7 @@ pub trait BackendTransaction {
                }
            }
            FilterResolved::Sub(attr, subvalue, idx) => {
-                if *idx {
+                if idx.is_some() {
                    // Get the idx_key
                    let idx_key = subvalue.get_idx_sub_key();
                    // Get the idl for this
@@ -198,7 +199,7 @@ pub trait BackendTransaction {
                }
            }
            FilterResolved::Pres(attr, idx) => {
-                if *idx {
+                if idx.is_some() {
                    // Get the idl for this
                    match self.get_idlayer().get_idl(
                        au,
@@ -218,7 +219,7 @@ pub trait BackendTransaction {
                // We have no process for indexing this right now.
                (IdList::AllIds, FilterPlan::LessThanUnindexed(attr.clone()))
            }
-            FilterResolved::Or(l) => {
+            FilterResolved::Or(l, _) => {
                // Importantly if this has no inner elements, this returns
                // an empty list.
                let mut plan = Vec::new();
@@ -270,7 +271,7 @@ pub trait BackendTransaction {
                    (IdList::Indexed(result), setplan)
                }
            }
-            FilterResolved::And(l) => {
+            FilterResolved::And(l, _) => {
                // This algorithm is a little annoying. I couldn't get it to work with iter and
                // folds due to the logic needed ...
@@ -382,7 +383,7 @@ pub trait BackendTransaction {
                for f in f_andnot.iter() {
                    f_rem_count -= 1;
                    let f_in = match f {
-                        FilterResolved::AndNot(f_in) => f_in,
+                        FilterResolved::AndNot(f_in, _) => f_in,
                        _ => {
                            lfilter_error!(
                                au,
@@ -465,7 +466,7 @@ pub trait BackendTransaction {
                // debug!("final cand set ==> {:?}", cand_idl);
                (cand_idl, setplan)
            } // end and
-            FilterResolved::Inclusion(l) => {
+            FilterResolved::Inclusion(l, _) => {
                // For inclusion to be valid, every term must have *at least* one element present.
                // This really relies on indexing, and so it's internal only - generally only
                // for fully indexed existence queries, such as from refint.
@@ -507,7 +508,7 @@ pub trait BackendTransaction {
            // of an "AND" query, but if it's used anywhere else IE the root filter, then there is
            // no other set to exclude - therefore it's empty set. Additionally, even in an OR query
            // the AndNot will be skipped as an empty set for the same reason.
-            FilterResolved::AndNot(_f) => {
+            FilterResolved::AndNot(_f, _) => {
                // get the idl for f
                // now do andnot?
                lfilter_error!(
@@ -1046,8 +1047,32 @@ impl<'a> BackendWriteTransaction<'a> {
        })
    }

-    pub fn update_idxmeta(&mut self, mut idxkeys: Set<IdxKey>) {
+    pub fn update_idxmeta(
+        &mut self,
+        audit: &mut AuditScope,
+        idxkeys: Vec<IdxKey>,
+    ) -> Result<(), OperationError> {
+        if self.is_idx_slopeyness_generated(audit)? {
+            ltrace!(audit, "Indexing slopes available");
+        } else {
+            ladmin_warning!(
+                audit,
+                "No indexing slopes available. You should consider reindexing to generate these"
+            );
+        };
+
+        // Setup idxkeys here. By default we set these all to "max slope", aka
+        // all indexes are "equal", but also worst case unless analysed. If they
+        // have been analysed, we can set the slope factor into here.
+        let idxkeys: Result<Map<_, _>, _> = idxkeys
+            .into_iter()
+            .map(|k| self.get_idx_slope(audit, &k).map(|slope| (k, slope)))
+            .collect();
+
+        let mut idxkeys = idxkeys?;
+
+        std::mem::swap(&mut self.idxmeta_wr.deref_mut().idxkeys, &mut idxkeys);
+        Ok(())
+    }

    // Should take a mut index set, and then we write the whole thing back
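The `collect` into `Result<Map<_, _>, _>` above means one failed slope lookup aborts the whole metadata rebuild. A minimal sketch of that pattern with a hypothetical `lookup` standing in for `get_idx_slope`:

```rust
use std::collections::HashMap;

// Hypothetical slope lookup standing in for get_idx_slope.
fn lookup(attr: &str) -> Result<u8, String> {
    match attr {
        "name" => Ok(1),
        "class" => Ok(180),
        _ => Err(format!("no slope for {}", attr)),
    }
}

fn main() {
    let idxkeys = vec!["name", "class"];
    // One Err anywhere aborts the whole collect with that Err.
    let slopes: Result<HashMap<_, _>, _> = idxkeys
        .into_iter()
        .map(|k| lookup(k).map(|slope| (k, slope)))
        .collect();
    assert_eq!(slopes.unwrap().get("name"), Some(&1));
}
```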
@@ -1234,12 +1259,12 @@ impl<'a> BackendWriteTransaction<'a> {
        let idx_table_list = self.get_idlayer().list_idxs(audit)?;

        // Turn the vec to a real set
-        let idx_table_set: Set<_> = idx_table_list.into_iter().collect();
+        let idx_table_set: HashSet<_> = idx_table_list.into_iter().collect();

        let missing: Vec<_> = self
            .idxmeta
            .idxkeys
-            .iter()
+            .keys()
            .filter_map(|ikey| {
                // what would the table name be?
                let tname = format!("idx_{}_{}", ikey.itype.as_idx_str(), ikey.attr.as_str());
@@ -1269,7 +1294,7 @@ impl<'a> BackendWriteTransaction<'a> {

        self.idxmeta
            .idxkeys
-            .iter()
+            .keys()
            .try_for_each(|ikey| idlayer.create_idx(audit, &ikey.attr, &ikey.itype))
    }
@@ -1327,7 +1352,12 @@ impl<'a> BackendWriteTransaction<'a> {
        limmediate_warning!(audit, "Optimising Indexes ... ");
        idlayer.optimise_dirty_idls(audit);
        limmediate_warning!(audit, "done ✅\n");

        limmediate_warning!(audit, "Calculating Index Optimisation Slopes ... ");
        idlayer.analyse_idx_slopes(audit).map_err(|e| {
            ladmin_error!(audit, "index optimisation failed -> {:?}", e);
            e
        })?;
        limmediate_warning!(audit, "done ✅\n");
        Ok(())
    }
@@ -1347,6 +1377,24 @@ impl<'a> BackendWriteTransaction<'a> {
        self.get_idlayer().get_idl(audit, attr, itype, idx_key)
    }

    fn is_idx_slopeyness_generated(&self, audit: &mut AuditScope) -> Result<bool, OperationError> {
        self.get_idlayer().is_idx_slopeyness_generated(audit)
    }

    fn get_idx_slope(
        &self,
        audit: &mut AuditScope,
        ikey: &IdxKey,
    ) -> Result<IdxSlope, OperationError> {
        // Do we have the slopeyness?
        let slope = self
            .get_idlayer()
            .get_idx_slope(audit, ikey)?
            .unwrap_or_else(|| get_idx_slope_default(ikey));
        ltrace!(audit, "index slope - {:?} -> {:?}", ikey, slope);
        Ok(slope)
    }

    pub fn restore(&self, audit: &mut AuditScope, src_path: &str) -> Result<(), OperationError> {
        let idlayer = self.get_idlayer();
        // load all entries into RAM, may need to change this later
@@ -1369,35 +1417,6 @@ impl<'a> BackendWriteTransaction<'a> {
                OperationError::SerdeJsonError
            })?;

-        // Filter all elements that have a UUID in the system range.
-        /*
-        use crate::constants::UUID_ANONYMOUS;
-        use crate::be::dbentry::DbEntryVers;
-        use crate::be::dbvalue::DbValueV1;
-        let uuid_anonymous = UUID_ANONYMOUS.clone();
-        let dbentries: Vec<DbEntry> = dbentries.into_iter()
-            .filter(|e| {
-                let e_uuid = match &e.ent {
-                    DbEntryVers::V1(dbe) => dbe.attrs.get("uuid")
-                        .and_then(|dbvs| dbvs.first())
-                        .and_then(|dbv| {
-                            match dbv {
-                                DbValueV1::UU(u) => Some(u),
-                                _ => panic!(),
-                            }
-                        })
-                        .unwrap()
-                };
-
-                e_uuid > &uuid_anonymous
-            })
-            .collect();
-
-        dbentries.iter().for_each(|e| {
-            ltrace!(audit, "importing -> {:?}", e);
-        });
-        */
-
        // Now, we setup all the entries with new ids.
        let mut id_max = 0;
        let identries: Result<Vec<IdRawEntry>, _> = dbentries
@@ -1411,16 +1430,6 @@ impl<'a> BackendWriteTransaction<'a> {

        idlayer.write_identries_raw(audit, identries?.into_iter())?;

-        // for debug
-        /*
-        self.idlayer.get_identry(audit, &IdList::AllIds)
-            .unwrap()
-            .iter()
-            .for_each(|dbe| {
-                ltrace!(audit, "dbe -> {:?}", dbe.id);
-            });
-        */
-
        // Reindex now we are loaded.
        self.reindex(audit)?;
@@ -1505,6 +1514,21 @@ impl<'a> BackendWriteTransaction<'a> {
    }
}

// We have a number of hardcoded, "obvious" slopes that should
// exist. We return these when the analysis has not been run, as
// these are values that are generally "good enough" for most applications.
fn get_idx_slope_default(ikey: &IdxKey) -> IdxSlope {
    match (ikey.attr.as_str(), &ikey.itype) {
        ("name", IndexType::Equality)
        | ("spn", IndexType::Equality)
        | ("uuid", IndexType::Equality) => 1,
        ("class", IndexType::Equality) => 180,
        (_, IndexType::Equality) => 45,
        (_, IndexType::SubString) => 90,
        (_, IndexType::Presence) => 90,
    }
}

// In the future this will do the routing between the chosen backends etc.
impl Backend {
    pub fn new(
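A self-contained sketch of this fallback table (a simplified `IndexType` stands in for the real enum), mostly useful for seeing how the match arms tier the defaults:

```rust
// Standalone restatement of the default slopes above.
enum IndexType {
    Equality,
    SubString,
    Presence,
}

fn idx_slope_default(attr: &str, itype: &IndexType) -> u8 {
    match (attr, itype) {
        ("name", IndexType::Equality)
        | ("spn", IndexType::Equality)
        | ("uuid", IndexType::Equality) => 1,
        ("class", IndexType::Equality) => 180,
        (_, IndexType::Equality) => 45,
        (_, IndexType::SubString) => 90,
        (_, IndexType::Presence) => 90,
    }
}

fn main() {
    assert_eq!(idx_slope_default("uuid", &IndexType::Equality), 1);
    // Any other equality index is assumed middling until analysed.
    assert_eq!(idx_slope_default("member", &IndexType::Equality), 45);
}
```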
@@ -1513,7 +1537,7 @@ impl Backend {
        // path: &str,
        // mut pool_size: u32,
        // fstype: FsType,
-        idxkeys: Set<IdxKey>,
+        idxkeys: Vec<IdxKey>,
        vacuum: bool,
    ) -> Result<Self, OperationError> {
        info!("DB tickets -> {:?}", cfg.pool_size);
@@ -1525,6 +1549,19 @@ impl Backend {
            cfg.pool_size = 1;
        }

        // Setup idxkeys here. By default we set these all to "max slope", aka
        // all indexes are "equal", but also worst case unless analysed.
        //
        // During startup this will be "fixed" as the schema core will call reload_idxmeta,
        // which will trigger a reload of the analysis data (if present).
        let idxkeys: Map<_, _> = idxkeys
            .into_iter()
            .map(|ikey| {
                let slope = get_idx_slope_default(&ikey);
                (ikey, slope)
            })
            .collect();

        // this has a ::memory() type, but will path == "" work?
        lperf_trace_segment!(audit, "be::new", || {
            let idlayer = Arc::new(IdlArcSqlite::new(audit, &cfg, vacuum)?);
@@ -1597,7 +1634,6 @@ impl Backend {

 #[cfg(test)]
 mod tests {
-    use hashbrown::HashSet as Set;
     use idlset::v2::IDLBitRange;
     use std::fs;
     use std::iter::FromIterator;
@@ -1626,35 +1662,36 @@ mod tests {
        let mut audit = AuditScope::new("run_test", uuid::Uuid::new_v4(), None);

        // This is a demo idxmeta, purely for testing.
-        let mut idxmeta = Set::with_capacity(16);
-        idxmeta.insert(IdxKey {
+        let idxmeta = vec![
+            IdxKey {
                attr: AttrString::from("name"),
                itype: IndexType::Equality,
-        });
-        idxmeta.insert(IdxKey {
+            },
+            IdxKey {
                attr: AttrString::from("name"),
                itype: IndexType::Presence,
-        });
-        idxmeta.insert(IdxKey {
+            },
+            IdxKey {
                attr: AttrString::from("name"),
                itype: IndexType::SubString,
-        });
-        idxmeta.insert(IdxKey {
+            },
+            IdxKey {
                attr: AttrString::from("uuid"),
                itype: IndexType::Equality,
-        });
-        idxmeta.insert(IdxKey {
+            },
+            IdxKey {
                attr: AttrString::from("uuid"),
                itype: IndexType::Presence,
-        });
-        idxmeta.insert(IdxKey {
+            },
+            IdxKey {
                attr: AttrString::from("ta"),
                itype: IndexType::Equality,
-        });
-        idxmeta.insert(IdxKey {
+            },
+            IdxKey {
                attr: AttrString::from("tb"),
                itype: IndexType::Equality,
-        });
+            },
+        ];

        let be = Backend::new(&mut audit, BackendConfig::new_test(), idxmeta, false)
            .expect("Failed to setup backend");
@@ -2765,6 +2802,103 @@ mod tests {
        })
    }

    #[test]
    fn test_be_index_slope_generation() {
        run_test!(|audit: &mut AuditScope, be: &mut BackendWriteTransaction| {
            // Create some test entries with some indexed / unindexed values.
            let mut e1: Entry<EntryInit, EntryNew> = Entry::new();
            e1.add_ava("name", Value::from("william"));
            e1.add_ava("uuid", Value::from("db237e8a-0079-4b8c-8a56-593b22aa44d1"));
            e1.add_ava("ta", Value::from("dupe"));
            e1.add_ava("tb", Value::from("1"));
            let e1 = unsafe { e1.into_sealed_new() };

            let mut e2: Entry<EntryInit, EntryNew> = Entry::new();
            e2.add_ava("name", Value::from("claire"));
            e2.add_ava("uuid", Value::from("db237e8a-0079-4b8c-8a56-593b22aa44d2"));
            e2.add_ava("ta", Value::from("dupe"));
            e2.add_ava("tb", Value::from("1"));
            let e2 = unsafe { e2.into_sealed_new() };

            let mut e3: Entry<EntryInit, EntryNew> = Entry::new();
            e3.add_ava("name", Value::from("benny"));
            e3.add_ava("uuid", Value::from("db237e8a-0079-4b8c-8a56-593b22aa44d3"));
            e3.add_ava("ta", Value::from("dupe"));
            e3.add_ava("tb", Value::from("2"));
            let e3 = unsafe { e3.into_sealed_new() };

            let _rset = be
                .create(audit, vec![e1.clone(), e2.clone(), e3.clone()])
                .unwrap();

            // If the slopes haven't been generated yet, there are some hardcoded values
            // that we can use instead. They aren't generated until a first re-index.
            assert!(!be.is_idx_slopeyness_generated(audit).unwrap());

            let ta_eq_slope = be
                .get_idx_slope(audit, &IdxKey::new("ta", IndexType::Equality))
                .unwrap();
            assert_eq!(ta_eq_slope, 45);

            let tb_eq_slope = be
                .get_idx_slope(audit, &IdxKey::new("tb", IndexType::Equality))
                .unwrap();
            assert_eq!(tb_eq_slope, 45);

            let name_eq_slope = be
                .get_idx_slope(audit, &IdxKey::new("name", IndexType::Equality))
                .unwrap();
            assert_eq!(name_eq_slope, 1);
            let uuid_eq_slope = be
                .get_idx_slope(audit, &IdxKey::new("uuid", IndexType::Equality))
                .unwrap();
            assert_eq!(uuid_eq_slope, 1);

            let name_pres_slope = be
                .get_idx_slope(audit, &IdxKey::new("name", IndexType::Presence))
                .unwrap();
            assert_eq!(name_pres_slope, 90);
            let uuid_pres_slope = be
                .get_idx_slope(audit, &IdxKey::new("uuid", IndexType::Presence))
                .unwrap();
            assert_eq!(uuid_pres_slope, 90);
            // Check the slopes are what we expect for hardcoded values.

            // Now check slope generation for the values. Today these are calculated
            // at reindex time, so we now perform the re-index.
            assert!(be.reindex(audit).is_ok());
            assert!(be.is_idx_slopeyness_generated(audit).unwrap());

            let ta_eq_slope = be
                .get_idx_slope(audit, &IdxKey::new("ta", IndexType::Equality))
                .unwrap();
            assert_eq!(ta_eq_slope, 200);

            let tb_eq_slope = be
                .get_idx_slope(audit, &IdxKey::new("tb", IndexType::Equality))
                .unwrap();
            assert_eq!(tb_eq_slope, 133);

            let name_eq_slope = be
                .get_idx_slope(audit, &IdxKey::new("name", IndexType::Equality))
                .unwrap();
            assert_eq!(name_eq_slope, 51);
            let uuid_eq_slope = be
                .get_idx_slope(audit, &IdxKey::new("uuid", IndexType::Equality))
                .unwrap();
            assert_eq!(uuid_eq_slope, 51);

            let name_pres_slope = be
                .get_idx_slope(audit, &IdxKey::new("name", IndexType::Presence))
                .unwrap();
            assert_eq!(name_pres_slope, 200);
            let uuid_pres_slope = be
                .get_idx_slope(audit, &IdxKey::new("uuid", IndexType::Presence))
                .unwrap();
            assert_eq!(uuid_pres_slope, 200);
        })
    }

    #[test]
    fn test_be_limits_allids() {
        run_test!(|audit: &mut AuditScope, be: &mut BackendWriteTransaction| {
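The post-reindex values asserted above follow directly from the formula in `calculate_sd_slope`. A worked check, restating the test data and computing independently of the crate:

```rust
// Worked check of two of the post-reindex expectations.
fn main() {
    // ta: one key ("dupe") holding all 3 entries -> n_keys = 1, sd_1 = 3.0.
    let ta = ((3.0f64 / 1.0).atan().to_degrees() * 2.8) as u8;
    assert_eq!(ta, 200);
    // tb: keys "1" (2 entries) and "2" (1 entry) -> n_keys = 2,
    // mean = 1.5, sd = sqrt(0.5), so sd_1 ~ 2.207.
    let sd_1 = 1.5f64 + 0.5f64.sqrt();
    let tb = ((sd_1 / 2.0).atan().to_degrees() * 2.8) as u8;
    assert_eq!(tb, 133);
}
```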
@@ -38,14 +38,14 @@ use kanidm_proto::v1::Filter as ProtoFilter;
 use kanidm_proto::v1::{OperationError, SchemaError};

 use crate::be::dbentry::{DbEntry, DbEntryV1, DbEntryVers};
-use crate::be::IdxKey;
+use crate::be::{IdxKey, IdxSlope};

 use ldap3_server::simple::{LdapPartialAttribute, LdapSearchResultEntry};
 use std::collections::BTreeMap as Map;
 pub use std::collections::BTreeSet as Set;
 use std::collections::BTreeSet;
 // use hashbrown::HashMap as Map;
-use hashbrown::HashSet;
+use hashbrown::HashMap;
 use smartstring::alias::String as AttrString;
 use time::OffsetDateTime;
 use uuid::Uuid;
@@ -1097,7 +1097,7 @@ impl Entry<EntrySealed, EntryCommitted> {
    /// Given the previous and current state of this entry, determine the indexing differential
    /// that needs to be applied. i.e. what indexes must be created, modified and removed.
    pub(crate) fn idx_diff<'a>(
-        idxmeta: &'a HashSet<IdxKey>,
+        idxmeta: &'a HashMap<IdxKey, IdxSlope>,
        pre: Option<&Self>,
        post: Option<&Self>,
    ) -> IdxDiff<'a> {
@@ -1113,7 +1113,7 @@ impl Entry<EntrySealed, EntryCommitted> {
            (Some(pre_e), None) => {
                // If we are none (?), yield our pre-state as removals.
                idxmeta
-                    .iter()
+                    .keys()
                    .flat_map(|ikey| {
                        match pre_e.get_ava(ikey.attr.as_str()) {
                            None => Vec::new(),
@@ -1143,7 +1143,7 @@ impl Entry<EntrySealed, EntryCommitted> {
            (None, Some(post_e)) => {
                // If the pre-state is none, yield our additions.
                idxmeta
-                    .iter()
+                    .keys()
                    .flat_map(|ikey| {
                        match post_e.get_ava(ikey.attr.as_str()) {
                            None => Vec::new(),
@@ -1175,7 +1175,7 @@ impl Entry<EntrySealed, EntryCommitted> {
            (Some(pre_e), Some(post_e)) => {
                assert!(pre_e.state.id == post_e.state.id);
                idxmeta
-                    .iter()
+                    .keys()
                    .flat_map(|ikey| {
                        match (
                            pre_e.get_ava_set(ikey.attr.as_str()),
@@ -1804,16 +1804,16 @@ impl<VALID, STATE> Entry<VALID, STATE> {
                self.attribute_lessthan(attr.as_str(), subvalue)
            }
            // Check with ftweedal about or filter zero len correctness.
-            FilterResolved::Or(l) => l.iter().any(|f| self.entry_match_no_index_inner(f)),
+            FilterResolved::Or(l, _) => l.iter().any(|f| self.entry_match_no_index_inner(f)),
            // Check with ftweedal about and filter zero len correctness.
-            FilterResolved::And(l) => l.iter().all(|f| self.entry_match_no_index_inner(f)),
-            FilterResolved::Inclusion(_) => {
+            FilterResolved::And(l, _) => l.iter().all(|f| self.entry_match_no_index_inner(f)),
+            FilterResolved::Inclusion(_, _) => {
                // An inclusion doesn't make sense on an entry in isolation!
                // Inclusions are part of exists queries, on search they mean
                // nothing!
                false
            }
-            FilterResolved::AndNot(f) => !self.entry_match_no_index_inner(f),
+            FilterResolved::AndNot(f, _) => !self.entry_match_no_index_inner(f),
        }
    }
@@ -2091,11 +2091,11 @@ impl From<&SchemaClass> for Entry<EntryInit, EntryNew> {

 #[cfg(test)]
 mod tests {
-    use crate::be::IdxKey;
+    use crate::be::{IdxKey, IdxSlope};
     use crate::entry::{Entry, EntryInit, EntryInvalid, EntryNew};
     use crate::modify::{Modify, ModifyList};
     use crate::value::{IndexType, PartialValue, Value};
-    use hashbrown::HashSet;
+    use hashbrown::HashMap;
     use smartstring::alias::String as AttrString;
     use std::collections::BTreeSet as Set;
@@ -2270,19 +2270,28 @@ mod tests {
        e2.add_ava("userid", Value::from("claire"));
        let e2 = unsafe { e2.into_sealed_committed() };

-        let mut idxmeta = HashSet::with_capacity(8);
-        idxmeta.insert(IdxKey {
+        let mut idxmeta = HashMap::with_capacity(8);
+        idxmeta.insert(
+            IdxKey {
                attr: AttrString::from("userid"),
                itype: IndexType::Equality,
-        });
-        idxmeta.insert(IdxKey {
+            },
+            IdxSlope::MAX,
+        );
+        idxmeta.insert(
+            IdxKey {
                attr: AttrString::from("userid"),
                itype: IndexType::Presence,
-        });
-        idxmeta.insert(IdxKey {
+            },
+            IdxSlope::MAX,
+        );
+        idxmeta.insert(
+            IdxKey {
                attr: AttrString::from("extra"),
                itype: IndexType::Equality,
-        });
+            },
+            IdxSlope::MAX,
+        );

        // When we do None, None, we get nothing back.
        let r1 = Entry::idx_diff(&idxmeta, None, None);
@@ -8,25 +8,28 @@
 //! [`Filter`]: struct.Filter.html
 //! [`Entry`]: ../entry/struct.Entry.html

-use crate::be::{IdxKey, IdxKeyRef, IdxKeyToRef, IdxMeta};
+use crate::be::{IdxKey, IdxKeyRef, IdxKeyToRef, IdxMeta, IdxSlope};
 use crate::identity::IdentityId;
 use crate::ldap::ldap_attr_filter_map;
 use crate::prelude::*;
 use crate::schema::SchemaTransaction;
 use crate::value::{IndexType, PartialValue};
-use hashbrown::HashSet;
+use concread::arcache::ARCacheReadTxn;
 use kanidm_proto::v1::Filter as ProtoFilter;
 use kanidm_proto::v1::{OperationError, SchemaError};
 use ldap3_server::proto::{LdapFilter, LdapSubstringFilter};
 // use smartstring::alias::String;
-use concread::arcache::ARCacheReadTxn;
-use smartstring::alias::String as AttrString;
+// use smartstring::alias::String as AttrString;
 use std::cmp::{Ordering, PartialOrd};
 use std::collections::BTreeSet;
 use std::hash::Hash;
 use std::iter;
 use std::num::NonZeroU8;
 use uuid::Uuid;

 use hashbrown::HashMap;
 #[cfg(test)]
 use hashbrown::HashSet;

 const FILTER_DEPTH_MAX: usize = 16;

 // Default filter is safe, ignores all hidden types!
@@ -104,8 +107,8 @@ pub fn f_spn_name(id: &str) -> FC<'static> {
    FC::Or(f)
}

-// This is the short-form for tests and internal filters that can then
-// be transformed into a filter for the server to use.
+/// This is the short-form for tests and internal filters that can then
+/// be transformed into a filter for the server to use.
#[derive(Debug, Deserialize)]
pub enum FC<'a> {
    Eq(&'a str, PartialValue),
@@ -120,7 +123,7 @@ pub enum FC<'a> {
    // Not(Box<FC>),
}

-// This is the filter's internal representation.
+/// This is the filter's internal representation.
#[derive(Debug, Clone, Hash, PartialEq, PartialOrd, Ord, Eq)]
enum FilterComp {
    // This is attr - value
@@ -137,22 +140,28 @@ enum FilterComp {
    // Not(Box<FilterComp>),
}

-// This is the fully resolved internal representation. Note the lack of Not and selfUUID
-// because these are resolved into And(Pres(class), AndNot(term)) and Eq(uuid, ...).
-// Importantly, we make this accessible to Entry so that it can then match on filters
-// properly.
-#[derive(Debug, Clone, PartialEq, Eq)]
+/// This is the fully resolved internal representation. Note the lack of Not and selfUUID
+/// because these are resolved into And(Pres(class), AndNot(term)) and Eq(uuid, ...).
+/// Importantly, we make this accessible to Entry so that it can then match on filters
+/// internally.
+///
+/// Each filter that has been resolved also has been enriched with metadata about its
+/// index, and index slope. For the purpose of this module, consider slope as a "weight"
+/// where small value - faster index, larger value - slower index. This metadata is extremely
+/// important for the query optimiser to make decisions about how to re-arrange queries
+/// correctly.
+#[derive(Debug, Clone, Eq)]
 pub enum FilterResolved {
-    // This is attr - value - indexed
-    Eq(AttrString, PartialValue, bool),
-    Sub(AttrString, PartialValue, bool),
-    Pres(AttrString, bool),
-    LessThan(AttrString, PartialValue, bool),
-    Or(Vec<FilterResolved>),
-    And(Vec<FilterResolved>),
+    // This is attr - value - indexed slope factor
+    Eq(AttrString, PartialValue, Option<NonZeroU8>),
+    Sub(AttrString, PartialValue, Option<NonZeroU8>),
+    Pres(AttrString, Option<NonZeroU8>),
+    LessThan(AttrString, PartialValue, Option<NonZeroU8>),
+    Or(Vec<FilterResolved>, Option<NonZeroU8>),
+    And(Vec<FilterResolved>, Option<NonZeroU8>),
     // All terms must have 1 or more items, or the inclusion is false!
-    Inclusion(Vec<FilterResolved>),
-    AndNot(Box<FilterResolved>),
+    Inclusion(Vec<FilterResolved>, Option<NonZeroU8>),
+    AndNot(Box<FilterResolved>, Option<NonZeroU8>),
 }

 #[derive(Debug, Clone, PartialEq)]
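The switch from `bool` to `Option<NonZeroU8>` costs nothing in memory: `0` is the unused niche that encodes `None`, which is what the earlier `analyse_idx_slopes` comment about clamping to a minimum of 1.0 alludes to. A quick standalone check:

```rust
use std::mem::size_of;
use std::num::NonZeroU8;

fn main() {
    // Niche optimisation: the Option is still exactly one byte.
    assert_eq!(size_of::<Option<NonZeroU8>>(), size_of::<u8>());
    assert_eq!(NonZeroU8::new(0), None); // unindexed
    assert_eq!(NonZeroU8::new(45).map(|s| s.get()), Some(45)); // indexed, slope 45
}
```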
@@ -903,26 +912,6 @@ impl PartialEq for Filter<FilterValidResolved> {
    }
}
*/

-/*
-impl PartialEq for FilterResolved {
-    fn eq(&self, rhs: &FilterResolved) -> bool {
-        match (self, rhs) {
-            (FilterResolved::Eq(a1, v1, i1), FilterResolved::Eq(a2, v2, i2)) => {
-                a1 == a2 && v1 == v2 && i1 == i2
-            }
-            (FilterResolved::Sub(a1, v1, i1), FilterResolved::Sub(a2, v2, i2)) => {
-                a1 == a2 && v1 == v2 && i1 == i2
-            }
-            (FilterResolved::Pres(a1, i1), FilterResolved::Pres(a2, i2)) => a1 == a2 && i1 == i2,
-            (FilterResolved::And(vs1), FilterResolved::And(vs2)) => vs1 == vs2,
-            (FilterResolved::Or(vs1), FilterResolved::Or(vs2)) => vs1 == vs2,
-            (FilterResolved::AndNot(f1), FilterResolved::AndNot(f2)) => f1 == f2,
-            (_, _) => false,
-        }
-    }
-}
-*/
-
/*
 * Only needed in tests, in run time order only matters on the inner for
 * optimisation.
@@ -934,8 +923,26 @@ impl PartialOrd for Filter<FilterValidResolved> {
    }
}

// remember, this isn't ordering by alphanumeric, this is ordering of
// optimisation preference!
impl PartialEq for FilterResolved {
    fn eq(&self, rhs: &FilterResolved) -> bool {
        match (self, rhs) {
            (FilterResolved::Eq(a1, v1, _), FilterResolved::Eq(a2, v2, _)) => a1 == a2 && v1 == v2,
            (FilterResolved::Sub(a1, v1, _), FilterResolved::Sub(a2, v2, _)) => {
                a1 == a2 && v1 == v2
            }
            (FilterResolved::Pres(a1, _), FilterResolved::Pres(a2, _)) => a1 == a2,
            (FilterResolved::LessThan(a1, v1, _), FilterResolved::LessThan(a2, v2, _)) => {
                a1 == a2 && v1 == v2
            }
            (FilterResolved::And(vs1, _), FilterResolved::And(vs2, _)) => vs1 == vs2,
            (FilterResolved::Or(vs1, _), FilterResolved::Or(vs2, _)) => vs1 == vs2,
            (FilterResolved::Inclusion(vs1, _), FilterResolved::Inclusion(vs2, _)) => vs1 == vs2,
            (FilterResolved::AndNot(f1, _), FilterResolved::AndNot(f2, _)) => f1 == f2,
            (_, _) => false,
        }
    }
}

impl PartialOrd for FilterResolved {
    fn partial_cmp(&self, rhs: &FilterResolved) -> Option<Ordering> {
        Some(self.cmp(rhs))
@@ -943,42 +950,51 @@ impl PartialOrd for FilterResolved {
    }
}

impl Ord for FilterResolved {
+    /// Ordering of filters for optimisation and subsequent dead term elimination.
+    ///
    fn cmp(&self, rhs: &FilterResolved) -> Ordering {
+        let left_slopey = self.get_slopeyness_factor();
+        let right_slopey = rhs.get_slopeyness_factor();
+
+        let r = match (left_slopey, right_slopey) {
+            (Some(sfl), Some(sfr)) => sfl.cmp(&sfr),
+            (Some(_), None) => Ordering::Less,
+            (None, Some(_)) => Ordering::Greater,
+            (None, None) => Ordering::Equal,
+        };
+
+        // If they are equal, compare other elements for determining the order. This is what
+        // allows dead term elimination to occur.
+        //
+        // In almost all cases, we'll miss this step though as slopes will vary distinctly.
+        //
+        // We do NOT need to check for indexed vs unindexed here, we already did it in the slope check!
+        if r == Ordering::Equal {
            match (self, rhs) {
-                (FilterResolved::Eq(a1, v1, true), FilterResolved::Eq(a2, v2, true)) => {
+                (FilterResolved::Eq(a1, v1, _), FilterResolved::Eq(a2, v2, _))
+                | (FilterResolved::Sub(a1, v1, _), FilterResolved::Sub(a2, v2, _))
+                | (FilterResolved::LessThan(a1, v1, _), FilterResolved::LessThan(a2, v2, _)) => {
                    match a1.cmp(a2) {
                        Ordering::Equal => v1.cmp(v2),
                        o => o,
                    }
                }
-                (FilterResolved::Sub(a1, v1, true), FilterResolved::Sub(a2, v2, true)) => {
-                    match a1.cmp(a2) {
-                        Ordering::Equal => v1.cmp(v2),
-                        o => o,
-                    }
-                }
-                (FilterResolved::Pres(a1, true), FilterResolved::Pres(a2, true)) => a1.cmp(a2),
-                // Always higher prefer indexed Eq over all else, as these will have
-                // the best indexes and return smallest candidates.
-                (FilterResolved::Eq(_, _, true), _) => Ordering::Less,
-                (_, FilterResolved::Eq(_, _, true)) => Ordering::Greater,
-                (FilterResolved::Pres(_, true), _) => Ordering::Less,
-                (_, FilterResolved::Pres(_, true)) => Ordering::Greater,
-                (FilterResolved::Sub(_, _, true), _) => Ordering::Greater,
-                (_, FilterResolved::Sub(_, _, true)) => Ordering::Less,
-                // Now prefer the unindexed types by performance order.
-                (FilterResolved::Pres(_, false), FilterResolved::Pres(_, false)) => Ordering::Equal,
-                (FilterResolved::Pres(_, false), _) => Ordering::Less,
-                (_, FilterResolved::Pres(_, false)) => Ordering::Greater,
-                (FilterResolved::Eq(_, _, false), FilterResolved::Eq(_, _, false)) => Ordering::Equal,
-                (FilterResolved::Eq(_, _, false), _) => Ordering::Less,
-                (_, FilterResolved::Eq(_, _, false)) => Ordering::Greater,
-                (FilterResolved::Sub(_, _, false), FilterResolved::Sub(_, _, false)) => Ordering::Equal,
-                (FilterResolved::Sub(_, _, false), _) => Ordering::Greater,
-                (_, FilterResolved::Sub(_, _, false)) => Ordering::Less,
-                // They can't be compared, they don't move!
+                (FilterResolved::Pres(a1, _), FilterResolved::Pres(a2, _)) => a1.cmp(a2),
+                // Now sort these into the generally "best" order.
+                (FilterResolved::Eq(_, _, _), _) => Ordering::Less,
+                (_, FilterResolved::Eq(_, _, _)) => Ordering::Greater,
+                (FilterResolved::Pres(_, _), _) => Ordering::Less,
+                (_, FilterResolved::Pres(_, _)) => Ordering::Greater,
+                (FilterResolved::LessThan(_, _, _), _) => Ordering::Less,
+                (_, FilterResolved::LessThan(_, _, _)) => Ordering::Greater,
+                (FilterResolved::Sub(_, _, _), _) => Ordering::Less,
+                (_, FilterResolved::Sub(_, _, _)) => Ordering::Greater,
+                // They can't be re-arranged, they don't move!
                (_, _) => Ordering::Equal,
            }
+        } else {
+            r
        }
    }
}
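A small sketch of the effect of this ordering (the terms and slope values are made up; `None` models an unindexed term, which always sorts after indexed ones, exactly as the slope comparison above does):

```rust
use std::cmp::Ordering;

fn main() {
    // (term, slope): lower slope = cheaper index; None = unindexed.
    let mut terms = vec![
        ("class=person", Some(180u8)),
        ("name=foo", Some(1)),
        ("legalname=x", None),
    ];
    terms.sort_by(|a, b| match (a.1, b.1) {
        (Some(l), Some(r)) => l.cmp(&r),
        (Some(_), None) => Ordering::Less,
        (None, Some(_)) => Ordering::Greater,
        (None, None) => Ordering::Equal,
    });
    // The cheapest index leads, so an And can resolve it first.
    assert_eq!(terms[0].0, "name=foo");
}
```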
@@ -988,38 +1004,44 @@ impl FilterResolved {
        match fc {
            FilterComp::Eq(a, v) => {
                let idx = idxmeta.contains(&(&a, &IndexType::Equality));
+                let idx = NonZeroU8::new(idx as u8);
                FilterResolved::Eq(a, v, idx)
            }
-            FilterComp::SelfUuid => panic!("Not possible to resolve SelfUuid in from_invalid!"),
            FilterComp::Sub(a, v) => {
                // TODO: For now, don't emit substring indexes.
-                // let idx = idxmeta.contains(&(&a, &IndexType::SubString));
-                let idx = false;
-                FilterResolved::Sub(a, v, idx)
+                // let idx = idxmeta.contains(&(&a, &IndexType::SubString));
+                // let idx = NonZeroU8::new(idx as u8);
+                FilterResolved::Sub(a, v, None)
            }
            FilterComp::Pres(a) => {
                let idx = idxmeta.contains(&(&a, &IndexType::Presence));
+                let idx = NonZeroU8::new(idx as u8);
                FilterResolved::Pres(a, idx)
            }
            FilterComp::LessThan(a, v) => {
                // let idx = idxmeta.contains(&(&a, &IndexType::ORDERING));
                // TODO: For now, don't emit ordering indexes.
-                let idx = false;
+                let idx = None;
                FilterResolved::LessThan(a, v, idx)
            }
            FilterComp::Or(vs) => FilterResolved::Or(
                vs.into_iter()
                    .map(|v| FilterResolved::from_invalid(v, idxmeta))
                    .collect(),
+                None,
            ),
            FilterComp::And(vs) => FilterResolved::And(
                vs.into_iter()
                    .map(|v| FilterResolved::from_invalid(v, idxmeta))
                    .collect(),
+                None,
            ),
            FilterComp::Inclusion(vs) => FilterResolved::Inclusion(
                vs.into_iter()
                    .map(|v| FilterResolved::from_invalid(v, idxmeta))
                    .collect(),
+                None,
            ),
            FilterComp::AndNot(f) => {
                // TODO: pattern match box here. (AndNot(box f)).
@@ -1027,57 +1049,80 @@ impl FilterResolved {
                // not today remove the box, and we need f in our ownership. Since
                // AndNot currently is a rare request, cloning is not the worst thing
                // here ...
-                FilterResolved::AndNot(Box::new(FilterResolved::from_invalid(
-                    (*f).clone(),
-                    idxmeta,
-                )))
+                FilterResolved::AndNot(
+                    Box::new(FilterResolved::from_invalid((*f).clone(), idxmeta)),
+                    None,
+                )
            }
+            FilterComp::SelfUuid => panic!("Not possible to resolve SelfUuid in from_invalid!"),
        }
    }

-    fn resolve_idx(fc: FilterComp, ev: &Identity, idxmeta: &HashSet<IdxKey>) -> Option<Self> {
+    fn resolve_idx(
+        fc: FilterComp,
+        ev: &Identity,
+        idxmeta: &HashMap<IdxKey, IdxSlope>,
+    ) -> Option<Self> {
        match fc {
            FilterComp::Eq(a, v) => {
                let idxkref = IdxKeyRef::new(&a, &IndexType::Equality);
-                let idx = idxmeta.contains(&idxkref as &dyn IdxKeyToRef);
+                let idx = idxmeta
+                    .get(&idxkref as &dyn IdxKeyToRef)
+                    .copied()
+                    .and_then(NonZeroU8::new);
                Some(FilterResolved::Eq(a, v, idx))
            }
+            FilterComp::SelfUuid => ev.get_uuid().map(|uuid| {
+                let uuid_s = AttrString::from("uuid");
+                let idxkref = IdxKeyRef::new(&uuid_s, &IndexType::Equality);
+                let idx = idxmeta
+                    .get(&idxkref as &dyn IdxKeyToRef)
+                    .copied()
+                    .and_then(NonZeroU8::new);
+                FilterResolved::Eq(uuid_s, PartialValue::new_uuid(uuid), idx)
+            }),
            FilterComp::Sub(a, v) => {
                let idxkref = IdxKeyRef::new(&a, &IndexType::SubString);
-                let idx = idxmeta.contains(&idxkref as &dyn IdxKeyToRef);
+                let idx = idxmeta
+                    .get(&idxkref as &dyn IdxKeyToRef)
+                    .copied()
+                    .and_then(NonZeroU8::new);
                Some(FilterResolved::Sub(a, v, idx))
            }
            FilterComp::Pres(a) => {
                let idxkref = IdxKeyRef::new(&a, &IndexType::Presence);
-                let idx = idxmeta.contains(&idxkref as &dyn IdxKeyToRef);
+                let idx = idxmeta
+                    .get(&idxkref as &dyn IdxKeyToRef)
+                    .copied()
+                    .and_then(NonZeroU8::new);
                Some(FilterResolved::Pres(a, idx))
            }
            FilterComp::LessThan(a, v) => {
                // let idx = idxmeta.contains(&(&a, &IndexType::SubString));
-                let idx = false;
-                Some(FilterResolved::LessThan(a, v, idx))
+                Some(FilterResolved::LessThan(a, v, None))
            }
+            // We set the compound filter's slope factor to "None" here, because when we do
+            // optimise we'll actually fill in the correct slope factors after we sort those
+            // inner terms in a more optimal way.
            FilterComp::Or(vs) => {
                let fi: Option<Vec<_>> = vs
                    .into_iter()
                    .map(|f| FilterResolved::resolve_idx(f, ev, idxmeta))
                    .collect();
-                fi.map(FilterResolved::Or)
+                fi.map(|fi| FilterResolved::Or(fi, None))
            }
            FilterComp::And(vs) => {
                let fi: Option<Vec<_>> = vs
                    .into_iter()
                    .map(|f| FilterResolved::resolve_idx(f, ev, idxmeta))
                    .collect();
-                fi.map(FilterResolved::And)
+                fi.map(|fi| FilterResolved::And(fi, None))
            }
            FilterComp::Inclusion(vs) => {
                let fi: Option<Vec<_>> = vs
                    .into_iter()
                    .map(|f| FilterResolved::resolve_idx(f, ev, idxmeta))
                    .collect();
-                fi.map(FilterResolved::Inclusion)
+                fi.map(|fi| FilterResolved::Inclusion(fi, None))
            }
            FilterComp::AndNot(f) => {
                // TODO: pattern match box here. (AndNot(box f)).
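The `.get(...).copied().and_then(NonZeroU8::new)` chain above folds two cases into one: a key absent from the metadata, and a key whose stored slope is somehow `0`, both resolve to `None` (no usable index). A minimal sketch with string keys standing in for `IdxKeyRef`:

```rust
use std::collections::HashMap;
use std::num::NonZeroU8;

fn main() {
    let mut idxmeta: HashMap<&str, u8> = HashMap::new();
    idxmeta.insert("name", 1);
    idxmeta.insert("broken", 0); // a zero slope is also "no usable index"
    let hit = idxmeta.get("name").copied().and_then(NonZeroU8::new);
    assert_eq!(hit, NonZeroU8::new(1));
    let zero = idxmeta.get("broken").copied().and_then(NonZeroU8::new);
    let missing = idxmeta.get("member").copied().and_then(NonZeroU8::new);
    assert_eq!((zero, missing), (None, None));
}
```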
@@ -1086,43 +1131,54 @@ impl FilterResolved {
                // AndNot currently is a rare request, cloning is not the worst thing
                // here ...
                FilterResolved::resolve_idx((*f).clone(), ev, idxmeta)
-                    .map(|fi| FilterResolved::AndNot(Box::new(fi)))
+                    .map(|fi| FilterResolved::AndNot(Box::new(fi), None))
            }
-            FilterComp::SelfUuid => ev.get_uuid().map(|uuid| {
-                let uuid_s = AttrString::from("uuid");
-                let idxkref = IdxKeyRef::new(&uuid_s, &IndexType::Equality);
-                let idx = idxmeta.contains(&idxkref as &dyn IdxKeyToRef);
-                FilterResolved::Eq(uuid_s, PartialValue::new_uuid(uuid), idx)
-            }),
        }
    }

    fn resolve_no_idx(fc: FilterComp, ev: &Identity) -> Option<Self> {
+        // ⚠️  ⚠️  ⚠️  ⚠️
+        // Remember, this function means we have NO INDEX METADATA so we can only
+        // assign slopes to values we can GUARANTEE will EXIST.
        match fc {
-            FilterComp::Eq(a, v) => Some(FilterResolved::Eq(a, v, false)),
-            FilterComp::Sub(a, v) => Some(FilterResolved::Sub(a, v, false)),
-            FilterComp::Pres(a) => Some(FilterResolved::Pres(a, false)),
-            FilterComp::LessThan(a, v) => Some(FilterResolved::LessThan(a, v, false)),
+            FilterComp::Eq(a, v) => {
+                // Since we have no index data, we manually configure a reasonable
+                // slope and indicate the presence of some expected basic
+                // indexes.
+                let idx = matches!(a.as_str(), "name" | "uuid");
+                let idx = NonZeroU8::new(idx as u8);
+                Some(FilterResolved::Eq(a, v, idx))
+            }
+            FilterComp::SelfUuid => ev.get_uuid().map(|uuid| {
+                FilterResolved::Eq(
+                    AttrString::from("uuid"),
+                    PartialValue::new_uuid(uuid),
+                    NonZeroU8::new(true as u8),
+                )
+            }),
+            FilterComp::Sub(a, v) => Some(FilterResolved::Sub(a, v, None)),
+            FilterComp::Pres(a) => Some(FilterResolved::Pres(a, None)),
+            FilterComp::LessThan(a, v) => Some(FilterResolved::LessThan(a, v, None)),
            FilterComp::Or(vs) => {
                let fi: Option<Vec<_>> = vs
                    .into_iter()
                    .map(|f| FilterResolved::resolve_no_idx(f, ev))
                    .collect();
-                fi.map(FilterResolved::Or)
+                fi.map(|fi| FilterResolved::Or(fi, None))
            }
            FilterComp::And(vs) => {
                let fi: Option<Vec<_>> = vs
                    .into_iter()
                    .map(|f| FilterResolved::resolve_no_idx(f, ev))
                    .collect();
-                fi.map(FilterResolved::And)
+                fi.map(|fi| FilterResolved::And(fi, None))
            }
            FilterComp::Inclusion(vs) => {
                let fi: Option<Vec<_>> = vs
                    .into_iter()
                    .map(|f| FilterResolved::resolve_no_idx(f, ev))
                    .collect();
-                fi.map(FilterResolved::Inclusion)
+                fi.map(|fi| FilterResolved::Inclusion(fi, None))
            }
            FilterComp::AndNot(f) => {
                // TODO: pattern match box here. (AndNot(box f)).
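A standalone restatement of the fallback rule introduced in `resolve_no_idx` (the helper name is hypothetical): without metadata, only attributes guaranteed to be indexed receive a slope.

```rust
use std::num::NonZeroU8;

// Hypothetical helper: with no index metadata, only "name" and "uuid"
// can be assumed indexed, so only they get a (minimal) slope.
fn no_idx_slope(attr: &str) -> Option<NonZeroU8> {
    let idx = matches!(attr, "name" | "uuid");
    NonZeroU8::new(idx as u8)
}

fn main() {
    assert_eq!(no_idx_slope("uuid"), NonZeroU8::new(1));
    assert_eq!(no_idx_slope("member"), None);
}
```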
@ -1131,31 +1187,25 @@ impl FilterResolved {
|
|||
// AndNot currently is a rare request, cloning is not the worst thing
|
||||
// here ...
|
||||
FilterResolved::resolve_no_idx((*f).clone(), ev)
|
||||
.map(|fi| FilterResolved::AndNot(Box::new(fi)))
|
||||
.map(|fi| FilterResolved::AndNot(Box::new(fi), None))
|
||||
}
|
||||
|
||||
FilterComp::SelfUuid => ev.get_uuid().map(|uuid| {
|
||||
FilterResolved::Eq(
|
||||
AttrString::from("uuid"),
|
||||
PartialValue::new_uuid(uuid),
|
||||
false,
|
||||
)
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
    // This is an optimise that only attempts to optimise the outer terms.
    fn fast_optimise(self) -> Self {
        match self {
-            FilterResolved::Inclusion(mut f_list) => {
+            FilterResolved::Inclusion(mut f_list, _) => {
                f_list.sort_unstable();
                f_list.dedup();
-                FilterResolved::Inclusion(f_list)
+                let sf = f_list.last().and_then(|f| f.get_slopeyness_factor());
+                FilterResolved::Inclusion(f_list, sf)
            }
-            FilterResolved::And(mut f_list) => {
+            FilterResolved::And(mut f_list, _) => {
                f_list.sort_unstable();
                f_list.dedup();
-                FilterResolved::And(f_list)
+                let sf = f_list.first().and_then(|f| f.get_slopeyness_factor());
+                FilterResolved::And(f_list, sf)
            }
            v => v,
        }
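The asymmetry in `fast_optimise` above is the heart of the slope model: an `And` can shortcut on its cheapest member, so its minimum slope represents the whole term, while an `Or` or `Inclusion` must touch every member, so its worst (largest) slope does. A simplified model of that combination rule over bare slope values (`and_slope` and `or_slope` are illustrative helpers, not code from this change):

    use std::num::NonZeroU8;

    // An And can shortcut on its cheapest member, so after sorting the
    // cheapest term leads the list and represents the whole And.
    fn and_slope(terms: &[Option<NonZeroU8>]) -> Option<NonZeroU8> {
        terms.iter().copied().flatten().min()
    }

    // An Or (or Inclusion) must evaluate every member, so its cost is
    // bounded by the worst slope present.
    fn or_slope(terms: &[Option<NonZeroU8>]) -> Option<NonZeroU8> {
        terms.iter().copied().flatten().max()
    }

    fn main() {
        let terms = [NonZeroU8::new(1), NonZeroU8::new(45), NonZeroU8::new(200)];
        assert_eq!(and_slope(&terms), NonZeroU8::new(1));
        assert_eq!(or_slope(&terms), NonZeroU8::new(200));
    }

This mirrors the `first()` and `last()` picks after sorting in the arms above and in `optimise` below.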
@@ -1164,29 +1214,31 @@ impl FilterResolved {
    fn optimise(&self) -> Self {
        // Most optimisations only matter around or/and terms.
        match self {
-            FilterResolved::Inclusion(f_list) => {
+            FilterResolved::Inclusion(f_list, _) => {
                // first, optimise all our inner elements
                let (f_list_inc, mut f_list_new): (Vec<_>, Vec<_>) = f_list
                    .iter()
                    .map(|f_ref| f_ref.optimise())
-                    .partition(|f| matches!(f, FilterResolved::Inclusion(_)));
+                    .partition(|f| matches!(f, FilterResolved::Inclusion(_, _)));

                f_list_inc.into_iter().for_each(|fc| {
-                    if let FilterResolved::Inclusion(mut l) = fc {
+                    if let FilterResolved::Inclusion(mut l, _) = fc {
                        f_list_new.append(&mut l)
                    }
                });
                // finally, optimise this list by sorting.
                f_list_new.sort_unstable();
                f_list_new.dedup();
-                FilterResolved::Inclusion(f_list_new)
+                // Inclusions are similar to or, so what's our worst case?
+                let sf = f_list_new.last().and_then(|f| f.get_slopeyness_factor());
+                FilterResolved::Inclusion(f_list_new, sf)
            }
-            FilterResolved::And(f_list) => {
+            FilterResolved::And(f_list, _) => {
                // first, optimise all our inner elements
                let (f_list_and, mut f_list_new): (Vec<_>, Vec<_>) = f_list
                    .iter()
                    .map(|f_ref| f_ref.optimise())
-                    .partition(|f| matches!(f, FilterResolved::And(_)));
+                    .partition(|f| matches!(f, FilterResolved::And(_, _)));

                // now, iterate over this list - for each "and" term, fold
                // its elements to this level.
@@ -1201,12 +1253,12 @@ impl FilterResolved {
                // shortcutting

                f_list_and.into_iter().for_each(|fc| {
-                    if let FilterResolved::And(mut l) = fc {
+                    if let FilterResolved::And(mut l, _) = fc {
                        f_list_new.append(&mut l)
                    }
                });

-                // If the f_list_or only has one element, pop it and return.
+                // If the f_list_and only has one element, pop it and return.
                if f_list_new.len() == 1 {
                    #[allow(clippy::expect_used)]
                    f_list_new.pop().expect("corrupt?")
@@ -1214,21 +1266,25 @@ impl FilterResolved {
                    // finally, optimise this list by sorting.
                    f_list_new.sort_unstable();
                    f_list_new.dedup();
+                    // Whichever element sorts to the head must be the min SF,
+                    // so we use this in our And to represent us.
+                    let sf = f_list_new.first().and_then(|f| f.get_slopeyness_factor());
                    //
                    // return!
-                    FilterResolved::And(f_list_new)
+                    FilterResolved::And(f_list_new, sf)
                }
            }
-            FilterResolved::Or(f_list) => {
+            FilterResolved::Or(f_list, _) => {
                let (f_list_or, mut f_list_new): (Vec<_>, Vec<_>) = f_list
                    .iter()
                    // Optimise all inner items.
                    .map(|f_ref| f_ref.optimise())
                    // Split out inner-or terms to fold into this term.
-                    .partition(|f| matches!(f, FilterResolved::Or(_)));
+                    .partition(|f| matches!(f, FilterResolved::Or(_, _)));

                // Append the inner terms.
                f_list_or.into_iter().for_each(|fc| {
-                    if let FilterResolved::Or(mut l) = fc {
+                    if let FilterResolved::Or(mut l, _) = fc {
                        f_list_new.append(&mut l)
                    }
                });

@@ -1247,8 +1303,9 @@ impl FilterResolved {
                    #[allow(clippy::unnecessary_sort_by)]
                    f_list_new.sort_unstable_by(|a, b| b.cmp(a));
                    f_list_new.dedup();

-                    FilterResolved::Or(f_list_new)
+                    // The *last* element is the most costly here, and our worst case factor.
+                    let sf = f_list_new.last().and_then(|f| f.get_slopeyness_factor());
+                    FilterResolved::Or(f_list_new, sf)
                }
            }
            f => f.clone(),
@@ -1256,7 +1313,21 @@ impl FilterResolved {
        }
    }

    pub fn is_andnot(&self) -> bool {
-        matches!(self, FilterResolved::AndNot(_))
+        matches!(self, FilterResolved::AndNot(_, _))
    }

+    #[inline(always)]
+    fn get_slopeyness_factor(&self) -> Option<NonZeroU8> {
+        match self {
+            FilterResolved::Eq(_, _, sf)
+            | FilterResolved::Sub(_, _, sf)
+            | FilterResolved::Pres(_, sf)
+            | FilterResolved::LessThan(_, _, sf)
+            | FilterResolved::Or(_, sf)
+            | FilterResolved::And(_, sf)
+            | FilterResolved::Inclusion(_, sf)
+            | FilterResolved::AndNot(_, sf) => *sf,
+        }
+    }
}

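`get_slopeyness_factor` gives every resolved variant a uniform cost accessor that a filter-ordering pass can key on. One plausible consumer, sketched over bare slope values (`cheapest` is a hypothetical helper, not part of this change): unindexed terms carry `None`, and here they are costed as `u8::MAX` so they rank last, rather than first as `Option`'s derived ordering would place them.

    use std::num::NonZeroU8;

    // Hypothetical: index of the term that should be evaluated first.
    fn cheapest(slopes: &[Option<NonZeroU8>]) -> Option<usize> {
        slopes
            .iter()
            .copied()
            .enumerate()
            // None (no index) is costed as u8::MAX so it sorts behind any
            // real slope value.
            .min_by_key(|&(_, s)| s.map_or(u8::MAX, NonZeroU8::get))
            .map(|(i, _)| i)
    }

    fn main() {
        let slopes = [None, NonZeroU8::new(20), NonZeroU8::new(3)];
        assert_eq!(cheapest(&slopes), Some(2)); // slope 3 wins; None loses
    }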
@@ -1452,8 +1523,8 @@ mod tests {
        // antisymmetry: if a < b then !(a > b), as well as a > b implying !(a < b); and
        // These are unindexed so we have to check them this way.
        let f_t3b = unsafe { filter_resolved!(f_eq("userid", PartialValue::new_iutf8(""))) };
-        assert_eq!(f_t1a.partial_cmp(&f_t3b), Some(Ordering::Less));
-        assert_eq!(f_t3b.partial_cmp(&f_t1a), Some(Ordering::Greater));
+        assert_eq!(f_t1a.partial_cmp(&f_t3b), Some(Ordering::Greater));
+        assert_eq!(f_t3b.partial_cmp(&f_t1a), Some(Ordering::Less));

        // transitivity: a < b and b < c implies a < c. The same must hold for both == and >.
        let f_t4b = unsafe { filter_resolved!(f_sub("userid", PartialValue::new_iutf8(""))) };

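For reference, the standard-library orderings in play across the `bool` to `Option<NonZeroU8>` change that these assertions exercise: `false < true` for the old field, while `None` sorts before any `Some` for the new one. A quick standalone check, using plain std types only:

    use std::cmp::Ordering;
    use std::num::NonZeroU8;

    fn main() {
        // Old representation: a bare bool, where unindexed (false) < indexed (true).
        assert_eq!(false.partial_cmp(&true), Some(Ordering::Less));

        // New representation: Option<NonZeroU8>, where an absent slope (None)
        // sorts before any present slope in the derived ordering.
        assert_eq!(
            None::<NonZeroU8>.partial_cmp(&NonZeroU8::new(1)),
            Some(Ordering::Less)
        );
    }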
@@ -512,7 +512,7 @@ impl<'a> SchemaWriteTransaction<'a> {
        r
    }

-    pub(crate) fn reload_idxmeta(&self) -> HashSet<IdxKey> {
+    pub(crate) fn reload_idxmeta(&self) -> Vec<IdxKey> {
        self.get_attributes()
            .values()
            .flat_map(|a| {

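The `flat_map` here expands each attribute into the index keys it declares, now collected into a `Vec` for the backend to weight rather than a de-duplicated set. A rough, self-contained sketch of that shape, where `Attribute`, `name`, and `index` are hypothetical stand-ins and not the real schema structs:

    // Illustrative stand-ins for the schema types; field names are guesses.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    enum IndexType {
        Equality,
        Presence,
        SubString,
    }

    struct Attribute {
        name: String,
        index: Vec<IndexType>,
    }

    #[derive(Debug)]
    struct IdxKey {
        attr: String,
        itype: IndexType,
    }

    // Each attribute contributes one IdxKey per index type it declares,
    // flattened into a single Vec for the backend to weight.
    fn reload_idxmeta(attrs: &[Attribute]) -> Vec<IdxKey> {
        attrs
            .iter()
            .flat_map(|a| {
                a.index.iter().map(move |itype| IdxKey {
                    attr: a.name.clone(),
                    itype: *itype,
                })
            })
            .collect()
    }

    fn main() {
        let attrs = vec![Attribute {
            name: "name".to_string(),
            index: vec![IndexType::Equality, IndexType::Presence],
        }];
        assert_eq!(reload_idxmeta(&attrs).len(), 2);
    }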
@@ -1014,6 +1014,12 @@ impl QueryServer {
            .upgrade_reindex(audit, SYSTEM_INDEX_VERSION + 1)
            .and_then(|_| reindex_write_2.commit(audit))?;

+        // Force the schema to reload - this is so that any changes to index slope
+        // analysis are now reflected correctly.
+        let slope_reload = task::block_on(self.write_async(ts));
+        slope_reload.force_schema_reload();
+        slope_reload.commit(audit)?;
+
        // Now, based on the system version apply migrations. You may ask "should you not
        // be doing migrations before indexes?". And this is a very good question! The issue
        // is within a migration we must be able to search for content by pres index, and those
@@ -2457,8 +2463,12 @@ impl<'a> QueryServerWriteTransaction<'a> {
        if valid_r.is_empty() {
            // Now use this to reload the backend idxmeta
            ltrace!(audit, "Reloading idxmeta ...");
-            self.be_txn.update_idxmeta(self.schema.reload_idxmeta());
-            Ok(())
+            self.be_txn
+                .update_idxmeta(audit, self.schema.reload_idxmeta())
+                .map_err(|e| {
+                    ladmin_error!(audit, "reload schema update idxmeta {:?}", e);
+                    e
+                })
        } else {
            // Log the failures?
            ladmin_error!(audit, "Schema reload failed -> {:?}", valid_r);
@@ -2625,6 +2635,10 @@ impl<'a> QueryServerWriteTransaction<'a> {
        self.be_txn.reindex(audit)
    }

+    fn force_schema_reload(&self) {
+        self.changed_schema.set(true);
+    }
+
    pub(crate) fn upgrade_reindex(
        &self,
        audit: &mut AuditScope,
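`force_schema_reload` above works through interior mutability: the transaction only holds `&self`, so the dirty flag must be a `Cell`-style field that the commit path can inspect, at which point the schema (and with it the backend idxmeta and slope data) is reloaded. A toy sketch of the mechanism; `WriteTxn` and `commit` here are invented framing, and only the flag pattern mirrors the diff:

    use std::cell::Cell;

    struct WriteTxn {
        changed_schema: Cell<bool>,
    }

    impl WriteTxn {
        fn force_schema_reload(&self) {
            // &self method, hence the Cell: flag mutation without &mut.
            self.changed_schema.set(true);
        }

        fn commit(&self) {
            if self.changed_schema.get() {
                // Reload the schema, then refresh the backend idxmeta,
                // as the reload path above does.
            }
        }
    }

    fn main() {
        let txn = WriteTxn { changed_schema: Cell::new(false) };
        txn.force_schema_reload();
        txn.commit();
    }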