// kanidm/src/lib/filter.rs

// This represents a filtering query. This can be done
// in parallel map/reduce style, or directly on a single
// entry to assert it matches.
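// For example, a query like
//     And([ Eq("class", "person"), Or([ Eq("userid", "a"), Eq("userid", "b") ]) ])
// (arguments abbreviated) asserts "class is person AND (userid is a OR
// userid is b)" against each candidate entry.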
use be::BackendReadTransaction;
use error::{OperationError, SchemaError};
use proto_v1::Filter as ProtoFilter;
use schema::SchemaReadTransaction;
use server::{
QueryServer, QueryServerReadTransaction, QueryServerTransaction, QueryServerWriteTransaction,
};
use std::cmp::{Ordering, PartialOrd};
use std::marker::PhantomData;
// Perhaps make these json serialisable. Certainly would make parsing
// simpler ...
#[derive(Debug)]
pub struct FilterValid;
#[derive(Debug)]
pub struct FilterInvalid;
#[derive(Debug)]
pub enum Filter<VALID> {
// This is attr - value
Eq(String, String),
Sub(String, String),
Pres(String),
Or(Vec<Filter<VALID>>),
And(Vec<Filter<VALID>>),
AndNot(Box<Filter<VALID>>),
invalid(PhantomData<VALID>),
}
// Change this so you have RawFilter and Filter. RawFilter is the "builder", and then
// given a "schema" you can emit a Filter. For us internally, we can create Filter
// directly still ...
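//
// In the current shape that flow looks roughly like this (a sketch;
// `schema_read` is a hypothetical SchemaReadTransaction held by the caller):
//
//     let raw: Filter<FilterInvalid> =
//         Filter::Eq("class".to_string(), "user".to_string());
//     let checked: Filter<FilterValid> = raw.validate(&schema_read)?;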
impl Filter<FilterValid> {
// Does this need mut self? Aren't we returning
// a new copied filter?
pub fn optimise(&self) -> Self {
// Apply optimisations to the filter
// An easy way would be to implement PartialOrd
// then do sort on the or/and/not
// as the general conditions we want
// to optimise on are in those ...
//
// The other big one is folding redundant
// terms down.
//
// If an or/not/and condition has no items, remove it
//
// If it's the root item?
self.clone()
}
pub fn invalidate(&self) -> Filter<FilterInvalid> {
match self {
Filter::Eq(a, v) => Filter::Eq(a.clone(), v.clone()),
Filter::Sub(a, v) => Filter::Sub(a.clone(), v.clone()),
Filter::Pres(a) => Filter::Pres(a.clone()),
Filter::Or(l) => Filter::Or(l.iter().map(|f| f.invalidate()).collect()),
Filter::And(l) => Filter::And(l.iter().map(|f| f.invalidate()).collect()),
Filter::AndNot(l) => Filter::AndNot(Box::new(l.invalidate())),
Filter::invalid(_) => {
// TODO: Is there a better way to not need to match the phantom?
unimplemented!()
}
}
}
}
impl Filter<FilterInvalid> {
pub fn new_ignore_hidden(inner: Filter<FilterInvalid>) -> Self {
// Create a new filter, that ignores hidden entries.
Filter::And(vec![
Filter::AndNot(Box::new(Filter::Or(vec![
Filter::Eq("class".to_string(), "tombstone".to_string()),
Filter::Eq("class".to_string(), "recycled".to_string()),
]))),
inner,
])
}
pub fn new_recycled(inner: Filter<FilterInvalid>) -> Self {
// Create a filter that searches recycled items only.
Filter::And(vec![
Filter::Eq("class".to_string(), "recycled".to_string()),
inner,
])
}
pub fn validate(
&self,
schema: &SchemaReadTransaction,
) -> Result<Filter<FilterValid>, SchemaError> {
// TODO:
// First, normalise (if possible)
// Then, validate
// Optimisation is done at another stage.
// This probably needs some rework
// TODO: Getting this each recursion could be slow. Maybe
// we need an inner function that passes the reference?
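// The flow for each branch below: normalise the attribute name via the
// schema's own "name" attribute type, look the attribute up, then normalise
// and syntax-check the value against that attribute's definition. For
// example, Eq("UserId", "William") would (assuming the schema normalises
// names to lower case) become Eq("userid", <normalised value>) on success,
// or SchemaError::InvalidAttribute if "userid" is not in the schema.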
let schema_attributes = schema.get_attributes();
let schema_name = schema_attributes
.get("name")
.expect("Critical: Core schema corrupt or missing.");
match self {
Filter::Eq(attr, value) => {
// Validate/normalise the attr name.
let attr_norm = schema_name.normalise_value(attr);
// Now check it exists
match schema_attributes.get(&attr_norm) {
Some(schema_a) => {
let value_norm = schema_a.normalise_value(value);
schema_a
.validate_value(&value_norm)
// Okay, it worked, transform to a filter component
.map(|_| Filter::Eq(attr_norm, value_norm))
// On error, pass the error back out.
}
None => Err(SchemaError::InvalidAttribute),
}
}
Filter::Sub(attr, value) => {
// Validate/normalise the attr name.
let attr_norm = schema_name.normalise_value(attr);
// Now check it exists
match schema_attributes.get(&attr_norm) {
Some(schema_a) => {
let value_norm = schema_a.normalise_value(value);
schema_a
.validate_value(&value_norm)
// Okay, it worked, transform to a filter component
.map(|_| Filter::Sub(attr_norm, value_norm))
// On error, pass the error back out.
}
None => Err(SchemaError::InvalidAttribute),
}
}
Filter::Pres(attr) => {
let attr_norm = schema_name.normalise_value(attr);
// Now check it exists
match schema_attributes.get(&attr_norm) {
Some(_attr_name) => {
// Return our valid data
Ok(Filter::Pres(attr_norm))
}
None => Err(SchemaError::InvalidAttribute),
}
}
Filter::Or(filters) => {
// If all filters are okay, return Ok(Filter::Or())
// If any is invalid, return the error.
// TODO: ftweedal says an empty or is a valid filter
// in mathematical terms.
if filters.len() == 0 {
return Err(SchemaError::EmptyFilter);
};
let x: Result<Vec<_>, _> = filters
.iter()
.map(|filter| filter.validate(schema))
.collect();
// Now put the valid filters into the Filter
x.map(|valid_filters| Filter::Or(valid_filters))
}
Filter::And(filters) => {
// TODO: ftweedal says an empty and is a valid filter
// in mathematical terms.
if filters.len() == 0 {
return Err(SchemaError::EmptyFilter);
};
let x: Result<Vec<_>, _> = filters
.iter()
.map(|filter| filter.validate(schema))
.collect();
// Now put the valid filters into the Filter
x.map(|valid_filters| Filter::And(valid_filters))
}
Filter::AndNot(filter) => {
// Just validate the inner
filter
.validate(schema)
.map(|r_filter| Filter::AndNot(Box::new(r_filter)))
}
_ => panic!(),
}
}
// TODO: This has to have two versions to account for ro/rw traits, because Rust can't
// monomorphise on the trait to call clone_value. An option is to make a fn that
// takes clone_value(t, a, v) instead, but that may have a similar issue.
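// A sketch of the alternative mentioned above (illustrative only, not
// implemented here): take the value-cloning behaviour as a parameter rather
// than a concrete transaction type, e.g.
//
//     fn from_proto<F>(f: &ProtoFilter, clone_value: F) -> Result<Self, OperationError>
//     where
//         F: Fn(&str, &str) -> Result<String, OperationError>,
//
// though, as noted, that may hit the same monomorphisation problem.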
pub fn from_ro(f: &ProtoFilter, qs: &QueryServerTransaction) -> Result<Self, OperationError> {
Ok(match f {
ProtoFilter::Eq(a, v) => Filter::Eq(a.clone(), qs.clone_value(a, v)?),
ProtoFilter::Sub(a, v) => Filter::Sub(a.clone(), qs.clone_value(a, v)?),
ProtoFilter::Pres(a) => Filter::Pres(a.clone()),
ProtoFilter::Or(l) => Filter::Or(
l.iter()
.map(|f| Self::from_ro(f, qs))
.collect::<Result<Vec<_>, _>>()?,
),
ProtoFilter::And(l) => Filter::And(
l.iter()
.map(|f| Self::from_ro(f, qs))
.collect::<Result<Vec<_>, _>>()?,
),
ProtoFilter::AndNot(l) => Filter::AndNot(Box::new(Self::from_ro(l, qs)?)),
})
}
pub fn from_rw(
f: &ProtoFilter,
qs: &QueryServerWriteTransaction,
) -> Result<Self, OperationError> {
Ok(match f {
ProtoFilter::Eq(a, v) => Filter::Eq(a.clone(), qs.clone_value(a, v)?),
ProtoFilter::Sub(a, v) => Filter::Sub(a.clone(), qs.clone_value(a, v)?),
ProtoFilter::Pres(a) => Filter::Pres(a.clone()),
ProtoFilter::Or(l) => Filter::Or(
l.iter()
.map(|f| Self::from_rw(f, qs))
.collect::<Result<Vec<_>, _>>()?,
),
ProtoFilter::And(l) => Filter::And(
l.iter()
.map(|f| Self::from_rw(f, qs))
.collect::<Result<Vec<_>, _>>()?,
),
ProtoFilter::AndNot(l) => Filter::AndNot(Box::new(Self::from_rw(l, qs)?)),
})
}
}
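// Clone is written by hand for each state rather than derived, presumably
// because #[derive(Clone)] would add a `VALID: Clone` bound that the marker
// types FilterValid/FilterInvalid (which only derive Debug) do not satisfy.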
impl Clone for Filter<FilterValid> {
fn clone(&self) -> Self {
// I think we only need to match self then new + clone?
match self {
Filter::Eq(a, v) => Filter::Eq(a.clone(), v.clone()),
Filter::Sub(a, v) => Filter::Sub(a.clone(), v.clone()),
Filter::Pres(a) => Filter::Pres(a.clone()),
Filter::Or(l) => Filter::Or(l.clone()),
Filter::And(l) => Filter::And(l.clone()),
Filter::AndNot(l) => Filter::AndNot(l.clone()),
Filter::invalid(_) => {
// TODO: Is there a better way to not need to match the phantom?
unimplemented!()
}
}
}
}
impl Clone for Filter<FilterInvalid> {
fn clone(&self) -> Self {
// I think we only need to match self then new + clone?
match self {
Filter::Eq(a, v) => Filter::Eq(a.clone(), v.clone()),
Filter::Sub(a, v) => Filter::Sub(a.clone(), v.clone()),
Filter::Pres(a) => Filter::Pres(a.clone()),
Filter::Or(l) => Filter::Or(l.clone()),
Filter::And(l) => Filter::And(l.clone()),
Filter::AndNot(l) => Filter::AndNot(l.clone()),
Filter::invalid(_) => {
// TODO: Is there a better way to not need to match the phantom?
unimplemented!()
}
}
}
}
impl PartialEq for Filter<FilterValid> {
fn eq(&self, rhs: &Filter<FilterValid>) -> bool {
match (self, rhs) {
(Filter::Eq(a1, v1), Filter::Eq(a2, v2)) => a1 == a2 && v1 == v2,
(Filter::Sub(a1, v1), Filter::Sub(a2, v2)) => a1 == a2 && v1 == v2,
(Filter::Pres(a1), Filter::Pres(a2)) => a1 == a2,
(Filter::Or(l1), Filter::Or(l2)) => l1 == l2,
(Filter::And(l1), Filter::And(l2)) => l1 == l2,
(Filter::AndNot(l1), Filter::AndNot(l2)) => l1 == l2,
(_, _) => false,
}
}
}
// remember, this isn't ordering by alphanumeric, this is ordering of
// optimisation preference!
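// Concretely: Eq sorts before Pres, which sorts before Sub, so sorting the
// children of an And/Or puts the terms with the best index candidates first.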
impl PartialOrd for Filter<FilterValid> {
fn partial_cmp(&self, rhs: &Filter<FilterValid>) -> Option<Ordering> {
match (self, rhs) {
(Filter::Eq(a1, _), Filter::Eq(a2, _)) => {
// Order attr name, then value
// Later we may add rules to put certain attrs ahead due
// to optimisation rules
a1.partial_cmp(a2)
}
(Filter::Sub(a1, _), Filter::Sub(a2, _)) => a1.partial_cmp(a2),
(Filter::Pres(a1), Filter::Pres(a2)) => a1.partial_cmp(a2),
(Filter::Eq(_, _), _) => {
// Always prefer Eq over all else, as these will have
// the best indexes and return smallest candidates.
Some(Ordering::Less)
}
(_, Filter::Eq(_, _)) => Some(Ordering::Greater),
(Filter::Pres(_), _) => Some(Ordering::Less),
(_, Filter::Pres(_)) => Some(Ordering::Greater),
(Filter::Sub(_, _), _) => Some(Ordering::Greater),
(_, Filter::Sub(_, _)) => Some(Ordering::Less),
(_, _) => Some(Ordering::Equal),
}
}
}
#[cfg(test)]
mod tests {
use super::{Filter, FilterInvalid};
use entry::{Entry, EntryNew, EntryValid};
use serde_json;
use std::cmp::{Ordering, PartialOrd};
#[test]
fn test_filter_simple() {
// Test construction.
let _filt: Filter<FilterInvalid> = Filter::Eq(String::from("class"), String::from("user"));
let _complex_filt: Filter<FilterInvalid> = Filter::And(vec![
Filter::Or(vec![
Filter::Eq(String::from("userid"), String::from("test_a")),
Filter::Eq(String::from("userid"), String::from("test_b")),
]),
Filter::Eq(String::from("class"), String::from("user")),
]);
}
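// A minimal structural check of the hidden-entry wrapper (a sketch: since
// Filter<FilterInvalid> has no PartialEq impl here, we compare Debug output
// rather than the filters themselves).
#[test]
fn test_filter_new_ignore_hidden() {
    let inner: Filter<FilterInvalid> =
        Filter::Eq(String::from("class"), String::from("user"));
    let wrapped = Filter::new_ignore_hidden(inner.clone());
    // new_ignore_hidden should wrap the inner filter in an And that excludes
    // tombstone and recycled entries.
    let expected = Filter::And(vec![
        Filter::AndNot(Box::new(Filter::Or(vec![
            Filter::Eq(String::from("class"), String::from("tombstone")),
            Filter::Eq(String::from("class"), String::from("recycled")),
        ]))),
        inner,
    ]);
    assert_eq!(format!("{:?}", wrapped), format!("{:?}", expected));
}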
#[test]
fn test_filter_optimise() {
// Given sets of "optimisable" filters, optimise them.
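// A sketch of what this might eventually assert (hypothetical, since
// optimise() currently just clones): with PartialOrd-based sorting,
//     And([Sub("a", "x"), Pres("a"), Eq("a", "x")])
// would reorder to
//     And([Eq("a", "x"), Pres("a"), Sub("a", "x")]),
// and empty Or/And branches would be folded away.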
}
#[test]
fn test_filter_eq() {
let f_t1a = Filter::Pres(String::from("userid"));
let f_t1b = Filter::Pres(String::from("userid"));
let f_t1c = Filter::Pres(String::from("zzzz"));
assert_eq!(f_t1a == f_t1b, true);
assert_eq!(f_t1a == f_t1c, false);
assert_eq!(f_t1b == f_t1c, false);
let f_t2a = Filter::And(vec![f_t1a]);
let f_t2b = Filter::And(vec![f_t1b]);
let f_t2c = Filter::And(vec![f_t1c]);
assert_eq!(f_t2a == f_t2b, true);
assert_eq!(f_t2a == f_t2c, false);
assert_eq!(f_t2b == f_t2c, false);
assert_eq!(f_t2c == Filter::Pres(String::from("test")), false);
}
#[test]
fn test_filter_ord() {
// Test that we uphold the rules of PartialOrd
// Basic equality
// Test the two major paths here (str vs list)
let f_t1a = Filter::Pres(String::from("userid"));
let f_t1b = Filter::Pres(String::from("userid"));
assert_eq!(f_t1a.partial_cmp(&f_t1b), Some(Ordering::Equal));
assert_eq!(f_t1b.partial_cmp(&f_t1a), Some(Ordering::Equal));
let f_t2a = Filter::And(vec![]);
let f_t2b = Filter::And(vec![]);
assert_eq!(f_t2a.partial_cmp(&f_t2b), Some(Ordering::Equal));
assert_eq!(f_t2b.partial_cmp(&f_t2a), Some(Ordering::Equal));
// antisymmetry: if a < b then !(a > b), as well as a > b implying !(a < b); and
let f_t3b = Filter::Eq(String::from("userid"), String::from(""));
assert_eq!(f_t1a.partial_cmp(&f_t3b), Some(Ordering::Greater));
assert_eq!(f_t3b.partial_cmp(&f_t1a), Some(Ordering::Less));
// transitivity: a < b and b < c implies a < c. The same must hold for both == and >.
let f_t4b = Filter::Sub(String::from("userid"), String::from(""));
assert_eq!(f_t1a.partial_cmp(&f_t4b), Some(Ordering::Less));
assert_eq!(f_t3b.partial_cmp(&f_t4b), Some(Ordering::Less));
assert_eq!(f_t4b.partial_cmp(&f_t1a), Some(Ordering::Greater));
assert_eq!(f_t4b.partial_cmp(&f_t3b), Some(Ordering::Greater));
}
#[test]
fn test_filter_clone() {
// Test that cloning filters yields the same result regardless of
// complexity.
let f_t1a = Filter::Pres(String::from("userid"));
let f_t1b = f_t1a.clone();
let f_t1c = Filter::Pres(String::from("zzzz"));
assert_eq!(f_t1a == f_t1b, true);
assert_eq!(f_t1a == f_t1c, false);
let f_t2a = Filter::And(vec![f_t1a]);
let f_t2b = f_t2a.clone();
let f_t2c = Filter::And(vec![f_t1c]);
assert_eq!(f_t2a == f_t2b, true);
assert_eq!(f_t2a == f_t2c, false);
}
#[test]
fn test_or_entry_filter() {
let e: Entry<EntryValid, EntryNew> = serde_json::from_str(
r#"{
"valid": null,
"state": null,
"attrs": {
"userid": ["william"],
"uidNumber": ["1000"]
}
}"#,
)
.unwrap();
let f_t1a = Filter::Or(vec![
Filter::Eq(String::from("userid"), String::from("william")),
Filter::Eq(String::from("uidNumber"), String::from("1000")),
]);
assert!(e.entry_match_no_index(&f_t1a));
let f_t2a = Filter::Or(vec![
Filter::Eq(String::from("userid"), String::from("william")),
Filter::Eq(String::from("uidNumber"), String::from("1001")),
]);
assert!(e.entry_match_no_index(&f_t2a));
let f_t3a = Filter::Or(vec![
Filter::Eq(String::from("userid"), String::from("alice")),
Filter::Eq(String::from("uidNumber"), String::from("1000")),
]);
assert!(e.entry_match_no_index(&f_t3a));
let f_t4a = Filter::Or(vec![
Filter::Eq(String::from("userid"), String::from("alice")),
Filter::Eq(String::from("uidNumber"), String::from("1001")),
]);
assert!(!e.entry_match_no_index(&f_t4a));
}
#[test]
fn test_and_entry_filter() {
let e: Entry<EntryValid, EntryNew> = serde_json::from_str(
r#"{
"valid": null,
"state": null,
"attrs": {
"userid": ["william"],
"uidNumber": ["1000"]
}
}"#,
)
.unwrap();
let f_t1a = Filter::And(vec![
Filter::Eq(String::from("userid"), String::from("william")),
Filter::Eq(String::from("uidNumber"), String::from("1000")),
]);
assert!(e.entry_match_no_index(&f_t1a));
let f_t2a = Filter::And(vec![
Filter::Eq(String::from("userid"), String::from("william")),
Filter::Eq(String::from("uidNumber"), String::from("1001")),
]);
assert!(!e.entry_match_no_index(&f_t2a));
let f_t3a = Filter::And(vec![
Filter::Eq(String::from("userid"), String::from("alice")),
Filter::Eq(String::from("uidNumber"), String::from("1000")),
]);
assert!(!e.entry_match_no_index(&f_t3a));
let f_t4a = Filter::And(vec![
Filter::Eq(String::from("userid"), String::from("alice")),
Filter::Eq(String::from("uidNumber"), String::from("1001")),
]);
assert!(!e.entry_match_no_index(&f_t4a));
}
#[test]
fn test_not_entry_filter() {
let e1: Entry<EntryValid, EntryNew> = serde_json::from_str(
r#"{
"valid": null,
"state": null,
"attrs": {
"userid": ["william"],
"uidNumber": ["1000"]
}
}"#,
)
.unwrap();
let f_t1a = Filter::AndNot(Box::new(Filter::Eq(
String::from("userid"),
String::from("alice"),
)));
assert!(e1.entry_match_no_index(&f_t1a));
let f_t2a = Filter::AndNot(Box::new(Filter::Eq(
String::from("userid"),
String::from("william"),
)));
assert!(!e1.entry_match_no_index(&f_t2a));
}
#[test]
fn test_nested_entry_filter() {
let e1: Entry<EntryValid, EntryNew> = serde_json::from_str(
r#"{
"valid": null,
"state": null,
"attrs": {
"class": ["person"],
"uidNumber": ["1000"]
}
}"#,
)
.unwrap();
let e2: Entry<EntryValid, EntryNew> = serde_json::from_str(
r#"{
"valid": null,
"state": null,
"attrs": {
"class": ["person"],
"uidNumber": ["1001"]
}
}"#,
)
.unwrap();
let e3: Entry<EntryValid, EntryNew> = serde_json::from_str(
r#"{
"valid": null,
"state": null,
"attrs": {
"class": ["person"],
"uidNumber": ["1002"]
}
}"#,
)
.unwrap();
let e4: Entry<EntryValid, EntryNew> = serde_json::from_str(
r#"{
"valid": null,
"state": null,
"attrs": {
"class": ["group"],
"uidNumber": ["1000"]
}
}"#,
)
.unwrap();
let f_t1a = Filter::And(vec![
Filter::Eq(String::from("class"), String::from("person")),
Filter::Or(vec![
Filter::Eq(String::from("uidNumber"), String::from("1001")),
Filter::Eq(String::from("uidNumber"), String::from("1000")),
]),
]);
assert!(e1.entry_match_no_index(&f_t1a));
assert!(e2.entry_match_no_index(&f_t1a));
assert!(!e3.entry_match_no_index(&f_t1a));
assert!(!e4.entry_match_no_index(&f_t1a));
}
}