From 68b8bf71fbf96b54127f26e99d9e36dbc1bf07a1 Mon Sep 17 00:00:00 2001 From: Firstyear Date: Wed, 12 Apr 2023 15:53:02 +1000 Subject: [PATCH] Improve unicode control character detection (#1539) --- Cargo.lock | 7 +++++++ Cargo.toml | 1 + server/lib/Cargo.toml | 1 + server/lib/src/value.rs | 32 +++++++++++++++++--------------- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 31e978756..4ce96a259 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2453,6 +2453,7 @@ dependencies = [ "toml", "touch", "tracing", + "unicode-general-category", "url", "urlencoding", "users", @@ -4782,6 +4783,12 @@ version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" +[[package]] +name = "unicode-general-category" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2281c8c1d221438e373249e065ca4989c4c36952c211ff21a0ee91c44a3869e7" + [[package]] name = "unicode-ident" version = "1.0.6" diff --git a/Cargo.toml b/Cargo.toml index 843bded4b..8b96d34c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -152,6 +152,7 @@ tracing-subscriber = { version = "^0.3.16", features = ["env-filter"] } # tracing-forest = { path = "/Users/william/development/tracing-forest/tracing-forest" } tracing-forest = { git = "https://github.com/QnnOkabayashi/tracing-forest.git", rev = "77daf8c8abf010b87d45ece2bf656983c6f8cecb" } +unicode-general-category = "0.6.0" url = "^2.3.1" urlencoding = "2.1.2" users = "^0.11.0" diff --git a/server/lib/Cargo.toml b/server/lib/Cargo.toml index fd5d36e30..4d3d792b7 100644 --- a/server/lib/Cargo.toml +++ b/server/lib/Cargo.toml @@ -63,6 +63,7 @@ nonempty = { workspace = true, features = ["serialize"] } tracing = { workspace = true, features = ["attributes"] } +unicode-general-category.workspace = true url = { workspace = true, features = ["serde"] } urlencoding.workspace = true uuid = { workspace = true, features = ["serde", "v4" ] } diff --git a/server/lib/src/value.rs b/server/lib/src/value.rs index 917d48ad1..a14d8c60d 100644 --- a/server/lib/src/value.rs +++ b/server/lib/src/value.rs @@ -76,12 +76,6 @@ lazy_static! { #[allow(clippy::expect_used)] Regex::new("[\n\r\t]").expect("Invalid singleline regex found") }; - - pub static ref ESCAPES_RE: Regex = { - #[allow(clippy::expect_used)] - Regex::new(r"\x1b\[([\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e])") - .expect("Invalid escapes regex found") - }; } #[derive(Debug, Clone, PartialOrd, Ord, Eq, PartialEq, Hash)] @@ -1701,15 +1695,23 @@ impl Value { pub(crate) fn validate_str_escapes(s: &str) -> bool { // Look for and prevent certain types of string escapes and injections. - if !ESCAPES_RE.is_match(s) { - true - } else { - warn!( - "value contains invalid escape chars forbidden by \"{}\"", - *ESCAPES_RE - ); - false - } + // Formerly checked with + /* + pub static ref ESCAPES_RE: Regex = { + #[allow(clippy::expect_used)] + Regex::new(r"\x1b\[([\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e])") + .expect("Invalid escapes regex found") + }; + */ + use unicode_general_category::{get_general_category, GeneralCategory}; + + s.chars().all(|c| match get_general_category(c) { + GeneralCategory::Control => { + warn!("value contains invalid unicode control character",); + false + } + _ => true, + }) } }