From 2f66dbba5418bece15760dd9641eae39e6c1c5d2 Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Wed, 28 Aug 2024 16:58:15 +0100 Subject: [PATCH] Slightly optimized expression matching. Currently we still do full-linear searching, but at least now it's no longer `O(rules)`, but `O(prefixes + suffixes)`, which is much smaller for our use cases. Eventually we'd like to improve it. --- src/build.rs | 138 +++++++++++++++--------------- src/build/match_expr.rs | 91 -------------------- src/main.rs | 4 +- src/rules.rs | 2 +- src/rules/expr.rs | 40 ++++++++- src/rules/expr/expr_tree.rs | 163 ++++++++++++++++++++++++++++++++++++ 6 files changed, 273 insertions(+), 165 deletions(-) delete mode 100644 src/build/match_expr.rs create mode 100644 src/rules/expr/expr_tree.rs diff --git a/src/build.rs b/src/build.rs index 22fb8b2..f345dd9 100644 --- a/src/build.rs +++ b/src/build.rs @@ -2,21 +2,17 @@ // Modules mod lock; -mod match_expr; // Exports pub use lock::BuildResult; // Imports use { - self::{ - lock::{BuildLock, BuildLockDepGuard}, - match_expr::match_expr, - }, + self::lock::{BuildLock, BuildLockDepGuard}, crate::{ error::ResultMultiple, expand, - rules::{Command, CommandArg, DepItem, Expr, OutItem, Rule, Target}, + rules::{Command, CommandArg, DepItem, Expr, ExprTree, OutItem, Rule, Target}, util::{self, ArcStr}, AppError, Expander, @@ -72,6 +68,11 @@ pub struct Builder { /// All rules' build lock rules_lock: DashMap, + /// Rule output tree + /// + /// Used to quickly find rules for a file + rule_output_tree: ExprTree, + /// Execution semaphore exec_semaphore: Semaphore, @@ -81,18 +82,50 @@ pub struct Builder { impl Builder { /// Creates a new builder - pub fn new(jobs: usize, stop_builds_on_first_err: bool) -> Self { + pub fn new(jobs: usize, rules: &Rules, stop_builds_on_first_err: bool) -> Result { let (event_tx, event_rx) = async_broadcast::broadcast(jobs); let event_rx = event_rx.deactivate(); - Self { + let expander = Expander::new(); + + // Create the rule output tree + let mut rule_output_tree = ExprTree::new(); + for (rule_name, rule) in &rules.rules { + for output in &rule.output { + // Expand the file first + // Expand all expressions in the output file + // Note: This doesn't expand patterns, so we can match those later + let output_file = match output { + OutItem::File { file: output_file, .. } => output_file, + }; + let expand_visitor = expand::Visitor::from_aliases([&rule.aliases, &rules.aliases]) + .with_default_pat(expand::FlowControl::Keep); + let output_file = expander.expand_expr(output_file, &expand_visitor)?; + + + // Then try to insert it + if let Some(prev_rule_name) = rule_output_tree + .insert(&output_file, rule_name.clone()) + .context("Unable to add rule output to tree") + .map_err(AppError::Other)? + { + return Err(AppError::Other(anyhow::anyhow!( + "Multiple rules match the same output file: {output_file}\n first rule: {prev_rule_name}\n \ + second rule: {rule_name}" + ))); + }; + } + } + + Ok(Self { event_tx, _event_rx: event_rx, - expander: Expander::new(), + expander, rules_lock: DashMap::new(), + rule_output_tree, exec_semaphore: Semaphore::new(jobs), stop_builds_on_first_err, - } + }) } /// Returns all build results @@ -132,39 +165,39 @@ impl Builder { ) -> Result, TargetRule)>, AppError> { let target_rule = match *target { // If we got a file, check which rule can make it - Target::File { ref file, .. } => match self.find_rule_for_file(file, rules)? { - Some((rule, pats)) => { - tracing::trace!(%target, %rule.name, "Found target rule"); - let target_rule = TargetRule { - name: rule.name.clone(), - pats, - }; - (rule, target_rule) + Target::File { ref file, .. } => match self.rule_output_tree.find(file) { + Some((name, pats)) => { + tracing::trace!(%target, %name, "Found target rule"); + TargetRule { + name, + pats: Arc::new(pats), + } }, None => return Ok(None), }, // If we got a rule name with patterns, find it and replace all patterns - Target::Rule { ref rule, ref pats } => { - // Find the rule and expand it - let rule = rules.rules.get(&**rule).ok_or_else(|| AppError::UnknownRule { - rule_name: (**rule).to_owned(), - })?; - let expand_visitor = expand::Visitor::new([&rule.aliases, &rules.aliases], [pats]); - let rule = self - .expander - .expand_rule(rule, &expand_visitor) - .map_err(AppError::expand_rule(&*rule.name))?; - let target_rule = TargetRule { - name: rule.name.clone(), - pats: Arc::clone(pats), - }; - (rule, target_rule) + Target::Rule { ref rule, ref pats } => TargetRule { + name: rule.clone(), + pats: Arc::clone(pats), }, }; - Ok(Some(target_rule)) + // Find the rule and expand it + let rule = rules + .rules + .get(&*target_rule.name) + .ok_or_else(|| AppError::UnknownRule { + rule_name: (*target_rule.name).to_owned(), + })?; + let expand_visitor = expand::Visitor::new([&rule.aliases, &rules.aliases], [&target_rule.pats]); + let rule = self + .expander + .expand_rule(rule, &expand_visitor) + .map_err(AppError::expand_rule(&*rule.name))?; + + Ok(Some((rule, target_rule))) } /// Resets a build @@ -765,43 +798,6 @@ impl Builder { Ok(()) } - - /// Finds a rule for `file` - // TODO: Not make this `O(N)` for the number of rules. - #[expect(clippy::type_complexity, reason = "TODO: Add some type aliases / struct")] - pub fn find_rule_for_file( - &self, - file: &ArcStr, - rules: &Rules, - ) -> Result, Arc>)>, AppError> { - for rule in rules.rules.values() { - for output in &rule.output { - // Expand all expressions in the output file - // Note: This doesn't expand patterns, so we can match those later - let output_file = match output { - OutItem::File { file: output_file, .. } => output_file, - }; - let expand_visitor = expand::Visitor::from_aliases([&rule.aliases, &rules.aliases]) - .with_default_pat(expand::FlowControl::Keep); - let output_file = self.expander.expand_expr(output_file, &expand_visitor)?; - - // Then try to match the output file to the file we need to create - if let Some(rule_pats) = self::match_expr(&output_file, &output_file.cmpts, file.clone())? { - let rule_pats = Arc::new(rule_pats); - - let expand_visitor = expand::Visitor::new([&rule.aliases, &rules.aliases], [&rule_pats]); - let rule = self - .expander - .expand_rule(rule, &expand_visitor) - .map_err(AppError::expand_rule(&*rule.name))?; - return Ok(Some((rule, rule_pats))); - } - } - } - - // If we got here, there was no matching rule - Ok(None) - } } /// Parses a dependencies file diff --git a/src/build/match_expr.rs b/src/build/match_expr.rs deleted file mode 100644 index 10909b9..0000000 --- a/src/build/match_expr.rs +++ /dev/null @@ -1,91 +0,0 @@ -//! Expression matching - -// Imports -use { - crate::{ - rules::{Expr, ExprCmpt, PatternOp}, - util::ArcStr, - AppError, - }, - std::{assert_matches::assert_matches, collections::BTreeMap}, -}; - -/// Returns if `value` matches all `cmpts` and returns all patterns resolved -/// -/// Returns error if more than 1 pattern is found. -/// -/// Panics if any aliases are found -pub fn match_expr( - expr: &Expr, - cmpts: &[ExprCmpt], - mut value: ArcStr, -) -> Result>, AppError> { - let mut patterns = BTreeMap::new(); - - // Until `rhs` has anything to match left - let mut cur_cmpts = cmpts; - loop { - match cur_cmpts { - // If we start with a string, strip the prefix off - [ExprCmpt::String(lhs), rest @ ..] => match value.strip_prefix(&**lhs) { - Some(new_rhs) => { - cur_cmpts = rest; - value = new_rhs; - }, - None => return Ok(None), - }, - - // If we end with a string, strip the suffix off - [rest @ .., ExprCmpt::String(lhs)] => match value.strip_suffix(&**lhs) { - Some(new_rhs) => { - cur_cmpts = rest; - value = new_rhs; - }, - None => return Ok(None), - }, - - // If we're a single pattern, check for operators - [ExprCmpt::Pattern(pat)] => { - let mut ops = pat.ops.as_slice(); - loop { - match ops { - // If we're empty, match everything - [] => break, - - // On non-empty check if the rest of the value is empty - [PatternOp::NonEmpty, rest @ ..] => match value.is_empty() { - // If so, we don't match anything - true => return Ok(None), - - // Else continue checking the rest of the operators - false => ops = rest, - }, - } - } - - // If we get here, match everything - // TODO: Borrow some cases? - let prev_value = patterns.insert(pat.name.clone(), value); - assert_matches!(prev_value, None, "Found repeated pattern"); - cur_cmpts = &[]; - value = ArcStr::default(); - }, - - // If we have patterns on both sides, reject - [ExprCmpt::Pattern(_), .., ExprCmpt::Pattern(_)] => - return Err(AppError::MatchExprTooManyPats { - expr: expr.to_string(), - expr_cmpts: cmpts.iter().map(ExprCmpt::to_string).collect(), - }), - // If we have aliases on any side, reject - [ExprCmpt::Alias(alias), ..] | [.., ExprCmpt::Alias(alias)] => - unreachable!("Cannot match unexpanded alias: {alias:?}"), - - // If we're empty, we match an empty string - [] => match &*value { - "" => return Ok(Some(patterns)), - _ => return Ok(None), - }, - } - } -} diff --git a/src/main.rs b/src/main.rs index c989c60..af0382e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -192,7 +192,9 @@ async fn run(args: Args) -> ExitResult { // Create the builder // Note: We should stop builds on the first error if we're *not* watching. - let builder = Builder::new(jobs, !args.watch); + let builder = Builder::new(jobs, &rules, !args.watch) + .context("Unable to create builder") + .map_err(AppError::Other)?; let builder = Arc::new(builder); // Then create the watcher, if we're watching diff --git a/src/rules.rs b/src/rules.rs index e85d800..403c02b 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -11,7 +11,7 @@ mod target; // Exports pub use { alias::AliasOp, - expr::{Expr, ExprCmpt}, + expr::{Expr, ExprCmpt, ExprTree}, item::{DepItem, OutItem}, pattern::PatternOp, rule::{Command, CommandArg, Exec, Rule}, diff --git a/src/rules/expr.rs b/src/rules/expr.rs index ac2177e..01f2891 100644 --- a/src/rules/expr.rs +++ b/src/rules/expr.rs @@ -1,5 +1,11 @@ //! Expressions +// Modules +pub mod expr_tree; + +// Exports +pub use self::expr_tree::ExprTree; + // Imports use { super::{ @@ -24,7 +30,23 @@ pub enum ExprCmpt { } impl ExprCmpt { - /// Converts this expression into a string, if it's a string. + /// Returns `true` if the component is [`ExprCmpt::String`]. + #[must_use] + pub const fn is_string(&self) -> bool { + matches!(self, Self::String(_)) + } + + /// Returns this expression as a string, if it is one. + #[must_use] + pub const fn as_string(&self) -> Option<&ArcStr> { + #[expect(clippy::wildcard_enum_match_arm, reason = "We only care about a specific variant")] + match self { + Self::String(v) => Some(v), + _ => None, + } + } + + /// Converts this component into a string, if it's a string. pub fn try_into_string(self) -> Result { #[expect(clippy::wildcard_enum_match_arm, reason = "We only care about a specific variant")] match self { @@ -32,6 +54,22 @@ impl ExprCmpt { _ => Err(self), } } + + /// Returns `true` if the component is [`ExprCmpt::Pattern`]. + #[must_use] + pub const fn is_pattern(&self) -> bool { + matches!(self, Self::Pattern(_)) + } + + /// Returns this expression as a pattern, if it is one. + #[must_use] + pub const fn as_pattern(&self) -> Option<&Pattern> { + #[expect(clippy::wildcard_enum_match_arm, reason = "We only care about a specific variant")] + match self { + Self::Pattern(v) => Some(v), + _ => None, + } + } } /// Expression diff --git a/src/rules/expr/expr_tree.rs b/src/rules/expr/expr_tree.rs new file mode 100644 index 0000000..a966149 --- /dev/null +++ b/src/rules/expr/expr_tree.rs @@ -0,0 +1,163 @@ +//! Expression tree + +use { + super::Expr, + crate::{ + error::AppError, + rules::{pattern::Pattern, PatternOp}, + util::ArcStr, + }, + itertools::{Itertools, PeekingNext}, + std::collections::BTreeMap, +}; + +/// An expression tree. +// TODO: Currently we just match against all possible prefixes and suffixes. +// Try to actually create a fast, non `O(n)`, algorithm? +#[derive(Debug)] +pub struct ExprTree { + /// Prefixes + prefixes: PrefixTree, +} + +// TODO: Flatten this? +type PrefixTree = BTreeMap>; +type SuffixTree = BTreeMap, K)>; + +impl ExprTree { + /// Creates a new, empty, expression tree + pub const fn new() -> Self { + Self { + prefixes: BTreeMap::new(), + } + } + + /// Adds an expression to the suffix tree, associated with a key. + /// + /// The expression must not contain any aliases. + /// + /// Returns the old key if the expression already existed. + pub fn insert(&mut self, expr: &Expr, key: K) -> Result, AppError> { + let mut cmpts = expr.cmpts.iter(); + + // Get all components from the start that are strings + let prefix = cmpts + .by_ref() + .peeking_take_while(|cmpt| cmpt.is_string()) + .map(|cmpt| &**cmpt.as_string().expect("Just checked")) + .collect::(); + let prefix = ArcStr::from(prefix); + + // Get the (possible) pattern in the middle + let pat = cmpts + .peeking_next(|cmpt| cmpt.is_pattern()) + .map(|cmpt| cmpt.as_pattern().expect("Just checked")) + .cloned(); + + // Then get the rest of the string + let suffix = cmpts + .peeking_take_while(|cmpt| cmpt.is_string()) + .map(|cmpt| &**cmpt.as_string().expect("Just checked")) + .collect::(); + let suffix = ArcStr::from(suffix); + + // After this the expression should be empty + if let Some(cmpt) = cmpts.next() { + return Err(AppError::Other(anyhow::anyhow!( + "Unexpected component in expression {expr}: {cmpt}" + ))); + } + + // Finally try to insert and retrieve the old key, if any. + let old_key = self + .prefixes + .entry(prefix) + .or_default() + .insert(suffix, (pat, key)) + .map(|(_, old_key)| old_key); + + Ok(old_key) + } + + /// Matches a string against this expression tree. + /// + /// Returns the first match with patterns resolved. + // TODO: Since we only support a single pattern, return `Option` for it instead. + pub fn find(&self, value: &str) -> Option<(K, BTreeMap)> + where + K: Clone, + { + for (prefix, suffixes) in &self.prefixes { + // If the prefix no longer matches, try the next + let Some(value_rest) = value.strip_prefix(&**prefix) else { + continue; + }; + + // Try to find match the suffixes + if let Some((key, pats)) = Self::find_match_suffix(value_rest, suffixes) { + return Some((key, pats)); + } + } + + None + } + + /// Finds a matching suffix for `value` from the suffix map. + fn find_match_suffix(value: &str, suffixes: &SuffixTree) -> Option<(K, BTreeMap)> + where + K: Clone, + { + // Try to match against an empty suffix + // Note: We always do this, since some expressions could be + // of the form `(String, Pat, Empty)`, with `Pat` matching + // the rest. + if let Some((pat, key)) = suffixes.get("") && + let Some(pats) = Self::find_match_pat("", pat) + { + return Some((key.clone(), pats)); + } + + // Otherwise, match against all other suffixes + for (suffix, (pat, key)) in suffixes { + // If the prefix no longer matches, try the next + let Some(pat_value) = value.strip_suffix(&**suffix) else { + continue; + }; + + // Otherwise, we might have found the final value, so test it + if let Some(pats) = Self::find_match_pat(pat_value, pat) { + return Some((key.clone(), pats)); + } + } + + None + } + + /// Matches a pattern against a remaining value after it's prefix and suffix have been stripped + fn find_match_pat(value: &str, pat: &Option) -> Option> { + let pats = match pat { + // If there is any pattern, try to match it + Some(pat) => { + for op in &pat.ops { + match op { + // If it needs to be non-empty, check + PatternOp::NonEmpty => match value.is_empty() { + true => return None, + false => continue, + }, + } + } + + BTreeMap::from([(pat.name.clone(), value.into())]) + }, + + // Otherwise, we match if the value is empty + _ => match value.is_empty() { + true => BTreeMap::new(), + false => return None, + }, + }; + + Some(pats) + } +}