Slightly optimized expression matching.

Currently we still do full-linear searching, but at least now it's no longer `O(rules)`, but `O(prefixes + suffixes)`, which is much smaller for our use cases.
Eventually we'd like to improve it.
This commit is contained in:
Filipe Rodrigues 2024-08-28 16:58:15 +01:00
parent 7ff924ac68
commit 2f66dbba54
Signed by: zenithsiz
SSH Key Fingerprint: SHA256:Mb5ppb3Sh7IarBO/sBTXLHbYEOz37hJAlslLQPPAPaU
6 changed files with 273 additions and 165 deletions

View File

@ -2,21 +2,17 @@
// Modules
mod lock;
mod match_expr;
// Exports
pub use lock::BuildResult;
// Imports
use {
self::{
lock::{BuildLock, BuildLockDepGuard},
match_expr::match_expr,
},
self::lock::{BuildLock, BuildLockDepGuard},
crate::{
error::ResultMultiple,
expand,
rules::{Command, CommandArg, DepItem, Expr, OutItem, Rule, Target},
rules::{Command, CommandArg, DepItem, Expr, ExprTree, OutItem, Rule, Target},
util::{self, ArcStr},
AppError,
Expander,
@ -72,6 +68,11 @@ pub struct Builder {
/// All rules' build lock
rules_lock: DashMap<TargetRule, BuildLock>,
/// Rule output tree
///
/// Used to quickly find rules for a file
rule_output_tree: ExprTree<ArcStr>,
/// Execution semaphore
exec_semaphore: Semaphore,
@ -81,18 +82,50 @@ pub struct Builder {
impl Builder {
/// Creates a new builder
pub fn new(jobs: usize, stop_builds_on_first_err: bool) -> Self {
pub fn new(jobs: usize, rules: &Rules, stop_builds_on_first_err: bool) -> Result<Self, AppError> {
let (event_tx, event_rx) = async_broadcast::broadcast(jobs);
let event_rx = event_rx.deactivate();
Self {
let expander = Expander::new();
// Create the rule output tree
let mut rule_output_tree = ExprTree::new();
for (rule_name, rule) in &rules.rules {
for output in &rule.output {
// Expand the file first
// Expand all expressions in the output file
// Note: This doesn't expand patterns, so we can match those later
let output_file = match output {
OutItem::File { file: output_file, .. } => output_file,
};
let expand_visitor = expand::Visitor::from_aliases([&rule.aliases, &rules.aliases])
.with_default_pat(expand::FlowControl::Keep);
let output_file = expander.expand_expr(output_file, &expand_visitor)?;
// Then try to insert it
if let Some(prev_rule_name) = rule_output_tree
.insert(&output_file, rule_name.clone())
.context("Unable to add rule output to tree")
.map_err(AppError::Other)?
{
return Err(AppError::Other(anyhow::anyhow!(
"Multiple rules match the same output file: {output_file}\n first rule: {prev_rule_name}\n \
second rule: {rule_name}"
)));
};
}
}
Ok(Self {
event_tx,
_event_rx: event_rx,
expander: Expander::new(),
expander,
rules_lock: DashMap::new(),
rule_output_tree,
exec_semaphore: Semaphore::new(jobs),
stop_builds_on_first_err,
}
})
}
/// Returns all build results
@ -132,39 +165,39 @@ impl Builder {
) -> Result<Option<(Rule<ArcStr>, TargetRule)>, AppError> {
let target_rule = match *target {
// If we got a file, check which rule can make it
Target::File { ref file, .. } => match self.find_rule_for_file(file, rules)? {
Some((rule, pats)) => {
tracing::trace!(%target, %rule.name, "Found target rule");
let target_rule = TargetRule {
name: rule.name.clone(),
pats,
};
(rule, target_rule)
Target::File { ref file, .. } => match self.rule_output_tree.find(file) {
Some((name, pats)) => {
tracing::trace!(%target, %name, "Found target rule");
TargetRule {
name,
pats: Arc::new(pats),
}
},
None => return Ok(None),
},
// If we got a rule name with patterns, find it and replace all patterns
Target::Rule { ref rule, ref pats } => {
// Find the rule and expand it
let rule = rules.rules.get(&**rule).ok_or_else(|| AppError::UnknownRule {
rule_name: (**rule).to_owned(),
})?;
let expand_visitor = expand::Visitor::new([&rule.aliases, &rules.aliases], [pats]);
let rule = self
.expander
.expand_rule(rule, &expand_visitor)
.map_err(AppError::expand_rule(&*rule.name))?;
let target_rule = TargetRule {
name: rule.name.clone(),
pats: Arc::clone(pats),
};
(rule, target_rule)
Target::Rule { ref rule, ref pats } => TargetRule {
name: rule.clone(),
pats: Arc::clone(pats),
},
};
Ok(Some(target_rule))
// Find the rule and expand it
let rule = rules
.rules
.get(&*target_rule.name)
.ok_or_else(|| AppError::UnknownRule {
rule_name: (*target_rule.name).to_owned(),
})?;
let expand_visitor = expand::Visitor::new([&rule.aliases, &rules.aliases], [&target_rule.pats]);
let rule = self
.expander
.expand_rule(rule, &expand_visitor)
.map_err(AppError::expand_rule(&*rule.name))?;
Ok(Some((rule, target_rule)))
}
/// Resets a build
@ -765,43 +798,6 @@ impl Builder {
Ok(())
}
/// Finds a rule for `file`
// TODO: Not make this `O(N)` for the number of rules.
#[expect(clippy::type_complexity, reason = "TODO: Add some type aliases / struct")]
pub fn find_rule_for_file(
&self,
file: &ArcStr,
rules: &Rules,
) -> Result<Option<(Rule<ArcStr>, Arc<BTreeMap<ArcStr, ArcStr>>)>, AppError> {
for rule in rules.rules.values() {
for output in &rule.output {
// Expand all expressions in the output file
// Note: This doesn't expand patterns, so we can match those later
let output_file = match output {
OutItem::File { file: output_file, .. } => output_file,
};
let expand_visitor = expand::Visitor::from_aliases([&rule.aliases, &rules.aliases])
.with_default_pat(expand::FlowControl::Keep);
let output_file = self.expander.expand_expr(output_file, &expand_visitor)?;
// Then try to match the output file to the file we need to create
if let Some(rule_pats) = self::match_expr(&output_file, &output_file.cmpts, file.clone())? {
let rule_pats = Arc::new(rule_pats);
let expand_visitor = expand::Visitor::new([&rule.aliases, &rules.aliases], [&rule_pats]);
let rule = self
.expander
.expand_rule(rule, &expand_visitor)
.map_err(AppError::expand_rule(&*rule.name))?;
return Ok(Some((rule, rule_pats)));
}
}
}
// If we got here, there was no matching rule
Ok(None)
}
}
/// Parses a dependencies file

View File

@ -1,91 +0,0 @@
//! Expression matching
// Imports
use {
crate::{
rules::{Expr, ExprCmpt, PatternOp},
util::ArcStr,
AppError,
},
std::{assert_matches::assert_matches, collections::BTreeMap},
};
/// Returns if `value` matches all `cmpts` and returns all patterns resolved
///
/// Returns error if more than 1 pattern is found.
///
/// Panics if any aliases are found
pub fn match_expr(
expr: &Expr,
cmpts: &[ExprCmpt],
mut value: ArcStr,
) -> Result<Option<BTreeMap<ArcStr, ArcStr>>, AppError> {
let mut patterns = BTreeMap::new();
// Until `rhs` has anything to match left
let mut cur_cmpts = cmpts;
loop {
match cur_cmpts {
// If we start with a string, strip the prefix off
[ExprCmpt::String(lhs), rest @ ..] => match value.strip_prefix(&**lhs) {
Some(new_rhs) => {
cur_cmpts = rest;
value = new_rhs;
},
None => return Ok(None),
},
// If we end with a string, strip the suffix off
[rest @ .., ExprCmpt::String(lhs)] => match value.strip_suffix(&**lhs) {
Some(new_rhs) => {
cur_cmpts = rest;
value = new_rhs;
},
None => return Ok(None),
},
// If we're a single pattern, check for operators
[ExprCmpt::Pattern(pat)] => {
let mut ops = pat.ops.as_slice();
loop {
match ops {
// If we're empty, match everything
[] => break,
// On non-empty check if the rest of the value is empty
[PatternOp::NonEmpty, rest @ ..] => match value.is_empty() {
// If so, we don't match anything
true => return Ok(None),
// Else continue checking the rest of the operators
false => ops = rest,
},
}
}
// If we get here, match everything
// TODO: Borrow some cases?
let prev_value = patterns.insert(pat.name.clone(), value);
assert_matches!(prev_value, None, "Found repeated pattern");
cur_cmpts = &[];
value = ArcStr::default();
},
// If we have patterns on both sides, reject
[ExprCmpt::Pattern(_), .., ExprCmpt::Pattern(_)] =>
return Err(AppError::MatchExprTooManyPats {
expr: expr.to_string(),
expr_cmpts: cmpts.iter().map(ExprCmpt::to_string).collect(),
}),
// If we have aliases on any side, reject
[ExprCmpt::Alias(alias), ..] | [.., ExprCmpt::Alias(alias)] =>
unreachable!("Cannot match unexpanded alias: {alias:?}"),
// If we're empty, we match an empty string
[] => match &*value {
"" => return Ok(Some(patterns)),
_ => return Ok(None),
},
}
}
}

View File

@ -192,7 +192,9 @@ async fn run(args: Args) -> ExitResult {
// Create the builder
// Note: We should stop builds on the first error if we're *not* watching.
let builder = Builder::new(jobs, !args.watch);
let builder = Builder::new(jobs, &rules, !args.watch)
.context("Unable to create builder")
.map_err(AppError::Other)?;
let builder = Arc::new(builder);
// Then create the watcher, if we're watching

View File

@ -11,7 +11,7 @@ mod target;
// Exports
pub use {
alias::AliasOp,
expr::{Expr, ExprCmpt},
expr::{Expr, ExprCmpt, ExprTree},
item::{DepItem, OutItem},
pattern::PatternOp,
rule::{Command, CommandArg, Exec, Rule},

View File

@ -1,5 +1,11 @@
//! Expressions
// Modules
pub mod expr_tree;
// Exports
pub use self::expr_tree::ExprTree;
// Imports
use {
super::{
@ -24,7 +30,23 @@ pub enum ExprCmpt {
}
impl ExprCmpt {
/// Converts this expression into a string, if it's a string.
/// Returns `true` if the component is [`ExprCmpt::String`].
#[must_use]
pub const fn is_string(&self) -> bool {
matches!(self, Self::String(_))
}
/// Returns this expression as a string, if it is one.
#[must_use]
pub const fn as_string(&self) -> Option<&ArcStr> {
#[expect(clippy::wildcard_enum_match_arm, reason = "We only care about a specific variant")]
match self {
Self::String(v) => Some(v),
_ => None,
}
}
/// Converts this component into a string, if it's a string.
pub fn try_into_string(self) -> Result<ArcStr, Self> {
#[expect(clippy::wildcard_enum_match_arm, reason = "We only care about a specific variant")]
match self {
@ -32,6 +54,22 @@ impl ExprCmpt {
_ => Err(self),
}
}
/// Returns `true` if the component is [`ExprCmpt::Pattern`].
#[must_use]
pub const fn is_pattern(&self) -> bool {
matches!(self, Self::Pattern(_))
}
/// Returns this expression as a pattern, if it is one.
#[must_use]
pub const fn as_pattern(&self) -> Option<&Pattern> {
#[expect(clippy::wildcard_enum_match_arm, reason = "We only care about a specific variant")]
match self {
Self::Pattern(v) => Some(v),
_ => None,
}
}
}
/// Expression

163
src/rules/expr/expr_tree.rs Normal file
View File

@ -0,0 +1,163 @@
//! Expression tree
use {
super::Expr,
crate::{
error::AppError,
rules::{pattern::Pattern, PatternOp},
util::ArcStr,
},
itertools::{Itertools, PeekingNext},
std::collections::BTreeMap,
};
/// An expression tree.
// TODO: Currently we just match against all possible prefixes and suffixes.
// Try to actually create a fast, non `O(n)`, algorithm?
#[derive(Debug)]
pub struct ExprTree<K> {
/// Prefixes
prefixes: PrefixTree<K>,
}
// TODO: Flatten this?
type PrefixTree<K> = BTreeMap<ArcStr, SuffixTree<K>>;
type SuffixTree<K> = BTreeMap<ArcStr, (Option<Pattern>, K)>;
impl<K> ExprTree<K> {
/// Creates a new, empty, expression tree
pub const fn new() -> Self {
Self {
prefixes: BTreeMap::new(),
}
}
/// Adds an expression to the suffix tree, associated with a key.
///
/// The expression must not contain any aliases.
///
/// Returns the old key if the expression already existed.
pub fn insert(&mut self, expr: &Expr, key: K) -> Result<Option<K>, AppError> {
let mut cmpts = expr.cmpts.iter();
// Get all components from the start that are strings
let prefix = cmpts
.by_ref()
.peeking_take_while(|cmpt| cmpt.is_string())
.map(|cmpt| &**cmpt.as_string().expect("Just checked"))
.collect::<String>();
let prefix = ArcStr::from(prefix);
// Get the (possible) pattern in the middle
let pat = cmpts
.peeking_next(|cmpt| cmpt.is_pattern())
.map(|cmpt| cmpt.as_pattern().expect("Just checked"))
.cloned();
// Then get the rest of the string
let suffix = cmpts
.peeking_take_while(|cmpt| cmpt.is_string())
.map(|cmpt| &**cmpt.as_string().expect("Just checked"))
.collect::<String>();
let suffix = ArcStr::from(suffix);
// After this the expression should be empty
if let Some(cmpt) = cmpts.next() {
return Err(AppError::Other(anyhow::anyhow!(
"Unexpected component in expression {expr}: {cmpt}"
)));
}
// Finally try to insert and retrieve the old key, if any.
let old_key = self
.prefixes
.entry(prefix)
.or_default()
.insert(suffix, (pat, key))
.map(|(_, old_key)| old_key);
Ok(old_key)
}
/// Matches a string against this expression tree.
///
/// Returns the first match with patterns resolved.
// TODO: Since we only support a single pattern, return `Option<ArcStr>` for it instead.
pub fn find(&self, value: &str) -> Option<(K, BTreeMap<ArcStr, ArcStr>)>
where
K: Clone,
{
for (prefix, suffixes) in &self.prefixes {
// If the prefix no longer matches, try the next
let Some(value_rest) = value.strip_prefix(&**prefix) else {
continue;
};
// Try to find match the suffixes
if let Some((key, pats)) = Self::find_match_suffix(value_rest, suffixes) {
return Some((key, pats));
}
}
None
}
/// Finds a matching suffix for `value` from the suffix map.
fn find_match_suffix(value: &str, suffixes: &SuffixTree<K>) -> Option<(K, BTreeMap<ArcStr, ArcStr>)>
where
K: Clone,
{
// Try to match against an empty suffix
// Note: We always do this, since some expressions could be
// of the form `(String, Pat, Empty)`, with `Pat` matching
// the rest.
if let Some((pat, key)) = suffixes.get("") &&
let Some(pats) = Self::find_match_pat("", pat)
{
return Some((key.clone(), pats));
}
// Otherwise, match against all other suffixes
for (suffix, (pat, key)) in suffixes {
// If the prefix no longer matches, try the next
let Some(pat_value) = value.strip_suffix(&**suffix) else {
continue;
};
// Otherwise, we might have found the final value, so test it
if let Some(pats) = Self::find_match_pat(pat_value, pat) {
return Some((key.clone(), pats));
}
}
None
}
/// Matches a pattern against a remaining value after it's prefix and suffix have been stripped
fn find_match_pat(value: &str, pat: &Option<Pattern>) -> Option<BTreeMap<ArcStr, ArcStr>> {
let pats = match pat {
// If there is any pattern, try to match it
Some(pat) => {
for op in &pat.ops {
match op {
// If it needs to be non-empty, check
PatternOp::NonEmpty => match value.is_empty() {
true => return None,
false => continue,
},
}
}
BTreeMap::from([(pat.name.clone(), value.into())])
},
// Otherwise, we match if the value is empty
_ => match value.is_empty() {
true => BTreeMap::new(),
false => return None,
},
};
Some(pats)
}
}