`ArcStr` now uses `yoke` to avoid `unsafe` code.

This commit is contained in:
Filipe Rodrigues 2025-02-03 09:54:59 +00:00
parent 4d6991314a
commit a06d6185eb
Signed by: zenithsiz
SSH Key Fingerprint: SHA256:Mb5ppb3Sh7IarBO/sBTXLHbYEOz37hJAlslLQPPAPaU
4 changed files with 84 additions and 129 deletions

50
.vscode/settings.json vendored
View File

@ -1,27 +1,27 @@
{
"cSpell.words": [
"Cmds",
"Cmpt",
"cmpts",
"dashmap",
"debouncer",
"filetime",
"indexmap",
"inotify",
"itertools",
"mapref",
"metavar",
"npath",
"oneshot",
"petgraph",
"PKGBUILD",
"rwlock",
"smallvec",
"tempdir",
"thiserror",
"yeet",
"Zbuild",
"zutil"
],
"rust-analyzer.cargo.features": "all"
"cSpell.words": [
"Cmds",
"Cmpt",
"cmpts",
"dashmap",
"debouncer",
"filetime",
"indexmap",
"inotify",
"itertools",
"mapref",
"metavar",
"npath",
"oneshot",
"petgraph",
"PKGBUILD",
"rwlock",
"smallvec",
"tempdir",
"thiserror",
"yeet",
"Zbuild",
"zutil"
],
"rust-analyzer.cargo.features": "all"
}

24
Cargo.lock generated
View File

@ -1356,6 +1356,12 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "strsim"
version = "0.11.1"
@ -1795,6 +1801,17 @@ dependencies = [
"bitflags 2.8.0",
]
[[package]]
name = "yoke"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40"
dependencies = [
"serde",
"stable_deref_trait",
"zerofrom",
]
[[package]]
name = "zbuild"
version = "0.1.9"
@ -1817,6 +1834,7 @@ dependencies = [
"tracing-subscriber",
"tracing-test",
"unicode-ident",
"yoke",
"zutil-app-error",
]
@ -1841,6 +1859,12 @@ dependencies = [
"syn",
]
[[package]]
name = "zerofrom"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e"
[[package]]
name = "zutil-app-error"
version = "0.1.0"

View File

@ -26,6 +26,7 @@ tokio-stream = "0.1.17"
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
unicode-ident = "1.0.16"
yoke = "0.7.5"
zutil-app-error = { git = "https://github.com/Zenithsiz/zutil", rev = "5363bba6ced162185a1eb5a132cce499bfc5d818" }
[dev-dependencies]

View File

@ -1,18 +1,18 @@
//! Arc string
// Lints
#![expect(unsafe_code, reason = "We need unsafe to implement our string 'cached' pointer")]
// Imports
use std::{
borrow::Borrow,
cmp,
fmt,
hash::{Hash, Hasher},
mem,
ops::{Deref, Range},
str::pattern::{Pattern, ReverseSearcher},
sync::Arc,
use {
std::{
borrow::Borrow,
cmp,
fmt,
hash::{Hash, Hasher},
mem,
ops::{Deref, Range},
str::pattern::{Pattern, ReverseSearcher},
sync::Arc,
},
yoke::Yoke,
};
/// Arc string.
@ -25,21 +25,16 @@ use std::{
/// accessible as a `String`.
#[derive(Clone)]
pub struct ArcStr {
/// This string's pointer
///
/// The `'static` lifetime is a lie, but we never hand it out as `'static`,
/// only as `'self`, so this is fine.
ptr: &'static str,
/// Inner
// Note: We need an `Arc<String>` for efficient conversion to/from `String`
inner: Arc<String>,
inner: Yoke<&'static str, Arc<String>>,
}
impl ArcStr {
/// Returns the range of this string compared to the base
fn base_range(&self) -> Range<usize> {
self.inner
.backing_cart()
.substr_range(self)
.expect("String pointer should be within allocation")
}
@ -51,11 +46,12 @@ impl ArcStr {
where
F: FnOnce(&mut String) -> R,
{
// Get the offset and length of our specific string
// Get the range of our specific string
let range = self.base_range();
// Get the inner string
let s = match Arc::get_mut(&mut self.inner) {
let mut inner = mem::take(self).inner.into_backing_cart();
let s = match Arc::get_mut(&mut inner) {
// If we're unique, slice the parts we don't care about and return
Some(s) => {
s.truncate(range.end);
@ -66,22 +62,16 @@ impl ArcStr {
// Otherwise copy
None => {
self.inner = Arc::new(self.to_string());
Arc::get_mut(&mut self.inner).expect("Should be unique")
inner = Arc::new(inner[range].to_owned());
Arc::get_mut(&mut inner).expect("Should be unique")
},
};
// Invalidate our string pointer in case of a panic.
self.ptr = "";
// Then mutate
let output = f(s);
// And finally, reconstruct ourselves
// SAFETY: We never hand out the `'static` string, and we ensure
// it's kept alive, as it's derived from our `inner` field,
// which we own.
self.ptr = unsafe { self::extend_static(s.as_str()) };
*self = Self::from(inner);
output
}
@ -92,10 +82,8 @@ impl ArcStr {
/// `s` must be derived from this string, else this method panics.
pub fn slice_from_str(&self, s: &str) -> Self {
let range = self.substr_range(s).expect("Input was not a substring of this string");
Self {
ptr: &self.ptr[range],
inner: Arc::clone(&self.inner),
}
let inner = self.inner.map_project_cloned(|s, _| &s[range]);
Self { inner }
}
/// Slices this string
@ -120,14 +108,6 @@ impl ArcStr {
}
}
/// Extends the lifetime of `s` to be static.
///
/// # Safety
/// This can only be used for strings that are assigned to `ArcStr::ptr`
unsafe fn extend_static(s: &str) -> &'static str {
unsafe { mem::transmute::<&str, &'static str>(s) }
}
impl PartialEq for ArcStr {
fn eq(&self, other: &Self) -> bool {
self.cmp(other).is_eq()
@ -173,7 +153,7 @@ impl Deref for ArcStr {
type Target = str;
fn deref(&self) -> &Self::Target {
self.ptr
self.inner.get()
}
}
@ -185,22 +165,25 @@ impl Borrow<str> for ArcStr {
impl From<String> for ArcStr {
fn from(s: String) -> Self {
Self::from(Arc::new(s))
}
}
impl From<Arc<String>> for ArcStr {
fn from(s: Arc<String>) -> Self {
Self {
// SAFETY: We never hand out the `'static` string, and we ensure
// it's kept alive, as it's derived from our `inner` field,
// which we own.
ptr: unsafe { self::extend_static(s.as_str()) },
inner: Arc::new(s),
inner: Yoke::attach_to_cart(s, |s| &**s),
}
}
}
impl From<ArcStr> for String {
fn from(s: ArcStr) -> Self {
// Get the offset and length of our specific string
// Get the range of our specific string
let range = s.base_range();
match Arc::try_unwrap(s.inner) {
let inner = s.inner.into_backing_cart();
match Arc::try_unwrap(inner) {
// If we're unique, slice the parts we don't care about and return
Ok(mut inner) => {
inner.truncate(range.end);
@ -210,60 +193,7 @@ impl From<ArcStr> for String {
},
// Otherwise copy
Err(inner) => ArcStr { inner, ..s }.to_string(),
Err(inner) => inner[range].to_owned(),
}
}
}
#[cfg(test)]
mod tests {
use {
super::*,
std::{hint::black_box, sync::Mutex},
};
#[test]
fn create() {
let s = ArcStr::from("Test".to_owned());
_ = black_box(&*s);
}
#[test]
fn mutate() {
let mut s1 = ArcStr::from("Test".to_owned());
let s2 = s1.clone();
s1.with_mut(|s| {
let cap = s.capacity();
s.push_str(&"A".repeat(100));
assert!(s.capacity() > cap, "Did not re-allocate");
});
_ = black_box(&*s1);
_ = black_box(&*s2);
}
#[test]
fn slice_from_str() {
let s1 = ArcStr::from("Test".to_owned());
let s2 = s1.slice_from_str(&s1[1..2]);
_ = black_box(&*s1);
_ = black_box(&*s2);
}
#[test]
fn panic() {
let s1 = Mutex::new(ArcStr::from("Test".to_owned()));
let _: Box<_> = std::panic::catch_unwind(|| {
s1.lock().expect("Poisoned").with_mut(|s| {
s.push_str(&"A".repeat(100));
panic!();
});
})
.expect_err("Did not panic");
s1.clear_poison();
let s = s1.lock().expect("Poisoned");
_ = black_box(&**s);
}
}