Make all Utf8CStrWrite Utf8CStrBuf

This commit is contained in:
topjohnwu 2025-02-15 10:56:59 -08:00 committed by John Wu
parent 3c7c46307a
commit 584f3820fe
4 changed files with 94 additions and 89 deletions

View File

@ -18,23 +18,22 @@ use crate::slice_from_ptr_mut;
// Several Utf8CStr types:
//
// Utf8CStr: can only exist as reference, similar to &str
// Utf8CString: dynamically sized buffer allocated on the heap
// Utf8CString: dynamically sized buffer allocated on the heap, similar to String
// Utf8CStrBufRef: reference to a fixed sized buffer
// Utf8CStrBufArr<N>: fixed sized buffer allocated on the stack
//
// In most cases, these are the types being used
//
// &Utf8CStr: whenever a printable null terminated string is needed
// &mut dyn Utf8CStrWrite: whenever we need a buffer that only needs to support appending
// strings to the end, and has to be null terminated
// &mut dyn Utf8CStrBuf: whenever we need a buffer that needs to support appending
// strings to the end, and has to be null terminated
// &mut dyn Utf8CStrBuf: whenever we need a pre-allocated buffer that is large enough to fit
// in the result, and has to be null terminated
//
// All types dereferences to &Utf8CStr.
// Utf8CString, Utf8CStrBufRef, and Utf8CStrBufArr<N> implements Utf8CStrWrite.
// Utf8CStrBufRef and Utf8CStrBufArr<N> implements Utf8CStrBuf.
// Utf8CString, Utf8CStrBufRef, and Utf8CStrBufArr<N> implements Utf8CStrBuf.
fn utf8_cstr_buf_append(buf: &mut dyn Utf8CStrBuf, s: &[u8]) -> usize {
fn utf8_cstr_buf_append(buf: &mut dyn Utf8CStrBufWithSlice, s: &[u8]) -> usize {
let mut used = buf.len();
if used >= buf.capacity() - 1 {
// Truncate
@ -51,7 +50,7 @@ fn utf8_cstr_buf_append(buf: &mut dyn Utf8CStrBuf, s: &[u8]) -> usize {
len
}
fn utf8_cstr_append_lossy(buf: &mut dyn Utf8CStrWrite, s: &[u8]) -> usize {
fn utf8_cstr_append_lossy(buf: &mut dyn Utf8CStrBuf, s: &[u8]) -> usize {
let mut len = 0_usize;
for chunk in s.utf8_chunks() {
len += buf.push_str(chunk.valid());
@ -64,40 +63,55 @@ fn utf8_cstr_append_lossy(buf: &mut dyn Utf8CStrWrite, s: &[u8]) -> usize {
// Trait definitions
pub trait Utf8CStrWrite:
pub trait Utf8CStrBuf:
Write + AsRef<Utf8CStr> + AsMut<Utf8CStr> + Deref<Target = Utf8CStr> + DerefMut
{
fn len(&self) -> usize;
#[inline(always)]
fn is_empty(&self) -> bool {
self.len() == 0
}
fn push_str(&mut self, s: &str) -> usize;
fn push_lossy(&mut self, s: &[u8]) -> usize;
fn clear(&mut self);
}
pub trait Utf8CStrBuf: Utf8CStrWrite {
fn buf(&self) -> &[u8];
// Modifying the underlying buffer or length is unsafe because it can either:
// 1. Break null termination
// 2. Break UTF-8 validation
// 3. Introduce inner null byte in the string
unsafe fn mut_buf(&mut self) -> &mut [u8];
unsafe fn set_len(&mut self, len: usize);
fn push_str(&mut self, s: &str) -> usize;
fn push_lossy(&mut self, s: &[u8]) -> usize;
fn capacity(&self) -> usize;
fn clear(&mut self);
#[inline(always)]
fn capacity(&self) -> usize {
self.buf().len()
fn is_empty(&self) -> bool {
self.len() == 0
}
}
trait Utf8CStrBufWithSlice: Utf8CStrBuf {
fn buf(&self) -> &[u8];
unsafe fn mut_buf(&mut self) -> &mut [u8];
}
trait AsUtf8CStr {
fn as_utf8_cstr(&self) -> &Utf8CStr;
fn as_utf8_cstr_mut(&mut self) -> &mut Utf8CStr;
}
impl<T: Utf8CStrBufWithSlice> AsUtf8CStr for T {
#[inline(always)]
fn as_utf8_cstr(&self) -> &Utf8CStr {
// SAFETY: the internal buffer is always UTF-8 checked
// SAFETY: self.used is guaranteed to always <= SIZE - 1
unsafe { Utf8CStr::from_bytes_unchecked(self.buf().get_unchecked(..(self.len() + 1))) }
}
#[inline(always)]
fn as_utf8_cstr_mut(&mut self) -> &mut Utf8CStr {
// SAFETY: the internal buffer is always UTF-8 checked
// SAFETY: self.used is guaranteed to always <= SIZE - 1
unsafe {
let len = self.len() + 1;
Utf8CStr::from_bytes_unchecked_mut(self.mut_buf().get_unchecked_mut(..len))
}
}
}
// Implementation for Utf8CString
pub trait StringExt {
@ -132,13 +146,24 @@ impl StringExt for PathBuf {
}
}
#[derive(Default)]
pub struct Utf8CString(String);
impl Default for Utf8CString {
fn default() -> Self {
Utf8CString(String::with_capacity(256))
}
}
impl Utf8CString {
#[inline(always)]
pub fn new() -> Self {
Self::default()
pub fn with_capacity(capacity: usize) -> Utf8CString {
Utf8CString(String::with_capacity(capacity))
}
pub fn ensure_capacity(&mut self, capacity: usize) {
if self.capacity() >= capacity {
return;
}
self.0.reserve(capacity - self.0.len())
}
}
@ -151,16 +176,26 @@ impl AsUtf8CStr for Utf8CString {
#[inline(always)]
fn as_utf8_cstr_mut(&mut self) -> &mut Utf8CStr {
Utf8CStr::from_string(&mut self.0)
// SAFETY: the internal string is always null terminated
unsafe {
mem::transmute(slice::from_raw_parts_mut(
self.0.as_mut_ptr(),
self.0.len() + 1,
))
}
}
}
impl Utf8CStrWrite for Utf8CString {
impl Utf8CStrBuf for Utf8CString {
#[inline(always)]
fn len(&self) -> usize {
self.0.len()
}
unsafe fn set_len(&mut self, len: usize) {
self.0.as_mut_vec().set_len(len);
}
fn push_str(&mut self, s: &str) -> usize {
self.0.push_str(s);
self.0.nul_terminate();
@ -172,6 +207,10 @@ impl Utf8CStrWrite for Utf8CString {
utf8_cstr_append_lossy(self, s)
}
fn capacity(&self) -> usize {
self.0.capacity()
}
fn clear(&mut self) {
self.0.clear();
self.0.nul_terminate();
@ -185,27 +224,6 @@ impl From<String> for Utf8CString {
}
}
// Implementations for Utf8CStrBuf
impl<T: Utf8CStrBuf> AsUtf8CStr for T {
#[inline(always)]
fn as_utf8_cstr(&self) -> &Utf8CStr {
// SAFETY: the internal buffer is always UTF-8 checked
// SAFETY: self.used is guaranteed to always <= SIZE - 1
unsafe { Utf8CStr::from_bytes_unchecked(self.buf().get_unchecked(..(self.len() + 1))) }
}
#[inline(always)]
fn as_utf8_cstr_mut(&mut self) -> &mut Utf8CStr {
// SAFETY: the internal buffer is always UTF-8 checked
// SAFETY: self.used is guaranteed to always <= SIZE - 1
unsafe {
let len = self.len() + 1;
Utf8CStr::from_bytes_unchecked_mut(self.mut_buf().get_unchecked_mut(..len))
}
}
}
// UTF-8 validated + null terminated reference to buffer
pub struct Utf8CStrBufRef<'a> {
used: usize,
@ -225,7 +243,7 @@ impl<'a> From<&'a mut [u8]> for Utf8CStrBufRef<'a> {
}
}
impl Utf8CStrBuf for Utf8CStrBufRef<'_> {
impl Utf8CStrBufWithSlice for Utf8CStrBufRef<'_> {
#[inline(always)]
fn buf(&self) -> &[u8] {
self.buf
@ -235,11 +253,6 @@ impl Utf8CStrBuf for Utf8CStrBufRef<'_> {
unsafe fn mut_buf(&mut self) -> &mut [u8] {
self.buf
}
#[inline(always)]
unsafe fn set_len(&mut self, len: usize) {
self.used = len;
}
}
// UTF-8 validated + null terminated buffer on the stack
@ -257,7 +270,7 @@ impl<const N: usize> Utf8CStrBufArr<N> {
}
}
impl<const N: usize> Utf8CStrBuf for Utf8CStrBufArr<N> {
impl<const N: usize> Utf8CStrBufWithSlice for Utf8CStrBufArr<N> {
#[inline(always)]
fn buf(&self) -> &[u8] {
&self.buf
@ -267,16 +280,6 @@ impl<const N: usize> Utf8CStrBuf for Utf8CStrBufArr<N> {
unsafe fn mut_buf(&mut self) -> &mut [u8] {
&mut self.buf
}
#[inline(always)]
unsafe fn set_len(&mut self, len: usize) {
self.used = len;
}
#[inline(always)]
fn capacity(&self) -> usize {
N
}
}
impl Default for Utf8CStrBufArr<4096> {
@ -446,16 +449,16 @@ impl DerefMut for FsPath {
}
}
pub struct FsPathBuf<'a>(&'a mut dyn Utf8CStrWrite);
pub struct FsPathBuf<'a>(&'a mut dyn Utf8CStrBuf);
impl<'a> FsPathBuf<'a> {
pub fn new(value: &'a mut dyn Utf8CStrWrite) -> Self {
pub fn new(value: &'a mut dyn Utf8CStrBuf) -> Self {
value.clear();
FsPathBuf(value)
}
pub fn join<T: AsRef<str>>(self, path: T) -> Self {
fn inner(buf: &mut dyn Utf8CStrWrite, path: &str) {
fn inner(buf: &mut dyn Utf8CStrBuf, path: &str) {
if path.starts_with('/') {
buf.clear();
} else {
@ -468,7 +471,7 @@ impl<'a> FsPathBuf<'a> {
}
pub fn join_fmt<T: Display>(self, name: T) -> Self {
fn inner(buf: &mut dyn Utf8CStrWrite, path: Arguments) {
fn inner(buf: &mut dyn Utf8CStrBuf, path: Arguments) {
buf.write_fmt(path).ok();
}
inner(self.0, format_args!("/{}", name));
@ -578,7 +581,7 @@ impl_str!(
(Utf8CString,)
);
macro_rules! impl_str_write {
macro_rules! impl_str_buf {
($( ($t:ty, $($g:tt)*) )*) => {$(
impl<$($g)*> Write for $t {
#[inline(always)]
@ -610,20 +613,24 @@ macro_rules! impl_str_write {
)*}
}
impl_str_write!(
impl_str_buf!(
(Utf8CStrBufRef<'_>,)
(Utf8CStrBufArr<N>, const N: usize)
(Utf8CString,)
);
macro_rules! impl_str_buf {
macro_rules! impl_str_buf_with_slice {
($( ($t:ty, $($g:tt)*) )*) => {$(
impl<$($g)*> Utf8CStrWrite for $t {
impl<$($g)*> Utf8CStrBuf for $t {
#[inline(always)]
fn len(&self) -> usize {
self.used
}
#[inline(always)]
unsafe fn set_len(&mut self, len: usize) {
self.used = len;
}
#[inline(always)]
fn push_str(&mut self, s: &str) -> usize {
utf8_cstr_buf_append(self, s.as_bytes())
}
@ -632,6 +639,10 @@ macro_rules! impl_str_buf {
utf8_cstr_append_lossy(self, s)
}
#[inline(always)]
fn capacity(&self) -> usize {
self.buf.len()
}
#[inline(always)]
fn clear(&mut self) {
self.buf[0] = b'\0';
self.used = 0;
@ -640,7 +651,7 @@ macro_rules! impl_str_buf {
)*}
}
impl_str_buf!(
impl_str_buf_with_slice!(
(Utf8CStrBufRef<'_>,)
(Utf8CStrBufArr<N>, const N: usize)
);

View File

@ -1,7 +1,6 @@
use crate::cxx_extern::readlinkat_for_cxx;
use crate::{
cstr, errno, error, FsPath, FsPathBuf, LibcReturn, Utf8CStr, Utf8CStrBuf, Utf8CStrBufArr,
Utf8CStrWrite,
};
use bytemuck::{bytes_of, bytes_of_mut, Pod};
use libc::{
@ -622,9 +621,9 @@ impl FsPath {
buf.clear();
unsafe {
let r = libc::readlink(self.as_ptr(), buf.as_mut_ptr().cast(), buf.capacity() - 1)
.check_os_err()? as usize;
*buf.mut_buf().get_unchecked_mut(r) = b'\0';
buf.set_len(r);
.check_os_err()? as isize;
*(buf.as_mut_ptr().offset(r) as *mut u8) = b'\0';
buf.set_len(r as usize);
}
Ok(())
}
@ -791,7 +790,7 @@ impl FsPath {
unsafe { libc::symlink(self.as_ptr(), path.as_ptr()).as_os_err() }
}
pub fn parent(&self, buf: &mut dyn Utf8CStrWrite) -> bool {
pub fn parent(&self, buf: &mut dyn Utf8CStrBuf) -> bool {
buf.clear();
if let Some(parent) = Path::new(self.as_str()).parent() {
let bytes = parent.as_os_str().as_bytes();

View File

@ -21,7 +21,7 @@ use base::libc::{
};
use base::{
log_err, map_args, BytesExt, EarlyExitExt, FsPath, LoggedResult, MappedFile, ResultExt,
Utf8CStr, Utf8CStrBufArr, Utf8CStrWrite, WriteExt,
Utf8CStr, Utf8CStrBuf, Utf8CStrBufArr, WriteExt,
};
use crate::check_env;

View File

@ -7,7 +7,7 @@ use base::libc::{
};
use base::{
const_format::concatcp, libc, raw_cstr, FsPathBuf, LogLevel, Logger, ReadExt, Utf8CStr,
Utf8CStrBuf, Utf8CStrBufArr, Utf8CStrWrite, WriteExt, LOGGER,
Utf8CStrBuf, Utf8CStrBufArr, WriteExt, LOGGER,
};
use bytemuck::{bytes_of, write_zeroes, Pod, Zeroable};
use num_derive::{FromPrimitive, ToPrimitive};
@ -319,12 +319,7 @@ extern "C" fn logfile_writer(arg: *mut c_void) -> *mut c_void {
if localtime_r(&secs, &mut tm).is_null() {
continue;
}
let len = strftime(
aux.mut_buf().as_mut_ptr().cast(),
aux.capacity(),
raw_cstr!("%m-%d %T"),
&tm,
);
let len = strftime(aux.as_mut_ptr(), aux.capacity(), raw_cstr!("%m-%d %T"), &tm);
aux.set_len(len);
aux.write_fmt(format_args!(
".{:03} {:5} {:5} {} : ",