Is there a trivial way to split a string keeping the separators? Instead of this:
let texte = "Ten. Million. Questions. Let's celebrate all we've done tog
I was not able to find anything in the standard library, so I wrote my own:
This version uses the unstable pattern API because it's more flexible, but the link above has a fallback that I've hardcoded for my specific use case on stable Rust.
#![feature(pattern)]
use std::str::pattern::{Pattern, Searcher};
/// One piece produced while splitting a string and keeping its delimiters.
///
/// Both variants borrow their text from the string being split.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum SplitType<'a> {
    /// Text that did not match the delimiter pattern.
    Match(&'a str),
    /// Text that matched the delimiter pattern.
    Delimiter(&'a str),
}
/// Iterator state for splitting a haystack while also yielding the delimiters.
///
/// `'p` is the lifetime of the haystack being searched and `P` is the pattern
/// type used to locate delimiters.
pub struct SplitKeepingDelimiter<'p, P>
where
    P: Pattern<'p>,
{
    /// Searcher built from the pattern; the haystack is read back through it.
    searcher: P::Searcher,
    /// Byte offset of the first part of the haystack that has not been yielded yet.
    start: usize,
    /// End offset of a delimiter that has been found but not yet yielded,
    /// because the text preceding it had to be yielded first.
    saved: Option<usize>,
}
impl<'p, P> Iterator for SplitKeepingDelimiter<'p, P>
where
    P: Pattern<'p>,
{
    type Item = SplitType<'p>;

    /// Yields the next piece of the haystack, tagged as either delimiter
    /// text or non-delimiter text, or `None` once the whole haystack has
    /// been consumed.
    fn next(&mut self) -> Option<Self::Item> {
        // `haystack()` hands back a reference with the haystack's own lifetime,
        // so holding it does not keep `self.searcher` borrowed.
        let haystack = self.searcher.haystack();

        if self.start == haystack.len() {
            return None;
        }

        // A delimiter was located on the previous call, but the text in front
        // of it had to be yielded first; emit that delimiter now.
        if let Some(end_of_match) = self.saved.take() {
            let s = &haystack[self.start..end_of_match];
            self.start = end_of_match;
            return Some(SplitType::Delimiter(s));
        }

        match self.searcher.next_match() {
            Some((start, end)) => {
                if self.start == start {
                    // The delimiter begins exactly where we are: yield it directly.
                    let s = &haystack[start..end];
                    self.start = end;
                    Some(SplitType::Delimiter(s))
                } else {
                    // Yield the text before the delimiter and remember where
                    // the delimiter ends so the next call can yield it.
                    let s = &haystack[self.start..start];
                    self.start = start;
                    self.saved = Some(end);
                    Some(SplitType::Match(s))
                }
            }
            None => {
                // No more delimiters: everything that remains is one final piece.
                let s = &haystack[self.start..];
                self.start = haystack.len();
                Some(SplitType::Match(s))
            }
        }
    }
}
/// Extension trait adding `split_keeping_delimiter` to string-like types
/// that can be indexed with `[..]` to obtain a `str`.
pub trait SplitKeepingDelimiterExt: ::std::ops::Index<::std::ops::RangeFull, Output = str> {
    /// Returns an iterator over `self` that yields both the text between
    /// delimiters and the delimiters themselves, in order of appearance.
    ///
    /// `pattern` decides what counts as a delimiter. The returned iterator
    /// borrows from `self`.
    fn split_keeping_delimiter<P>(&self, pattern: P) -> SplitKeepingDelimiter<'_, P>
    where
        P: for<'a> Pattern<'a>,
    {
        SplitKeepingDelimiter {
            searcher: pattern.into_searcher(&self[..]),
            start: 0,
            saved: None,
        }
    }
}
// `str` relies on the trait's provided `split_keeping_delimiter` method, so the impl body is empty.
impl SplitKeepingDelimiterExt for str {}
#[cfg(test)]
mod test {
    use super::SplitKeepingDelimiterExt;
    use super::SplitType::{Delimiter, Match};

    /// Text and delimiters alternate and come back in their original order.
    #[test]
    fn split_with_delimiter() {
        let separators: &[char] = &[',', ';'];
        let pieces = "alpha,beta;gamma"
            .split_keeping_delimiter(separators)
            .collect::<Vec<_>>();

        let expected = vec![
            Match("alpha"),
            Delimiter(","),
            Match("beta"),
            Delimiter(";"),
            Match("gamma"),
        ];
        assert_eq!(pieces, expected);
    }

    /// Back-to-back delimiters are each reported, with no empty text in between.
    #[test]
    fn split_with_delimiter_allows_consecutive_delimiters() {
        let separators: &[char] = &[',', ';'];
        let pieces = ",;"
            .split_keeping_delimiter(separators)
            .collect::<Vec<_>>();

        let expected = vec![Delimiter(","), Delimiter(";")];
        assert_eq!(pieces, expected);
    }
}
You'll note that I needed to track whether each piece was one of the delimiters, but that should be easy to adapt if you don't need it.