Split a String without removing the delimiter in Swift

后端 未结 3 1769
南方客
南方客 2020-12-19 23:59

This might be a duplicate. I couldn't find the answer in Swift, so I am not sure.

componentsSeparatedByCharactersInSet removes the delimiter. If you se

相关标签:
3条回答
  • 2020-12-20 00:40

    This method works on CollectionTypes, rather than Strings, but it should be easy enough to adapt:

    extension CollectionType {
      /// Splits the collection immediately *before* every element matched by `isSplit`,
      /// so each delimiter becomes the first element of the slice that follows it.
      ///
      /// - Parameter isSplit: Predicate that returns `true` for delimiter elements.
      /// - Returns: The slices in order. The last slice always runs from the final
      ///   delimiter (or from the start, if there is none) to the end of the collection.
      /// - Throws: Rethrows any error thrown by `isSplit`.
      func splitAt(@noescape isSplit: Generator.Element throws -> Bool) rethrows ->  [SubSequence] {
        // Find every delimiter position first, then cut between consecutive positions.
        let cutPoints = try indices.filter { position in try isSplit(self[position]) }
        var sliceStart = startIndex
        var pieces: [SubSequence] = cutPoints.map { cut -> SubSequence in
          let piece = self[sliceStart..<cut]
          sliceStart = cut
          return piece
        }
        // Everything from the last cut onwards forms the final slice.
        pieces.append(suffixFrom(sliceStart))
        return pieces
      }
    }
    
    extension CollectionType where Generator.Element : Equatable {
      /// Convenience overload: splits at every element equal to `splitter` by
      /// delegating to the predicate-based `splitAt`.
      ///
      /// - Parameter splitter: The element to treat as the delimiter.
      /// - Returns: Whatever the predicate-based `splitAt` returns for `== splitter`.
      func splitAt(splitter: Generator.Element) -> [SubSequence] {
        return splitAt { $0 == splitter }
      }
    }
    

    You could use it like this:

    // Sample text containing each of the delimiters we split on.
    let sentence = "Hello, my name is oisdk. This should split: but only at punctuation!"
    
    // The delimiter characters; set membership is used as the split predicate.
    let puncSet: Set<Character> = ["!", ".", ",", ":"]
    
    sentence.characters
      .splitAt { puncSet.contains($0) }
      .map { String($0) }
    
    // ["Hello", ", my name is oisdk", ". This should split", ": but only at punctuation", "!"]
    

    Or, this version, which uses a for-loop, and splits after the delimiter:

    extension CollectionType {
      /// Splits the collection immediately *after* every element matched by `isSplit`,
      /// so each delimiter stays at the end of the slice it terminates.
      ///
      /// - Parameter isSplit: Predicate that returns `true` for delimiter elements.
      /// - Returns: The slices in order, followed by the remainder after the last
      ///   delimiter when that remainder is non-empty.
      /// - Throws: Rethrows any error thrown by `isSplit`.
      func splitAt(@noescape isSplit: Generator.Element throws -> Bool) rethrows ->  [SubSequence] {
        var pieces: [SubSequence] = []
        var sliceStart = startIndex
        for position in indices {
          guard try isSplit(self[position]) else { continue }
          // Cut just past the delimiter so it stays with the text before it.
          let next = position.successor()
          pieces.append(self[sliceStart..<next])
          sliceStart = next
        }
        // Keep whatever follows the last delimiter, unless nothing is left.
        guard sliceStart != endIndex else { return pieces }
        pieces.append(suffixFrom(sliceStart))
        return pieces
      }
    }
    
    
    extension CollectionType where Generator.Element : Equatable {
      /// Convenience overload: splits at every element equal to `splitter` by
      /// delegating to the predicate-based `splitAt`.
      ///
      /// - Parameter splitter: The element to treat as the delimiter.
      /// - Returns: Whatever the predicate-based `splitAt` returns for `== splitter`.
      func splitAt(splitter: Generator.Element) -> [SubSequence] {
        return splitAt { $0 == splitter }
      }
    }
    
    // Sample text containing each of the delimiters we split on.
    let sentence = "Hello, my name is oisdk. This should split: but only at punctuation!"
    
    // The delimiter characters; set membership is used as the split predicate.
    let puncSet: Set<Character> = ["!", ".", ",", ":"]
    
    sentence.characters
      .splitAt { puncSet.contains($0) }
      .map { String($0) }
    
    // ["Hello,", " my name is oisdk.", " This should split:", " but only at punctuation!"]
    

    Or, if you wanted to get the most Swift features into one function (defer, throws, a Protocol extension, an evil flatMap, guard, and Optionals):

    extension CollectionType {
      /// Splits the collection immediately after every element for which `isSplit`
      /// returns `true`; each matched element ends the slice it closes.
      ///
      /// - Parameter isSplit: Predicate identifying the delimiter elements.
      /// - Returns: The slices in order, followed by the remainder after the last
      ///   delimiter when that remainder is non-empty.
      /// - Throws: Rethrows any error thrown by `isSplit`.
      func splitAt(@noescape isSplit: Generator.Element throws -> Bool) rethrows -> [SubSequence] {
        // `p` marks the start of the slice currently being built.
        var p = startIndex
        var result: [SubSequence] = try indices.flatMap { i in
          // Returning nil makes flatMap drop every non-delimiter index.
          guard try isSplit(self[i]) else { return nil }
          // Runs after the slice below has been formed: the next slice starts just past this delimiter.
          defer { p = i.successor() }
          return self[p...i]
        }
        // Append whatever follows the last delimiter, if anything.
        if p != endIndex { result.append(suffixFrom(p)) }
        return result
      }
    }
    
    0 讨论(0)
  • 2020-12-20 00:41

    Swift 3 and 4 Versions

    extension Collection {
        /// Splits the collection immediately after every element matched by `isSplit`,
        /// keeping each delimiter at the end of the slice it terminates.
        ///
        /// - Parameter isSplit: Predicate that returns `true` for delimiter elements.
        /// - Returns: The slices in order, followed by the remainder after the last
        ///   delimiter when that remainder is non-empty. An empty collection yields `[]`.
        /// - Throws: Rethrows any error thrown by `isSplit`.
        func splitAt(isSplit: (Iterator.Element) throws -> Bool) rethrows -> [SubSequence] {
            var result: [SubSequence] = []
            var sliceStart = startIndex
            // A plain loop replaces the optional-returning `flatMap` (deprecated since
            // Swift 4.1) and avoids mutating captured state inside a transform closure.
            for i in indices {
                guard try isSplit(self[i]) else { continue }
                result.append(self[sliceStart...i])
                sliceStart = index(after: i)
            }
            // Keep the text after the last delimiter, but never append an empty slice.
            if sliceStart != endIndex {
                result.append(suffix(from: sliceStart))
            }
            return result
        }
    }
    

    Thanks to Oisdk for getting me thinking.

    0 讨论(0)
  • 2020-12-20 00:43

    I came here looking for an answer to this question. I didn't find what I was looking for and ended up building this with repeated calls to .split(...). It isn't elegant, but you can choose which delimiters are preserved and which aren't. There's probably a way to avoid the String <--> Substring conversions; does anyone know?

    // Tokenizes `input`: bracket characters become tokens of their own, while
    // spaces, newlines, commas and periods only separate tokens and are dropped.
    let input = """
        {All those moments will be (lost in time)},
        like tears [in rain](. ([(Time to)] die))
    """
    // Written by the `whereSeparator` closure: the delimiter that ended the current
    // token, or " " when that delimiter should be omitted from the output.
    var separator: Character = "!"
    var output: [String] = []
    // Work on a Substring so each iteration re-slices the same storage instead of
    // copying the whole remainder into a fresh String.
    var remaining = input[...]
    repeat {
        // Peel off one token. NOTE: this relies on `split` not invoking the predicate
        // again once `maxSplits` is reached, so `separator` still refers to the
        // delimiter that ended tokens[0].
        let tokens = remaining.split(
            maxSplits: 1,
            omittingEmptySubsequences: false,
            whereSeparator: {
                switch $0 {
                    case "{", "}", "(", ")", "[", "]": // preserve
                        separator = $0; return true
                    case " ", "\n", ",", ".":          // omit
                        separator = " "; return true
                    default:
                        return false
                }
            }
        )
        // Adjacent delimiters produce an empty leading token; skip it.
        if !tokens[0].isEmpty {
            output.append(String(tokens[0]))
        }
        // A single element means no delimiter was found: the input is exhausted.
        guard tokens.count == 2 else { break }
        if separator != " " {
            output.append(String(separator))
        }
        remaining = tokens[1]
    } while true
    
    for token in output { print(token) }
    

    In the case above, the delimiters are hard-coded in the switch rather than stored in actual sets. I didn't need that, but if you do, simply add these declarations:

    // Delimiters that are kept in the output as tokens of their own.
    let preservedDelimiters = Set<Character>("{}()[]")
    // Delimiters that separate tokens but are dropped from the output.
    let omittedDelimiters = Set<Character>(" \n,.")
    

    and replace the whereSeparator function with:

    whereSeparator: { character in
        let isPreserved = preservedDelimiters.contains(character)
        // A character in neither set is ordinary token text, not a delimiter.
        guard isPreserved || omittedDelimiters.contains(character) else { return false }
        // Remember a preserved delimiter itself; " " marks an omitted one.
        separator = isPreserved ? character : " "
        return true
    }
    
    0 讨论(0)
提交回复
热议问题