I\'m trying to write a CSV parser using Scala parser combinators. The grammar is based on RFC4180. I came up with the following code. It almost works, but I cannot get it to
With Scala Parser Combinators library out of the Scala standard library starting from 2.11 there is no good reason not to use the much more performant Parboiled2 library. Here is a version of the CSV parser in Parboiled2's DSL:
/* based on comments in https://github.com/sirthias/parboiled2/issues/61 */
import org.parboiled2._
case class Parboiled2CsvParser(input: ParserInput, delimeter: String) extends Parser {
def DQUOTE = '"'
def DELIMITER_TOKEN = rule(capture(delimeter))
def DQUOTE2 = rule("\"\"" ~ push("\""))
def CRLF = rule(capture("\r\n" | "\n"))
def NON_CAPTURING_CRLF = rule("\r\n" | "\n")
val delims = s"$delimeter\r\n" + DQUOTE
def TXT = rule(capture(!anyOf(delims) ~ ANY))
val WHITESPACE = CharPredicate(" \t")
def SPACES: Rule0 = rule(oneOrMore(WHITESPACE))
def escaped = rule(optional(SPACES) ~
DQUOTE ~ (zeroOrMore(DELIMITER_TOKEN | TXT | CRLF | DQUOTE2) ~ DQUOTE ~
optional(SPACES)) ~> (_.mkString("")))
def nonEscaped = rule(zeroOrMore(TXT | capture(DQUOTE)) ~> (_.mkString("")))
def field = rule(escaped | nonEscaped)
def row: Rule1[Seq[String]] = rule(oneOrMore(field).separatedBy(delimeter))
def file = rule(zeroOrMore(row).separatedBy(NON_CAPTURING_CRLF))
def parsed() : Try[Seq[Seq[String]]] = file.run()
}