I would like to parse a text file with content that looks something like below:
START-OF-DATA
#100846105
START SECURITY|US912810DZ85|CBBT|
## in: 20150430_14
You can read the file line by line, and filter out the lines you do not want. Also, the tick values and ids can be collected into a list of strings.
Sample code:
// Collects the interesting lines of each START-OF-DATA ... END-OF-DATA block.
// 'res' receives every line that is not a "START SECURITY", "END SECURITY" or
// "##" comment line; 'ids' receives the single-'#' id lines (e.g. "#100846105").
var res = string.Empty;
var ids = new List<string>();
using (var sr = new StreamReader(filepath, true))
{
    string s;
    while ((s = sr.ReadLine()) != null)
    {
        if (s.StartsWith("START-OF-DATA"))
        {
            // Stop at END-OF-DATA, or at end of file when the block is not
            // terminated: ReadLine() returns null there, and without the null
            // check the StartsWith call would throw a NullReferenceException.
            while (s != null && !s.StartsWith("END-OF-DATA"))
            {
                if (!s.StartsWith("START SECURITY") &&
                    !s.StartsWith("##") &&
                    !s.StartsWith("END SECURITY"))
                {
                    res += s + System.Environment.NewLine;
                }

                // Id lines have exactly one leading '#'; "##" lines are comments.
                if (s.StartsWith("#") && !s.StartsWith("##"))
                {
                    ids.Add(s);
                }

                s = sr.ReadLine();
            }

            // s is the END-OF-DATA line here, or null (which appends nothing)
            // when the file ended before the terminator was seen.
            res += s;
        }
    }
}
Output:
START-OF-DATA
#100846105
04/30|15:00:00|B|118.640625||| |A|118.703125||| ||
04/30|14:59:54|B|118.6328125||| |A|118.6953125||| ||
04/30|14:59:52|B|118.6328125||| |A|118.6953125||| ||
04/30|14:59:23|B|118.6328125||| |A|118.6953125||| ||
04/30|14:59:20|B|118.6328125||| |A|118.6953125||| ||
#100846111
04/30|15:00:00|B|124.75||| |A|124.828125||| ||
04/30|14:59:55|B|124.75||| |A|124.8203125||| ||
04/30|14:59:53|B|124.7421875||| |A|124.8203125||| ||
04/30|14:59:45|B|124.7421875||| |A|124.8125||| ||
04/30|14:59:43|B|124.7421875||| |A|124.828125||| ||
04/30|14:59:27|B|124.7421875||| |A|124.8125||| ||
04/30|14:59:24|B|124.7421875||| |A|124.828125||| ||
04/30|14:59:22|B|124.7421875||| |A|124.8125||| ||
04/30|14:59:20|B|124.7421875||| |A|124.828125||| ||
04/30|14:59:13|B|124.7421875||| |A|124.8125||| ||
END-OF-DATA
Then, if you have multiple blocks to read, just create a list of strings to store each res in, and add res to that list right after res += s;.
Here is a simple parser:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace ConsoleApplication1
{
    /// <summary>
    /// Sample parser that splits a START-OF-DATA ... END-OF-DATA dump into one
    /// <see cref="Section"/> per "#id" line, each holding the data lines that follow it.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Parses the embedded sample input into a list of sections.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string input =
                "START-OF-DATA\n" +
                "#100846105\n" +
                "START SECURITY|US912810DZ85|CBBT|\n" +
                "## in: 20150430_14:59:00 to 20150430_15:00:00 [13 (New York-DST)]\n" +
                "## out:20150430_14:59:00 to 20150430_15:00:00 [13 (New York-DST)]\n" +
                "04/30|15:00:00|B|118.640625||| |A|118.703125||| ||\n" +
                "04/30|14:59:54|B|118.6328125||| |A|118.6953125||| ||\n" +
                "04/30|14:59:52|B|118.6328125||| |A|118.6953125||| ||\n" +
                "04/30|14:59:23|B|118.6328125||| |A|118.6953125||| ||\n" +
                "04/30|14:59:20|B|118.6328125||| |A|118.6953125||| ||\n" +
                "END SECURITY|US912810DZ85|0|\n" +
                "#100846111\n" +
                "START SECURITY|US912810EA26|CBBT|\n" +
                "## in: 20150430_14:59:00 to 20150430_15:00:00 [13 (New York-DST)]\n" +
                "## out:20150430_14:59:00 to 20150430_15:00:00 [13 (New York-DST)]\n" +
                "04/30|15:00:00|B|124.75||| |A|124.828125||| ||\n" +
                "04/30|14:59:55|B|124.75||| |A|124.8203125||| ||\n" +
                "04/30|14:59:53|B|124.7421875||| |A|124.8203125||| ||\n" +
                "04/30|14:59:45|B|124.7421875||| |A|124.8125||| ||\n" +
                "04/30|14:59:43|B|124.7421875||| |A|124.828125||| ||\n" +
                "04/30|14:59:27|B|124.7421875||| |A|124.8125||| ||\n" +
                "04/30|14:59:24|B|124.7421875||| |A|124.828125||| ||\n" +
                "04/30|14:59:22|B|124.7421875||| |A|124.8125||| ||\n" +
                "04/30|14:59:20|B|124.7421875||| |A|124.828125||| ||\n" +
                "04/30|14:59:13|B|124.7421875||| |A|124.8125||| ||\n" +
                "END SECURITY|US912810EA26|0|\n" +
                "END-OF-DATA\n";

            List<Section> sections;
            using (StringReader reader = new StringReader(input))
            {
                sections = ParseSections(reader);
            }
        }

        /// <summary>
        /// Reads <paramref name="reader"/> line by line and groups data lines under
        /// the most recent "#id" line.
        /// </summary>
        /// <remarks>
        /// Each line is trimmed first. Blank lines, "##" comment lines and lines
        /// starting with "START" or "END" are skipped. A line starting with a single
        /// '#' opens a new section whose <see cref="Section.iD"/> is the text after
        /// the '#'. Any other line is appended to the current section; such lines
        /// appearing before the first "#id" line are ignored.
        /// </remarks>
        /// <param name="reader">Source of the text to parse.</param>
        /// <returns>The sections in the order their id lines were encountered.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        public static List<Section> ParseSections(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            List<Section> sections = new List<Section>();
            Section current = null;
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                line = line.Trim();

                // Blank lines carry no data; skipping them up front also avoids
                // any fixed-length prefix handling on short input.
                if (line.Length == 0)
                {
                    continue;
                }

                // "##" marks a comment line (e.g. "## in: ..." / "## out:..."),
                // whatever its text, so it must never be taken for an id line.
                if (line.StartsWith("##", StringComparison.Ordinal))
                {
                    continue;
                }

                if (line.StartsWith("#", StringComparison.Ordinal))
                {
                    current = new Section();
                    current.iD = line.Substring(1);
                    current.data = new List<string>();
                    sections.Add(current);
                    continue;
                }

                // Structural markers: START-OF-DATA, START SECURITY, END SECURITY, END-OF-DATA.
                if (line.StartsWith("END", StringComparison.Ordinal) ||
                    line.StartsWith("START", StringComparison.Ordinal))
                {
                    continue;
                }

                // A data line before any "#id" line has no section to belong to.
                if (current != null)
                {
                    current.data.Add(line);
                }
            }

            return sections;
        }

        /// <summary>
        /// One "#id" block of the input: the id and the data lines that follow it.
        /// </summary>
        public class Section
        {
            /// <summary>Text of the id line with the leading '#' removed.</summary>
            public string iD { get; set; }

            /// <summary>Data lines collected for this id, in input order.</summary>
            public List<string> data { get; set; }
        }
    }
}