How to read text file by particular line separator character?

假装没事ソ 提交于 2019-11-26 16:05:47

问题


Reading a text file using streamreader.

// Open the file using Encoding.Default and read a single line from it.
using (var reader = new StreamReader(FileName, Encoding.Default))
{
    // The result is scoped to this block, exactly as in the question.
    string firstLine = reader.ReadLine();
}

I want to force the line delimiter to be \n only, not \r. How can I do that?


回答1:


// Load the whole file into memory, then split it on the line feed ('\n') that the
// question asks for. A lone '\r' is left inside the line it belongs to.
string text = sr.ReadToEnd();
string[] lines = text.Split('\n');
foreach (string s in lines)
{
    // Consume. If the file ends with '\n', the last entry is an empty string.
}



回答2:


I would implement something like George's answer, but as an extension method that avoids loading the whole file at once (not tested, but something like this):

/// <summary>
/// Extension methods for splitting the contents of a <see cref="TextReader"/> on a
/// caller-chosen delimiter character.
/// </summary>
static class ExtensionsForTextReader
{
    /// <summary>
    /// Lazily reads <paramref name="reader"/> one character at a time and yields the text
    /// found between occurrences of <paramref name="delimiter"/>.
    /// </summary>
    /// <param name="reader">The reader to consume. It is read to the end but not disposed.</param>
    /// <param name="delimiter">The character that terminates a line; it is not included in the results.</param>
    /// <returns>
    /// Each delimited line in order. Consecutive delimiters produce empty strings. Text after the
    /// last delimiter is returned as a final line; a delimiter at the very end of the input does
    /// not produce an extra empty line.
    /// </returns>
    public static IEnumerable<string> ReadLines(this TextReader reader, char delimiter)
    {
        List<char> chars = new List<char>();
        int next;

        // Read() returns -1 only at the end of the input, so no separate Peek() is needed.
        while ((next = reader.Read()) >= 0)
        {
            char c = (char)next;

            if (c == delimiter)
            {
                yield return new String(chars.ToArray());
                chars.Clear();
                continue;
            }

            chars.Add(c);
        }

        // The input may end without a trailing delimiter; do not lose that last line.
        if (chars.Count > 0)
        {
            yield return new String(chars.ToArray());
        }
    }
}

Which could then be used like:

// Stream the file and print each '\n'-delimited line as it is produced.
using (StreamReader input = new StreamReader(FileName, Encoding.Default))
{
    var lines = input.ReadLines('\n');
    foreach (string current in lines)
    {
        Console.WriteLine(current);
    }
}



回答3:


I loved the answer @Pete gave. I would just like to submit a slight modification. This will allow you to pass a string delimiter instead of just a single character:

using System;
using System.IO;
using System.Collections.Generic;
/// <summary>
/// Extension methods for splitting the contents of a <see cref="StreamReader"/> on a
/// caller-chosen delimiter string.
/// </summary>
internal static class StreamReaderExtensions
{
    /// <summary>
    /// Lazily reads <paramref name="reader"/> one character at a time and yields the text
    /// found between occurrences of <paramref name="delimiter"/>.
    /// </summary>
    /// <param name="reader">The reader to consume. It is read to the end but not disposed.</param>
    /// <param name="delimiter">The non-empty string that terminates a segment; it is not included in the results.</param>
    /// <returns>
    /// Each delimited segment in order. Consecutive delimiters produce empty strings. Text after
    /// the last delimiter is returned as a final segment; a delimiter at the very end of the
    /// stream does not produce an extra empty segment.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is null or empty.</exception>
    public static IEnumerable<string> ReadUntil(this StreamReader reader, string delimiter)
    {
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }
        if (string.IsNullOrEmpty(delimiter))
        {
            throw new ArgumentException("The delimiter must contain at least one character.", "delimiter");
        }

        // Validation happens eagerly above; the iterator itself runs lazily.
        return ReadUntilIterator(reader, delimiter);
    }

    // Accumulates characters and emits a segment each time the buffer ends with the delimiter.
    private static IEnumerable<string> ReadUntilIterator(StreamReader reader, string delimiter)
    {
        List<char> buffer = new List<char>();
        int next;

        // Read() returns -1 only at the end of the stream.
        while ((next = reader.Read()) >= 0)
        {
            buffer.Add((char)next);

            if (EndsWith(buffer, delimiter))
            {
                // Cut off exactly the trailing delimiter; partial delimiter text that is
                // really data (e.g. a lone '\r' before "\r\n") stays in the segment.
                yield return new String(buffer.ToArray(), 0, buffer.Count - delimiter.Length);
                buffer.Clear();
            }
        }

        // The stream may end without a trailing delimiter; do not lose that last segment.
        if (buffer.Count > 0)
        {
            yield return new String(buffer.ToArray());
        }
    }

    // Returns true when the last characters of the buffer are exactly the suffix.
    private static bool EndsWith(List<char> buffer, string suffix)
    {
        int offset = buffer.Count - suffix.Length;
        if (offset < 0)
        {
            return false;
        }

        // Compare from the end: the most recently read character is the likeliest mismatch.
        for (int i = suffix.Length - 1; i >= 0; i--)
        {
            if (buffer[offset + i] != suffix[i])
            {
                return false;
            }
        }
        return true;
    }
}



回答4:


According to the documentation:

http://msdn.microsoft.com/en-us/library/system.io.streamreader.readline.aspx

A line is defined as a sequence of characters followed by a line feed ("\n"), a carriage return ("\r"), or a carriage return immediately followed by a line feed ("\r\n").

By default, the StreamReader.ReadLine method treats \n, \r, and \r\n each as a line terminator.




回答5:


This is an improvement on sovemp's answer. Sorry, I would have liked to comment, but my reputation doesn't allow me to do so. This improvement addresses 2 issues:

  1. With the example sequence "text\rtest\r\n" and the delimiter "\r\n", the original would also delete the first "\r", which is not intended.
  2. When the last characters in the stream equal the delimiter, the original would wrongly return a string that still includes the delimiter.

    using System;
    using System.IO;
    using System.Collections.Generic;
    /// <summary>
    /// Extension methods for splitting the contents of a <see cref="StreamReader"/> on a
    /// caller-chosen delimiter string.
    /// </summary>
    internal static class StreamReaderExtensions
    {
        /// <summary>
        /// Lazily reads <paramref name="reader"/> one character at a time and yields the text
        /// found between occurrences of <paramref name="delimiter"/>.
        /// </summary>
        /// <param name="reader">The reader to consume. It is read to the end but not disposed.</param>
        /// <param name="delimiter">The non-empty string that terminates a segment; it is not included in the results.</param>
        /// <returns>
        /// Each delimited segment in order. Consecutive delimiters produce empty strings. Text after
        /// the last delimiter is returned as a final segment; a delimiter at the very end of the
        /// stream does not produce an extra empty segment.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="delimiter"/> is null or empty.</exception>
        public static IEnumerable<string> ReadUntil(this StreamReader reader, string delimiter)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }
            if (string.IsNullOrEmpty(delimiter))
            {
                throw new ArgumentException("The delimiter must contain at least one character.", "delimiter");
            }

            // Validation happens eagerly above; the iterator itself runs lazily.
            return ReadUntilIterator(reader, delimiter);
        }

        // Accumulates characters and emits a segment each time the buffer ends with the delimiter.
        private static IEnumerable<string> ReadUntilIterator(StreamReader reader, string delimiter)
        {
            List<char> buffer = new List<char>();
            int next;

            // Read() returns -1 only at the end of the stream.
            while ((next = reader.Read()) >= 0)
            {
                buffer.Add((char)next);

                if (EndsWith(buffer, delimiter))
                {
                    // Cut off exactly the trailing delimiter; partial delimiter text that is
                    // really data (e.g. a lone '\r' before "\r\n") stays in the segment.
                    yield return new String(buffer.ToArray(), 0, buffer.Count - delimiter.Length);
                    buffer.Clear();
                }
            }

            // The stream may end without a trailing delimiter; do not lose that last segment.
            if (buffer.Count > 0)
            {
                yield return new String(buffer.ToArray());
            }
        }

        // Returns true when the last characters of the buffer are exactly the suffix.
        private static bool EndsWith(List<char> buffer, string suffix)
        {
            int offset = buffer.Count - suffix.Length;
            if (offset < 0)
            {
                return false;
            }

            // Compare from the end: the most recently read character is the likeliest mismatch.
            for (int i = suffix.Length - 1; i >= 0; i--)
            {
                if (buffer[offset + i] != suffix[i])
                {
                    return false;
                }
            }
            return true;
        }
    }
    



回答6:


You either have to parse the stream byte-by-byte yourself and handle the split, or you need to use the default ReadLine behavior, which splits on \r, \n, or \r\n.

If you want to parse the stream byte-by-byte, I'd use something like the following extension method:

 /// <summary>
 /// Reads characters from <paramref name="sr"/> up to, but not including, the next
 /// <paramref name="splitCharacter"/>, and consumes that delimiter.
 /// </summary>
 /// <param name="sr">The reader to consume from.</param>
 /// <param name="splitCharacter">The character that terminates a line.</param>
 /// <returns>
 /// The text before the delimiter (possibly empty), or the remaining text when the input
 /// ends first; <c>null</c> when the end of the input is reached with nothing read.
 /// </returns>
 public static string ReadToChar(this StreamReader sr, char splitCharacter)
 {
     StringBuilder line = new StringBuilder();
     int next;

     // Read() returns -1 only at the end of the input. Testing the value against 0 would
     // wrongly treat a NUL character ('\0') in the data as the end of the stream.
     while ((next = sr.Read()) >= 0)
     {
         char nextChar = (char)next;
         if (nextChar == splitCharacter)
         {
             return line.ToString();
         }
         line.Append(nextChar);
     }

     // null tells the caller that there is nothing left to read.
     return line.Length == 0 ? null : line.ToString();
 }



回答7:


I needed a solution that reads until "\r\n", and does not stop at "\n". jp1980's solution worked, but was extremely slow on a large file. So, I converted Mike Sackton's solution to read until a specified string is found.

/// <summary>
/// Reads characters from <paramref name="sr"/> up to, but not including, the next occurrence
/// of <paramref name="splitString"/>, and consumes that delimiter.
/// </summary>
/// <param name="sr">The reader to consume from.</param>
/// <param name="splitString">The non-empty string that terminates a line.</param>
/// <returns>
/// The text before the delimiter (possibly empty), or the remaining text when the input
/// ends first; <c>null</c> when the end of the input is reached with nothing read.
/// </returns>
/// <exception cref="System.ArgumentException"><paramref name="splitString"/> is null or empty.</exception>
public static string ReadToString(StreamReader sr, string splitString)
{
    if (string.IsNullOrEmpty(splitString))
    {
        throw new System.ArgumentException("The delimiter must contain at least one character.", "splitString");
    }

    StringBuilder line = new StringBuilder();
    char lastDelimiterChar = splitString[splitString.Length - 1];
    int next;

    // Read() returns -1 only at the end of the input; a NUL character in the data is valid.
    while ((next = sr.Read()) >= 0)
    {
        char nextChar = (char)next;
        line.Append(nextChar);

        // Checking the tail of the text handles overlapping starts such as "\r\r\n" with
        // the delimiter "\r\n", which a single running match index would miss.
        if (nextChar == lastDelimiterChar && EndsWith(line, splitString))
        {
            return line.ToString(0, line.Length - splitString.Length);
        }
    }

    // null tells the caller that there is nothing left to read.
    return line.Length == 0 ? null : line.ToString();
}

// Returns true when the last characters of the text are exactly the suffix.
private static bool EndsWith(StringBuilder text, string suffix)
{
    int offset = text.Length - suffix.Length;
    if (offset < 0)
    {
        return false;
    }

    for (int i = 0; i < suffix.Length; i++)
    {
        if (text[offset + i] != suffix[i])
        {
            return false;
        }
    }
    return true;
}

And it is called like this...

// Print every "\r\n"-delimited record; a null result from ReadToString ends the loop.
using (StreamReader reader = new StreamReader(file))
{
    while (true)
    {
        string record = ReadToString(reader, "\r\n");
        if (record == null)
        {
            break;
        }

        Console.WriteLine(record);
    }
}



回答8:


Even though you said "Using StreamReader", since you also said "I my case, file can have tons of records...", I would recommend trying SSIS. It's perfect for what you're trying to do. You can process very large files and specify the line/column delimiters easily.




回答9:


This code snippet will read a line from a file until it encounters "\n".

// Print the file line by line, treating only '\n' as the line terminator.
using (StreamReader sr = new StreamReader(path))
{
    // A StringBuilder avoids re-copying the whole line for every appended character.
    System.Text.StringBuilder line = new System.Text.StringBuilder();
    int next;

    // Read() returns -1 only at the end of the file. Each character is read exactly once
    // and then either ends the current line or is appended to it.
    while ((next = sr.Read()) >= 0)
    {
        char c = (char)next;
        if (c == '\n')
        {
            //end of line encountered
            Console.WriteLine(line.ToString());
            //create new line
            line.Length = 0;
        }
        else
        {
            line.Append(c);
        }
    }

    // The file may not end with '\n'; print whatever is left as the last line.
    if (line.Length > 0)
    {
        Console.WriteLine(line.ToString());
    }
}

Because this code reads character by character it will work with a file of any length without being constrained by available memory.



来源:https://stackoverflow.com/questions/6655246/how-to-read-text-file-by-particular-line-separator-character

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!