How to read a text file reversely with iterator in C#

后端 未结 11 2018
甜味超标
甜味超标 2020-11-22 04:05

I need to process a large file, around 400K lines and 200 M. But sometimes I have to process from bottom up. How can I use iterator (yield return) here? Basically I don\'t l

11条回答
  •  一整个雨季
    2020-11-22 04:28

    There are good answers here already, and here's another LINQ-compatible class you can use which focuses on performance and support for large files. It assumes a "\r\n" line terminator.

    Usage:

    var reader = new ReverseTextReader(@"C:\Temp\ReverseTest.txt");
    while (!reader.EndOfStream)
        Console.WriteLine(reader.ReadLine());
    

    ReverseTextReader Class:

    /// 
    /// Reads a text file backwards, line-by-line.
    /// 
    /// This class uses file seeking to read a text file of any size in reverse order.  This
    /// is useful for needs such as reading a log file newest-entries first.
    public sealed class ReverseTextReader : IEnumerable
    {
        private const int BufferSize = 16384;   // The number of bytes read from the uderlying stream.
        private readonly Stream _stream;        // Stores the stream feeding data into this reader
        private readonly Encoding _encoding;    // Stores the encoding used to process the file
        private byte[] _leftoverBuffer;         // Stores the leftover partial line after processing a buffer
        private readonly Queue _lines;  // Stores the lines parsed from the buffer
    
        #region Constructors
    
        /// 
        /// Creates a reader for the specified file.
        /// 
        /// 
        public ReverseTextReader(string filePath)
            : this(new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read), Encoding.Default)
        { }
    
        /// 
        /// Creates a reader using the specified stream.
        /// 
        /// 
        public ReverseTextReader(Stream stream)
            : this(stream, Encoding.Default)
        { }
    
        /// 
        /// Creates a reader using the specified path and encoding.
        /// 
        /// 
        /// 
        public ReverseTextReader(string filePath, Encoding encoding)
            : this(new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read), encoding)
        { }
    
        /// 
        /// Creates a reader using the specified stream and encoding.
        /// 
        /// 
        /// 
        public ReverseTextReader(Stream stream, Encoding encoding)
        {          
            _stream = stream;
            _encoding = encoding;
            _lines = new Queue(128);            
            // The stream needs to support seeking for this to work
            if(!_stream.CanSeek)
                throw new InvalidOperationException("The specified stream needs to support seeking to be read backwards.");
            if (!_stream.CanRead)
                throw new InvalidOperationException("The specified stream needs to support reading to be read backwards.");
            // Set the current position to the end of the file
            _stream.Position = _stream.Length;
            _leftoverBuffer = new byte[0];
        }
    
        #endregion
    
        #region Overrides
    
        /// 
        /// Reads the next previous line from the underlying stream.
        /// 
        /// 
        public string ReadLine()
        {
            // Are there lines left to read? If so, return the next one
            if (_lines.Count != 0) return _lines.Dequeue();
            // Are we at the beginning of the stream? If so, we're done
            if (_stream.Position == 0) return null;
    
            #region Read and Process the Next Chunk
    
            // Remember the current position
            var currentPosition = _stream.Position;
            var newPosition = currentPosition - BufferSize;
            // Are we before the beginning of the stream?
            if (newPosition < 0) newPosition = 0;
            // Calculate the buffer size to read
            var count = (int)(currentPosition - newPosition);
            // Set the new position
            _stream.Position = newPosition;
            // Make a new buffer but append the previous leftovers
            var buffer = new byte[count + _leftoverBuffer.Length];
            // Read the next buffer
            _stream.Read(buffer, 0, count);
            // Move the position of the stream back
            _stream.Position = newPosition;
            // And copy in the leftovers from the last buffer
            if (_leftoverBuffer.Length != 0)
                Array.Copy(_leftoverBuffer, 0, buffer, count, _leftoverBuffer.Length);
            // Look for CrLf delimiters
            var end = buffer.Length - 1;
            var start = buffer.Length - 2;
            // Search backwards for a line feed
            while (start >= 0)
            {
                // Is it a line feed?
                if (buffer[start] == 10)
                {
                    // Yes.  Extract a line and queue it (but exclude the \r\n)
                    _lines.Enqueue(_encoding.GetString(buffer, start + 1, end - start - 2));
                    // And reset the end
                    end = start;
                }
                // Move to the previous character
                start--;
            }
            // What's left over is a portion of a line. Save it for later.
            _leftoverBuffer = new byte[end + 1];
            Array.Copy(buffer, 0, _leftoverBuffer, 0, end + 1);
            // Are we at the beginning of the stream?
            if (_stream.Position == 0)
                // Yes.  Add the last line.
                _lines.Enqueue(_encoding.GetString(_leftoverBuffer, 0, end - 1));
    
            #endregion
    
            // If we have something in the queue, return it
            return _lines.Count == 0 ? null : _lines.Dequeue();
        }
    
        #endregion
    
        #region IEnumerator Interface
    
        public IEnumerator GetEnumerator()
        {
            string line;
            // So long as the next line isn't null...
            while ((line = ReadLine()) != null)
                // Read and return it.
                yield return line;
        }
    
        IEnumerator IEnumerable.GetEnumerator()
        {
            throw new NotImplementedException();
        }
    
        #endregion
    }
    

提交回复
热议问题