How to read a text file in reverse with an iterator in C#

后端 未结 11 2040
甜味超标
甜味超标 2020-11-22 04:05

I need to process a large file, around 400K lines and 200 MB. But sometimes I have to process it from the bottom up. How can I use an iterator (yield return) here? Basically I don't l…

11条回答
  •  北荒
    北荒 (楼主)
    2020-11-22 04:35

    I wanted to do a similar thing. Here is my code. This class creates temporary files containing chunks of the big file, which avoids memory bloat. The user can specify whether the file should be reversed; if so, the content is returned in reverse order.

    This class can also be used to write big data in a single file without bloating memory.

    Please provide feedback.

            using System;
            using System.Collections.Generic;
            using System.Diagnostics;
            using System.IO;
            using System.Linq;
            using System.Text;
            using System.Threading.Tasks;
    
            namespace BigFileService
            {    
                /// <summary>
                /// Spools lines into numbered chunk files under a "BIG_FILE_DUMP" sub-directory
                /// and can later consolidate those chunks into a single "FullFile", optionally
                /// with the overall line order reversed.
                /// Lines are written straight to disk; during consolidation only one chunk
                /// (at most <see cref="CHUNK_SIZE"/> lines) is read into memory at a time.
                /// </summary>
                public class BigFileDumper : IDisposable
                {
                    private const string DumpDirectoryName = "BIG_FILE_DUMP";
                    private const string ChunkFilePrefix = "DumpFile_";
                    private const string FullFileName = "FullFile";

                    /// <summary>
                    /// Maximum number of lines written to one chunk file before a new chunk
                    /// file is started. Must be greater than zero when
                    /// <see cref="WriteLine"/> is called.
                    /// </summary>
                    public int CHUNK_SIZE = 1000;

                    /// <summary>
                    /// When true, <see cref="GetFullFile"/> emits the lines in reverse order
                    /// (last line written comes first).
                    /// </summary>
                    public bool ReverseIt { get; set; }

                    /// <summary>
                    /// Total number of lines passed to <see cref="WriteLine"/> so far.
                    /// </summary>
                    public long TotalLineCount { get { return totalLineCount; } }

                    private long totalLineCount;

                    /// <summary>
                    /// Number of lines written to the chunk file that is currently open.
                    /// </summary>
                    private int BufferCount = 0;

                    /// <summary>
                    /// Writer for the chunk file currently being filled; null when no chunk is open.
                    /// </summary>
                    private StreamWriter Writer;

                    /// <summary>
                    /// Names (not full paths) of the chunk files, in the order they were created.
                    /// </summary>
                    private readonly List<string> LstTempFiles;

                    /// <summary>
                    /// Directory that holds the chunk files and the consolidated file.
                    /// </summary>
                    private readonly string ParentDirectory;

                    /// <summary>
                    /// Creates a dumper that works inside a "BIG_FILE_DUMP" directory below
                    /// <paramref name="FolderPathToWrite"/>. Any existing directory of that
                    /// name is deleted, including its contents.
                    /// </summary>
                    /// <param name="FolderPathToWrite">Folder under which the dump directory is created.</param>
                    /// <exception cref="ArgumentNullException"><paramref name="FolderPathToWrite"/> is null.</exception>
                    public BigFileDumper(string FolderPathToWrite)
                    {
                        if (FolderPathToWrite == null)
                        {
                            throw new ArgumentNullException("FolderPathToWrite");
                        }

                        this.LstTempFiles = new List<string>();
                        // Path.Combine picks the platform's separator and copes with a trailing one.
                        this.ParentDirectory = Path.Combine(FolderPathToWrite, DumpDirectoryName);
                        this.totalLineCount = 0;
                        this.BufferCount = 0;
                        this.Initialize();
                    }

                    /// <summary>
                    /// Recreates the dump directory from scratch.
                    /// </summary>
                    private void Initialize()
                    {
                        // Remove chunks left behind by a previous run.
                        if (Directory.Exists(this.ParentDirectory))
                        {
                            Directory.Delete(this.ParentDirectory, true);
                        }

                        Directory.CreateDirectory(this.ParentDirectory);
                    }

                    /// <summary>
                    /// Appends one line to the current chunk file, starting a new chunk file
                    /// first when none is open or the current one already holds
                    /// <see cref="CHUNK_SIZE"/> lines.
                    /// </summary>
                    /// <param name="line">Line to store.</param>
                    /// <exception cref="InvalidOperationException"><see cref="CHUNK_SIZE"/> is zero or negative.</exception>
                    public void WriteLine(string line)
                    {
                        // A non-positive chunk size could never accept a line.
                        if (this.CHUNK_SIZE <= 0)
                        {
                            throw new InvalidOperationException("CHUNK_SIZE must be greater than zero.");
                        }

                        // Current chunk is full: close it so the next block opens a fresh one.
                        if (this.Writer != null && this.BufferCount >= this.CHUNK_SIZE)
                        {
                            this.Close();
                        }

                        if (this.Writer == null)
                        {
                            string newFile = ChunkFilePrefix + this.LstTempFiles.Count;
                            this.Writer = new StreamWriter(Path.Combine(this.ParentDirectory, newFile));
                            // Register the chunk only once its file has actually been created.
                            this.LstTempFiles.Add(newFile);
                            this.BufferCount = 0;
                        }

                        this.Writer.WriteLine(line);
                        this.totalLineCount++; // main count
                        this.BufferCount++;    // chunk count
                    }

                    /// <summary>
                    /// Flushes and closes the chunk file that is currently open, if any.
                    /// Safe to call repeatedly; a later <see cref="WriteLine"/> starts a new chunk.
                    /// </summary>
                    public void Close()
                    {
                        if (this.Writer != null)
                        {
                            this.Writer.Close();
                            this.Writer = null;
                        }
                    }

                    /// <summary>
                    /// Releases the open chunk writer; equivalent to <see cref="Close"/>.
                    /// </summary>
                    public void Dispose()
                    {
                        this.Close();
                    }

                    /// <summary>
                    /// Builds (on first call) the consolidated "FullFile" from the chunk files
                    /// and returns its path. With <see cref="ReverseIt"/> set, chunks are
                    /// visited last-to-first and each chunk's lines are reversed, so the result
                    /// is the whole input in reverse line order.
                    /// If the consolidated file already exists it is returned as is; lines
                    /// written after it was built are not added to it.
                    /// </summary>
                    /// <returns>Path of the consolidated file, or "" when no line has been written.</returns>
                    public string GetFullFile()
                    {
                        if (this.LstTempFiles.Count <= 0)
                        {
                            Debug.Assert(false, "There are no files created.");
                            return "";
                        }

                        string returnFilename = Path.Combine(this.ParentDirectory, FullFileName);
                        if (File.Exists(returnFilename))
                        {
                            return returnFilename;
                        }

                        // The last chunk may still be open; flush and release it before reading it back.
                        this.Close();

                        // Enumerable.Reverse yields a reversed view and leaves the bookkeeping list untouched.
                        IEnumerable<string> chunkNames = this.ReverseIt
                            ? Enumerable.Reverse(this.LstTempFiles)
                            : this.LstTempFiles;

                        // Build under a temporary name so a failure midway never leaves a
                        // truncated "FullFile" that later calls would mistake for a finished one.
                        string partialFilename = returnFilename + ".partial";
                        using (StreamWriter output = new StreamWriter(partialFilename, false))
                        {
                            foreach (string chunkName in chunkNames)
                            {
                                // Holds one chunk in memory; its size is bounded by CHUNK_SIZE.
                                string[] chunkLines = File.ReadAllLines(Path.Combine(this.ParentDirectory, chunkName));

                                if (this.ReverseIt)
                                {
                                    Array.Reverse(chunkLines);
                                }

                                foreach (string chunkLine in chunkLines)
                                {
                                    output.WriteLine(chunkLine);
                                }
                            }
                        }

                        File.Move(partialFilename, returnFilename);
                        return returnFilename;
                    }
                }
            }
    

    This service can be used as follows -

    /// <summary>
    /// Example driver: streams <paramref name="BIG_FILE"/> line by line into a
    /// BigFileDumper and prints the path of the reversed output file.
    /// </summary>
    /// <param name="BIG_FILE">Path of the large input text file.</param>
    /// <param name="FOLDER_PATH_FOR_CHUNK_FILES">Folder under which the dumper creates its chunk files.</param>
    void TestBigFileDump_File(string BIG_FILE, string FOLDER_PATH_FOR_CHUNK_FILES)
    {
        string reversedFilename;

        // "using" guarantees the input file handle is released even if processing throws.
        using (StreamReader reader = new StreamReader(BIG_FILE))
        {
            // Create a dump file class object to handle efficient memory management.
            var bigFileDumper = new BigFileDumper(FOLDER_PATH_FOR_CHUNK_FILES);
            try
            {
                // Set to reverse the output file.
                bigFileDumper.ReverseIt = true;
                // Number of lines per chunk file.
                bigFileDumper.CHUNK_SIZE = 100;

                // ReadLine returns null at end of stream.
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    bigFileDumper.WriteLine(line);
                }
            }
            finally
            {
                // Always release the chunk writer, even when reading or writing failed.
                bigFileDumper.Close();
            }

            // Get back full reversed file.
            reversedFilename = bigFileDumper.GetFullFile();
        }

        Console.WriteLine("Check output file - " + reversedFilename);
    }
    

提交回复
热议问题