How to convert PDF to text file in iTextSharp

后端 未结 3 938
时光说笑
时光说笑 2021-01-05 10:08

I have to retrieve the text from a PDF file, but with the following code I only get an empty text file.

for (int i = 0; i < n; i++)
{
    pagenumber = i + 1;
            


        
3条回答
  •  一个人的身影
    2021-01-05 10:53

    using System;
    using System.IO;
    using System.Linq;
    using System.Text;
    using iTextSharp.text.pdf;
    using iTextSharp.text.pdf.parser;
    
    namespace Pdf2Text
    {
        /// <summary>
        /// Console tool that extracts the text of a PDF file and writes it to a
        /// <c>.txt</c> file with the same path and base name.
        /// </summary>
        class Program
        {
            /// <summary>
            /// Entry point. Expects the path of the PDF file as the first argument.
            /// Does nothing when no argument is given or the file does not exist.
            /// </summary>
            /// <param name="args">Command-line arguments; only <c>args[0]</c> is used.</param>
            static void Main(string[] args)
            {
                if (!args.Any())
                {
                    return;
                }

                var file = args[0];
                if (!File.Exists(file))
                {
                    return;
                }

                // Same path as the input, with the extension swapped for ".txt".
                var output = Path.ChangeExtension(file, ".txt");

                var bytes = File.ReadAllBytes(file);
                File.WriteAllText(output, ConvertToText(bytes), Encoding.UTF8);
            }

            /// <summary>
            /// Extracts the text of every page of a PDF document.
            /// </summary>
            /// <param name="bytes">Raw content of the PDF file.</param>
            /// <returns>
            /// The text of all pages, each page terminated by a line break. If
            /// extraction fails, the error message is written to the console and
            /// whatever text was collected before the failure is returned.
            /// </returns>
            private static string ConvertToText(byte[] bytes)
            {
                var sb = new StringBuilder();
                PdfReader reader = null;

                try
                {
                    reader = new PdfReader(bytes);
                    var numberOfPages = reader.NumberOfPages;

                    // iTextSharp page numbers are 1-based.
                    for (var pageNumber = 1; pageNumber <= numberOfPages; pageNumber++)
                    {
                        // AppendLine keeps the last line of one page from being
                        // fused with the first line of the next page.
                        sb.AppendLine(PdfTextExtractor.GetTextFromPage(reader, pageNumber));
                    }
                }
                catch (Exception exception)
                {
                    // Best effort: report the problem and return what we have.
                    Console.WriteLine(exception.Message);
                }
                finally
                {
                    // Release the reader on both the success and the failure path.
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }

                return sb.ToString();
            }
        }
    }
    

提交回复
热议问题