using iTextSharp.text.pdf;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace TestIText
{
/// <summary>
/// Console program that prints the text extracted from page 1 of a PDF file.
/// </summary>
class Program
{
    /// <summary>
    /// Path of the PDF that is opened when no path is supplied on the command line.
    /// </summary>
    private const string DefaultPdfPath = @"D:\.pdf";

    /// <summary>
    /// Opens a PDF, runs <c>PdfHelper.LocationTextExtractionStrategyEx</c> over page 1,
    /// writes the resulting text to the console and then waits for a line of input.
    /// </summary>
    /// <param name="args">
    /// Optional command-line arguments. When at least one is present, the first is used
    /// as the path of the PDF to read; otherwise <see cref="DefaultPdfPath"/> is used.
    /// </param>
    static void Main(string[] args)
    {
        string pdfPath = (args != null && args.Length > 0) ? args[0] : DefaultPdfPath;

        PdfReader readerTemp = new PdfReader(pdfPath);
        try
        {
            PdfHelper.LocationTextExtractionStrategyEx pz = new PdfHelper.LocationTextExtractionStrategyEx();
            iTextSharp.text.pdf.parser.PdfReaderContentParser p = new iTextSharp.text.pdf.parser.PdfReaderContentParser(readerTemp);

            // Only the first page is processed (page numbers passed to ProcessContent start at 1 here).
            p.ProcessContent<PdfHelper.LocationTextExtractionStrategyEx>(1, pz);
            Console.WriteLine(pz.GetResultantText());
        }
        finally
        {
            // Release the reader before blocking on ReadLine so the PDF is not held
            // open while the program waits for the user, and also when parsing throws.
            readerTemp.Close();
        }

        // Keep the console window open until the user presses Enter.
        Console.ReadLine();
    }
}
}
// PdfHelper
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using iTextSharp.text.pdf.parser;
namespace PdfHelper
{
/// <summary>
/// Taken from http://www.java-frameworks.com/java/itext/com/itextpdf/text/pdf/parser/LocationTextExtractionStrategy.java.html
/// </summary>
class LocationTextExtractionStrategyEx : LocationTextExtractionStrategy
{
private List<TextChunk> m_locationResult = new List<TextChunk>();
private List<TextInfo> m_TextLocationInfo = new List<TextInfo>();
/// <summary>
/// Gets the list of text chunks held by this strategy.
/// The same list instance is sorted in place by <c>GetResultantText</c>.
/// </summary>
public List<TextChunk> LocationResult
{
    get
    {
        return this.m_locationResult;
    }
}
/// <summary>
/// Gets the list of <c>TextInfo</c> entries held by this strategy.
/// </summary>
public List<TextInfo> TextLocationInfo
{
    get
    {
        return this.m_TextLocationInfo;
    }
}
/// <summary>
/// Creates a new LocationTextExtractionStrategyEx.
/// The constructor body is empty; the instance lists are created by their field initializers.
/// </summary>
public LocationTextExtractionStrategyEx()
    : base()
{
}
/// <summary>
/// Returns the result so far
/// </summary>
/// <returns>a String with the resulting text</returns>
public override String GetResultantText()
{
m_locationResult.Sort();
StringBuilder sb = new StringBuilder();
TextChunk lastChunk = null;
TextInfo lastTextInfo = null;
foreach (TextChunk chunk in m_locationResult)
{
if (lastChunk == null)
{
sb.Append(chunk.Text);
lastTextInfo = n