is there any logic to get paragraph text from pdf file using itextsharp?i know pdf only supports run of texts and its hard to determine which runs of texts are related to which paragraph and also i know that there isn't any <p>
tags or other tags to determine paragraph in pdf..However i have tried to get coordinate of runs of texts to build paragraph from its coordinate but with no luck :(.
my code snippet is here:
private StringBuilder result = new StringBuilder();
private Vector lastBaseLine;
//to store run of texts
public List<string> strings = new List<String>();
//to store run of texts Coordinate (Y coordinate)
public List<float> baselines = new List<float>();
public void RenderText(iTextSharp.text.pdf.parser.TextRenderInfo renderInfo)
{
Vector curBaseline = renderInfo.GetBaseline().GetStartPoint();
if ((this.lastBaseLine != null) && (curBaseline[Vector.I2] != lastBaseLine[Vector.I2]))
{
if ((!string.IsNullOrEmpty(this.result.ToString())))
{
this.baselines.Add(this.lastBaseLine[Vector.I2]);
this.strings.Add(this.result.ToString());
}
result = new StringBuilder();
}
this.result.Append(renderInfo.GetText());
this.lastBaseLine = curBaseline;
}
Do any body have any logic related to this issue??