本文主要对《一次PDF文件的处理(一)》中用到的代码进行详细说明：
显示PDF
这个比较简单：在窗体中放置一个 DevExpress 的 PdfViewer 控件（类型为 DevExpress.XtraPdfViewer.PdfViewer，下文中的 pdfViewer1），然后调用 LoadDocument 加载文件即可：
// Open the sample PDF in the viewer control. The path has no directory part,
// so it is resolved relative to the process's current working directory.
string pdfPath = @"美文晨读.pdf";
pdfViewer1.LoadDocument(pdfPath);
读取PDF的内容
使用 PdfDataRecognizer 识别页面中的单词，再逐个字符取出文本及其矩形位置，保存到自定义的 PDFWord 列表中：
// Store the page bounds taken from the crop box, cast to float for RectangleF.
page.Rect = new RectangleF(
    (float)CropBox.Left,
    (float)CropBox.Top,
    (float)CropBox.Width,
    (float)CropBox.Height);

// Run text recognition on the page, then flatten the result into one PDFWord
// entry per recognized character (not per recognized word).
PdfPageData pdata = PdfDataRecognizer.Recognize(pdfpage, false, false);
foreach (PdfWord recognizedWord in pdata.Words)
{
    foreach (PdfCharacter glyph in recognizedWord.Characters)
    {
        // Characters without any Unicode text are not recorded.
        if (glyph.UnicodeData != "")
        {
            var bounds = glyph.Rectangle;
            RectangleF glyphRect = new RectangleF(
                (float)bounds.Left,
                (float)bounds.Top,
                (float)bounds.Width,
                (float)bounds.Height);

            PDFWord entry = new PDFWord();
            entry.Text = glyph.UnicodeData;
            entry.TextRect.Rect = glyphRect;
            page.Words.Add(entry);
        }
    }
}
预览PDF时显示调试信息
在 PdfViewer 的 Paint 事件中调用下面的方法，按开关分别绘制页面边框、每个字符的矩形、字符文本，以及汉字与其拼音之间的连线：

/// <summary>
/// Draws debugging overlays for one page on the viewer's paint surface: the page
/// outline and, depending on the <c>_show_pt</c>, <c>_show_txt</c> and
/// <c>_show_line</c> flags, each word's bounding box, its text, and a connector
/// line from the word to every entry in its <c>PinYinWord</c> list.
/// </summary>
/// <param name="e">Paint event arguments; all drawing goes to <c>e.Graphics</c>.</param>
/// <param name="PageNumber">
/// Page to annotate, as passed to <c>_document.GetPage</c> and
/// <c>PdfDocumentPosition</c>. Nothing is drawn when no page is returned.
/// </param>
public void paint_debug_info(PaintEventArgs e, int PageNumber)
{
    PDFPage page = _document.GetPage(PageNumber);
    if (page == null)
        return;

    // Page outline.
    PointF pageTopLeft;
    PointF pageBottomRight;
    GetClientCorners(PageNumber, page.Rect, out pageTopLeft, out pageBottomRight);
    e.Graphics.DrawRectangle(
        Pens.Blue,
        pageTopLeft.X,
        pageTopLeft.Y,
        pageBottomRight.X - pageTopLeft.X,
        pageBottomRight.Y - pageTopLeft.Y);

    // ExtractPdfPageHTML used to run once per word on every repaint. It is now
    // run at most once per paint, at the same point the original first reached
    // it (the first word with non-empty text while _show_line is set).
    bool extractionDone = false;

    foreach (PDFWord w in page.Words)
    {
        if (w.Text == "")
            continue;

        PointF pt1;
        PointF pt2;
        GetClientCorners(PageNumber, w.TextRect.Rect, out pt1, out pt2);

        if (_show_line && !extractionDone)
        {
            // NOTE(review): this passes curr_pagenum rather than PageNumber.
            // Kept as in the original; confirm the two always refer to the same page.
            ExtractPdfPageHTML(_viewer, curr_pagenum);
            extractionDone = true;
        }

        // Skip words whose two corners both have a non-positive client Y.
        if (!((pt1.Y > 0) || (pt2.Y > 0)))
            continue;

        if (_show_pt)
        {
            Rectangle box = new Rectangle(
                (int)pt1.X,
                (int)pt1.Y,
                (int)pt2.X - (int)pt1.X,
                (int)pt2.Y - (int)pt1.Y);
            e.Graphics.DrawRectangle(Pens.Blue, box);
        }

        if (_show_txt)
            e.Graphics.DrawString(w.Text, drawFont, drawBrush, pt1);

        if (_show_line)
        {
            // Connect the centre of this word to the centre of each linked word.
            float centerX = (pt1.X + pt2.X) / 2;
            float centerY = (pt1.Y + pt2.Y) / 2;
            foreach (PDFWord linked in w.PinYinWord)
            {
                PointF linkedPt1;
                PointF linkedPt2;
                GetClientCorners(PageNumber, linked.TextRect.Rect, out linkedPt1, out linkedPt2);
                e.Graphics.DrawLine(
                    Pens.Blue,
                    centerX,
                    centerY,
                    (linkedPt1.X + linkedPt2.X) / 2,
                    (linkedPt1.Y + linkedPt2.Y) / 2);
            }
        }
    }
}

/// <summary>
/// Converts two opposite corners of a document-space rectangle on the given page
/// into viewer client points via <c>_viewer.GetClientPoint</c>.
/// </summary>
/// <param name="pageNumber">Page the rectangle belongs to.</param>
/// <param name="rect">Rectangle in document coordinates.</param>
/// <param name="first">Client point for (Left, Top).</param>
/// <param name="second">
/// Client point for (Right, Top - Height). Subtracting the height suggests the
/// document Y axis grows upward (usual for PDF) - TODO confirm.
/// </param>
private void GetClientCorners(int pageNumber, RectangleF rect, out PointF first, out PointF second)
{
    PdfDocumentPosition firstPos = new PdfDocumentPosition(pageNumber, new PdfPoint(rect.Left, rect.Top));
    PdfDocumentPosition secondPos = new PdfDocumentPosition(pageNumber, new PdfPoint(rect.Right, rect.Top - rect.Height));
    first = _viewer.GetClientPoint(firstPos);
    second = _viewer.GetClientPoint(secondPos);
}
计算汉字和拼音的位置
// Walk the lines in order. A line whose average item width is below the
// overall average is handled as an annotation line: each of its words is
// attached to the nearest word of the following line (presumably pinyin above
// hanzi - confirm against the layout model). Every other line is emitted as
// HTML, one "char-group" span per word.
for (int i = 0; i < lm.Lines.Count; i++)
{
    Line line = lm.Lines[i];
    if (line.item_w_AVG < lm.line_item_w_AVG)
    {
        // The last line has no following line to attach to.
        if (i >= lm.Lines.Count - 1)
            continue;

        Line line2 = lm.Lines[i + 1];

        // Only pair the two lines when the vertical-distance test passes.
        bool closeEnough = (line2.Rect.Top + line2.Rect.Height) > (line.Rect.Top - line2.Rect.Height);
        if (!closeEnough)
            continue;

        // Reset earlier pairings so that re-running this pass does not
        // accumulate duplicate annotations.
        foreach (PDFWord target in line2.Words)
        {
            target.PinYin = "";
            target.PinYinWord.Clear();
        }

        foreach (PDFWord w in line.Words)
        {
            float x = w.TextRect.Rect.X + w.TextRect.Rect.Width / 2;
            float y = w.TextRect.Rect.Y - w.TextRect.Rect.Height / 2;

            // Find the word in the next line with the smallest squared
            // distance between centres; the first one wins ties.
            PDFWord nearest = null;
            float nearestDistSq = 0;
            foreach (PDFWord candidate in line2.Words)
            {
                float x2 = candidate.TextRect.Rect.X + candidate.TextRect.Rect.Width / 2;
                float y2 = candidate.TextRect.Rect.Y - candidate.TextRect.Rect.Height / 2;
                float distSq = (x2 - x) * (x2 - x) + (y2 - y) * (y2 - y);
                if (nearest == null || distSq < nearestDistSq)
                {
                    nearestDistSq = distSq;
                    nearest = candidate;
                }
            }

            if (nearest != null)
            {
                nearest.PinYin = nearest.PinYin + w.Text;
                nearest.PinYinWord.Add(w);
            }
        }
    }
    else
    {
        // A line starting further right than the left margin plus the average
        // item width gets a line break emitted before it.
        if (line.Rect.Left > lm.line_x_MIN + lm.line_item_w_AVG)
            sb.AppendLine(@"<br /> ");

        foreach (PDFWord w2 in line.Words)
        {
            // Text comes straight from the PDF, so HTML-encode it; a literal
            // '<' or '&' would otherwise corrupt the generated markup.
            // A word with no annotation keeps the single-space placeholder.
            string pinyinHtml = w2.PinYin == "" ? " " : System.Net.WebUtility.HtmlEncode(w2.PinYin);
            string hanziHtml = System.Net.WebUtility.HtmlEncode(w2.Text);
            sb.AppendLine(@" <span class=""char-group""><span class=""pinyin"">" + pinyinHtml + @"</span><span class=""hanzi"">" + hanziHtml + @"</span></span>");
        }
    }
}