'' 完毕:
Imports System.IO
Imports System.Drawing
Imports System.Numerics
Imports System.Collections.Generic
Imports System.Linq
Imports GrapeCity.Documents.Text
Imports GrapeCity.Documents.Drawing
Imports GrapeCity.Documents.Pdf
Imports GrapeCity.Documents.Pdf.Annotations
Imports GrapeCity.Documents.Pdf.Graphics
Imports GCTEXT = GrapeCity.Documents.Text
Imports GCDRAW = GrapeCity.Documents.Drawing
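'' This sample shows how to extract the paragraphs of text from an existing PDF.
'' It loads an arbitrary PDF into a temporary GcPdfDocument, then uses
'' Page.GetTextMap().Paragraphs to retrieve the paragraphs of each page of that document,
'' adds their text to a TextLayout and renders it into the current document.
'' The original PDF is then merged into the result for reference.
'' If only plain text is needed, Page.GetText() (per page) or
'' GcPdfDocument.GetText() (whole document at once) are alternatives.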
'' Sample that prints the paragraphs found on every page of an existing PDF
'' (Resources/PDFs/Wetlands.pdf) into a new document, then merges the source
'' PDF into the result.
Public Class ExtractParagraphs
    '' Builds the sample document and saves it to 'stream'.
    '' Returns the page count of the saved document (counted after the source
    '' PDF has been merged in).
    Function CreatePDF(ByVal stream As Stream) As Integer
        Const pageMargin = 36
        '' The two background colors that paragraphs alternate between:
        Dim firstShade = Color.PaleGreen
        Dim secondShade = Color.PaleGoldenrod

        Dim doc = New GcPdfDocument()
        Dim firstPage = doc.NewPage()

        '' Note at the top of the first page; the rectangle it returns is used
        '' below to position the text layout.
        Dim noteBounds = New RectangleF(pageMargin, pageMargin, firstPage.Size.Width - pageMargin * 2, 0)
        Dim noteRect = Util.AddNote(
            "这里我们将现有的 PDF (Wetlands) 加载到临时 GcPdfDocument 中," +
            "",
            firstPage,
            noteBounds)

        '' Text format for the per-page captions:
        Dim captionFormat = New TextFormat()
        captionFormat.Font = GCTEXT.Font.FromFile(Path.Combine("Resources", "Fonts", "yumin.ttf"))
        captionFormat.FontSize = 14
        captionFormat.ForeColor = Color.Blue

        '' Text format for the paragraphs (its BackColor is toggled after each paragraph):
        Dim paragraphFormat = New TextFormat()
        paragraphFormat.Font = StandardFonts.Times
        paragraphFormat.FontSize = 12
        paragraphFormat.BackColor = firstShade

        '' Text layout used to render the text. MarginAll must be assigned
        '' before MarginTop so that the top margin override is the last write.
        Dim layout = firstPage.Graphics.CreateTextLayout()
        With layout
            .MaxWidth = doc.PageSize.Width
            .MaxHeight = doc.PageSize.Height
            .MarginAll = noteRect.Left
            .MarginTop = noteRect.Bottom + 36
        End With

        '' Text split options with widow/orphan control:
        Dim splitOptions = New TextSplitOptions(layout)
        splitOptions.MinLinesInFirstParagraph = 2
        splitOptions.MinLinesInLastParagraph = 2
        splitOptions.RestMarginTop = noteRect.Left

        '' Open the source PDF, load it into a temporary document and collect
        '' the paragraphs of all its pages. The file stream stays open until
        '' the result has been saved.
        Using sourceStream = File.OpenRead(Path.Combine("Resources", "PDFs", "Wetlands.pdf"))
            Dim sourceDoc = New GcPdfDocument()
            sourceDoc.Load(sourceStream)

            '' pageNumber is 1-based for the caption; the page collection is 0-based.
            For pageNumber = 1 To sourceDoc.Pages.Count
                layout.AppendLine(String.Format("原始 PDF 第 {0} 页的段落:", pageNumber), captionFormat)
                For Each paragraph In sourceDoc.Pages(pageNumber - 1).GetTextMap().Paragraphs
                    layout.AppendLine(paragraph.GetText(), paragraphFormat)
                    '' Switch to the other background color for the next paragraph:
                    paragraphFormat.BackColor = If(paragraphFormat.BackColor = firstShade, secondShade, firstShade)
                Next
            Next

            layout.PerformLayout(True)

            '' Draw the layout page by page: split off whatever does not fit,
            '' draw the current part on the last page, then continue with the
            '' remainder on a new page.
            Dim hasMore As Boolean
            Do
                '' 'overflow' receives the text that did not fit:
                Dim overflow As TextLayout = Nothing
                hasMore = (layout.Split(splitOptions, overflow) = GCTEXT.SplitResult.Split)
                doc.Pages.Last.Graphics.DrawTextLayout(layout, PointF.Empty)
                If hasMore Then
                    layout = overflow
                    doc.NewPage()
                End If
            Loop While hasMore

            '' Append the original document for reference:
            doc.MergeWithDocument(sourceDoc, New MergeDocumentOptions())

            '' Done:
            doc.Save(stream)
            Return doc.Pages.Count
        End Using
    End Function
End Class