ExtractText.vb
'' 完毕:
Imports System.IO
Imports System.Drawing
Imports GrapeCity.Documents.Text
Imports GrapeCity.Documents.Pdf
Imports GCTEXT = GrapeCity.Documents.Text
Imports GCDRAW = GrapeCity.Documents.Drawing

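'' This sample shows how to extract text from an existing PDF.
'' It loads an arbitrary PDF into a temporary GcPdfDocument, then
'' retrieves the text of each page of that document with Page.GetText(),
'' appends all of those texts to a TextLayout and renders it into the current document.
'' An alternative to Page.GetText() is GcPdfDocument.GetText(),
'' which retrieves the text of the whole document in a single call.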
Public Class ExtractText
    '' Generates a PDF that contains the text extracted from every page of
    '' Resources/PDFs/Wetlands.pdf, saves it to 'stream', and returns the
    '' number of pages in the generated document.
    Function CreatePDF(ByVal stream As Stream) As Integer
        Dim doc As New GcPdfDocument()
        Dim firstPage = doc.NewPage()

        '' The value returned by Util.AddNote supplies the Left/Bottom coordinates
        '' used below to position the extracted text
        '' (presumably the note's bounds; Util is defined elsewhere - confirm).
        Dim noteBounds = Util.AddNote(
            "此示例将任意 PDF 加载到临时 GcPdfDocument 中," &
            "",
            firstPage)

        '' Format used for the per-page captions:
        Dim captionFormat As New TextFormat()
        captionFormat.Font = GCTEXT.Font.FromFile(Path.Combine("Resources", "Fonts", "yumin.ttf"))
        captionFormat.FontSize = 14
        captionFormat.ForeColor = Color.Blue

        '' Layout that accumulates and renders the extracted text:
        Dim layout As New TextLayout(72)
        With layout
            .DefaultFormat.Font = StandardFonts.Times
            .DefaultFormat.FontSize = 12
            .MaxWidth = doc.PageSize.Width
            .MaxHeight = doc.PageSize.Height
            '' Keep this order: MarginAll presumably sets every margin,
            '' and the top margin is then overridden to start below the note.
            .MarginAll = noteBounds.Left
            .MarginTop = noteBounds.Bottom + 36
        End With

        '' Split options providing widow/orphan control when the text is paginated:
        Dim splitOptions As New TextSplitOptions(layout)
        splitOptions.MinLinesInFirstParagraph = 2
        splitOptions.MinLinesInLastParagraph = 2
        splitOptions.RestMarginTop = noteBounds.Left

        '' Load the source PDF into a temporary document and collect its page texts:
        Using pdfStream As New FileStream(Path.Combine("Resources", "PDFs", "Wetlands.pdf"), FileMode.Open, FileAccess.Read)
            Dim sourceDoc As New GcPdfDocument()
            sourceDoc.Load(pdfStream)

            '' Text of each page of the loaded document, in page order:
            Dim pageTexts As New List(Of String)()
            For Each sourcePage In sourceDoc.Pages
                pageTexts.Add(sourcePage.GetText())
            Next

            '' Append a caption followed by the page text for every source page:
            Dim pageNumber = 0
            For Each pageText As String In pageTexts
                pageNumber += 1
                layout.AppendLine(String.Format("已加载文档第 {0} 页的文本:", pageNumber), captionFormat)
                layout.AppendLine(pageText)
            Next
            layout.PerformLayout(True)

            '' Draw the layout on the last page; while the split reports that text
            '' is left over, continue with the remainder on a freshly added page.
            Dim outcome As SplitResult
            Do
                '' 'overflow' receives the text that did not fit:
                Dim overflow As TextLayout = Nothing
                outcome = layout.Split(splitOptions, overflow)
                doc.Pages.Last.Graphics.DrawTextLayout(layout, PointF.Empty)
                If outcome = SplitResult.Split Then
                    layout = overflow
                    doc.NewPage()
                End If
            Loop While outcome = SplitResult.Split
        End Using

        '' Done: write the generated document and report its page count.
        doc.Save(stream)
        Return doc.Pages.Count
    End Function
End Class