TextMap.vb
'' 完毕:
Imports System.IO
Imports System.Drawing
Imports System.Numerics
Imports System.Collections.Generic
Imports System.Linq
Imports GrapeCity.Documents.Text
Imports GrapeCity.Documents.Drawing
Imports GrapeCity.Documents.Pdf
Imports GrapeCity.Documents.Pdf.Annotations
Imports GrapeCity.Documents.Pdf.Graphics
Imports GrapeCity.Documents.Pdf.TextMap

'' This sample demonstrates how to use the text map of a page in a PDF
'' to find the geometric positions of text lines on the page,
'' and to locate the text at a specific position.
'' The PDF used in this sample was created by TimeSheet.
Public Class TextMap
    '' Builds the sample document and saves it to the given stream.
    ''
    '' The method loads Resources/PDFs/TimeSheet.pdf into a temporary document,
    '' gets the text map of its first page, prints the text found between two
    '' hit-tested points, then lists every fragment of the map together with
    '' its coordinates. The loaded document is merged into the result.
    ''
    '' Parameters:
    ''   stream - the stream that the resulting PDF is saved to.
    '' Returns:
    ''   the number of pages in the resulting document.
    Function CreatePDF(ByVal stream As Stream) As Integer
        Dim doc = New GcPdfDocument()
        Dim page = doc.NewPage()

        '' NOTE(review): the note text below ends with a comma and an empty
        '' concatenated string, so it looks truncated - confirm the intended wording.
        Dim rc = Util.AddNote(
            "此示例将 TimeSheet 示例创建的 PDF 加载到临时 GcPdfDocument 中," +
            "",
            page)

        '' Set up text formatting and layout:
        Dim tf = New TextFormat() With
        {
            .Font = StandardFonts.Times,
            .FontSize = 13
        }
        Dim tfFound = New TextFormat() With
        {
            .Font = StandardFonts.TimesBold,
            .FontSize = 14,
            .ForeColor = Color.DarkBlue
        }
        '' The layout starts below the note rectangle (rc.Bottom + 36) and
        '' reuses the note's left offset for the remaining margins.
        Dim tl = New TextLayout(72) With
        {
            .MaxWidth = doc.PageSize.Width,
            .MaxHeight = doc.PageSize.Height,
            .MarginAll = rc.Left,
            .MarginTop = rc.Bottom + 36,
            .TabStops = New List(Of TabStop)() From {New TabStop(72 * 2)}
        }
        '' Continuation pages have no note, so their top margin is rc.Left.
        Dim tso = New TextSplitOptions(tl) With
        {
            .MinLinesInFirstParagraph = 2,
            .MinLinesInLastParagraph = 2,
            .RestMarginTop = rc.Left
        }

        '' Open an arbitrary PDF, load it into a temporary document and use the map to find some texts:
        Using fs = New FileStream(Path.Combine("Resources", "PDFs", "TimeSheet.pdf"), FileMode.Open, FileAccess.Read)
            Dim doc1 = New GcPdfDocument()
            doc1.Load(fs)
            Dim tmap = doc1.Pages(0).GetTextMap()

            '' We retrieve the text at a specific (known to us) geometric location on the page.
            '' (tx0, ty0) and (tx1, ty1) are the two corners of the search rectangle;
            '' the values are scaled by 72 before hit testing (presumably inches to points - confirm).
            Dim tx0 = 2.1F, ty0 = 3.37F, tx1 = 3.1F, ty1 = 3.5F
            Dim htiFrom = tmap.HitTest(tx0 * 72, ty0 * 72)
            '' The second point must use tx1 as its X coordinate so that it matches
            '' the rectangle reported in the message below.
            Dim htiTo = tmap.HitTest(tx1 * 72, ty1 * 72)
            Dim range1 As TextMapFragment = Nothing, text1 As String = Nothing
            tmap.GetFragment(htiFrom.Pos, htiTo.Pos, range1, text1)
            '' All four values use the F2 specifier; a specifier with a leading space
            '' (" f2") is treated as a custom format and does not print the number.
            tl.AppendLine($"Looked for text inside rectangle x={tx0:F2}"", y = {ty0:F2}"", width={tx1 - tx0:F2}"", height = {ty1 - ty0:F2}"", found:", tf)
            tl.AppendLine(text1, tfFound)
            tl.AppendLine()

            '' Get all text fragments and their positions on the page:
            tl.AppendLine("List of all texts found on the page", tf)
            Dim range As TextMapFragment = Nothing, text As String = Nothing
            tmap.GetFragment(range, text)
            For Each tlf In range
                Dim coords = tmap.GetCoords(tlf)
                tl.Append($"Text at ({coords.B.X / 72:F2}"", {coords.B.Y / 72:F2}""):{vbTab}", tf)
                tl.AppendLine(tmap.GetText(tlf), tfFound)
            Next

            '' Print the results, adding a new page for each part that did not fit:
            tl.PerformLayout(True)
            While True
                '' 'rest' will receive the text that did not fit:
                Dim rest As TextLayout = Nothing
                Dim splitResult = tl.Split(tso, rest)
                doc.Pages.Last.Graphics.DrawTextLayout(tl, PointF.Empty)
                If splitResult <> SplitResult.Split Then
                    Exit While
                End If
                tl = rest
                doc.NewPage()
            End While

            '' Append the original document for reference:
            doc.MergeWithDocument(doc1, New MergeDocumentOptions())

            '' Done; save while the source file stream is still open:
            doc.Save(stream)
        End Using
        Return doc.Pages.Count
    End Function
End Class