// 完毕:
using System;
using System.IO;
using System.Drawing;
using System.Linq;
using System.Collections.Generic;
using GrapeCity.Documents.Pdf;
using GrapeCity.Documents.Text;
using GrapeCity.Documents.Pdf.TextMap;
using GrapeCity.Documents.Pdf.Structure;
using GrapeCity.Documents.Pdf.Recognition.Structure;
namespace DsPdfWeb.Demos
{
/// <summary>
/// Builds the outline (bookmark) tree of a tagged PDF from its heading structure
/// elements (H1, H2, H3 etc.).
/// </summary>
public class ReadTagsToOutlines
{
    /// <summary>
    /// Loads the sample PDF, adds an outline node for every heading element found among
    /// the children of the first top-level structure element, and saves the document.
    /// </summary>
    /// <param name="stream">The stream that the resulting PDF is saved to.</param>
    /// <returns>The number of pages in the document.</returns>
    public int CreatePDF(Stream stream)
    {
        var doc = new GcPdfDocument();
        // The source stream stays open until the method exits, i.e. until after doc.Save().
        using var s = File.OpenRead(Path.Combine("Resources", "PDFs", "C1Olap-QuickStart.pdf"));
        doc.Load(s);

        // Get the LogicalStructure and top parent element:
        LogicalStructure ls = doc.GetLogicalStructure();
        Element root = ls.Elements[0];

        // Chain of outline nodes that are currently "open", innermost on top, together with
        // the heading level each one was created for. Tracking the real heading levels
        // (rather than a single depth counter) keeps the nesting correct when the headings
        // jump by more than one level in either direction (e.g. H3 followed by H1, or H1
        // followed by H3), and when the very first heading is not an H1.
        var ancestors = new Stack<(int Level, OutlineNode Node)>();

        // Iterate over elements and select all heading elements (H1, H2, H3 etc.):
        foreach (Element e in root.Children)
        {
            string type = e.StructElement.Type;
            if (string.IsNullOrEmpty(type) || !type.StartsWith("H", StringComparison.Ordinal))
            {
                continue;
            }

            // Note: topmost level is 1. Types without a numeric suffix are skipped.
            if (!int.TryParse(type.Substring(1), out int headingLevel) || headingLevel < 1)
            {
                continue;
            }

            // Get the element text:
            string text = e.GetText();

            // Find the target page; headings that cannot be mapped to a page get no outline:
            var page = FindPage(e.StructElement);
            if (page == null)
            {
                continue;
            }

            // Close every open node that is at the same or a deeper level than this heading,
            // so that the top of the stack (if any) is the nearest shallower heading.
            while (ancestors.Count > 0 && ancestors.Peek().Level >= headingLevel)
            {
                ancestors.Pop();
            }

            OutlineNodeCollection siblings = ancestors.Count > 0
                ? ancestors.Peek().Node.Children
                : doc.Outlines;

            var node = new OutlineNode(text, new DestinationFit(page));
            siblings.Add(node);
            ancestors.Push((headingLevel, node));
        }

        doc.Save(stream);
        return doc.Pages.Count;
    }

    /// <summary>
    /// Finds a page for a structure element: the element's own <c>DefaultPage</c> if it is
    /// set, otherwise the first non-null result of a depth-first search of its children.
    /// </summary>
    /// <param name="se">The structure element to start the search from.</param>
    /// <returns>The page found, or <c>null</c> if neither the element nor any of its
    /// descendants has a <c>DefaultPage</c>.</returns>
    private Page FindPage(StructElement se)
    {
        if (se.DefaultPage != null)
        {
            return se.DefaultPage;
        }

        if (se.HasChildren)
        {
            foreach (var child in se.Children)
            {
                var p = FindPage(child);
                if (p != null)
                {
                    return p;
                }
            }
        }

        return null;
    }
}
}