How to create an XML document that contains tags for all content control names in a Microsoft Word document, with Open XML and C#

 

If you want to bind content controls in a Microsoft Word document to data, you might want to use a CustomXmlPart. This is an XML document that binds content controls to XML tag names.

 

The XML produced will look like this:

<?xml version="1.0" encoding="utf-8"?>
<Root>
<Employee_EmployeeCity></Employee_EmployeeCity>
<Employee_CompetitionAttachment></Employee_CompetitionAttachment>
<Employee_FirstName></Employee_FirstName>
<Employee_FormattedName></Employee_FormattedName>
<Employee_HandinDate></Employee_HandinDate>
<Employee_HouseNumber></Employee_HouseNumber>
<Employee_HouseSuffix></Employee_HouseSuffix>
<Employee_PostalCode></Employee_PostalCode>
<Employee_RelationAttachment></Employee_RelationAttachment>
<Employee_Salutation></Employee_Salutation>
<Employee_Street></Employee_Street>
<Enlistment_CompetitionClause></Enlistment_CompetitionClause>
<Enlistment_EmployeeFirstDay></Enlistment_EmployeeFirstDay>
<Enlistment_RelationshipClause></Enlistment_RelationshipClause>
<Enlistment_TraineeTravel></Enlistment_TraineeTravel>
<Function_CurrentDate></Function_CurrentDate>
<Function_GetManager></Function_GetManager>
<ServiceRequest_Id></ServiceRequest_Id>
</Root>

 

To produce that XML document based on all content control names in a Microsoft Word document, you can use the following code:

namespace EndToEndTest
{
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Xml.Linq;
/// <summary>
/// Test fixture that reads the tags of all content controls in a Word document
/// and writes an XML document containing one empty element per unique tag,
/// suitable as the starting point for a CustomXmlPart.
/// </summary>
[TestClass]
public class RliResearch
{
    /// <summary>
    /// Generates the XML file for a fixed sample document and writes the elapsed
    /// time in milliseconds to the console.
    /// </summary>
    [TestMethod]
    public void Test_with_duration()
    {
        var watch = new System.Diagnostics.Stopwatch();
        watch.Start();

        string sourcePath = @"C:\Temp\Test.docx";
        // A fresh GUID in the file name keeps repeated runs from overwriting each other.
        string destinationPath = string.Format(@"C:\Temp\{0}_Test.xml", Guid.NewGuid());
        Create_XML_file_for_CustomXmlPart(sourcePath, destinationPath);

        watch.Stop();
        System.Console.WriteLine(watch.Elapsed.TotalMilliseconds);
    }

    /// <summary>
    /// Reads the Word document at <paramref name="sourcePath"/>, collects its unique
    /// content control tags and saves them as child elements of a "Root" element
    /// to <paramref name="destinationPath"/>.
    /// </summary>
    /// <param name="sourcePath">Path of the Word document to inspect.</param>
    /// <param name="destinationPath">Path the resulting XML document is saved to.</param>
    public void Create_XML_file_for_CustomXmlPart(string sourcePath, string destinationPath)
    {
        List<string> names;
        using (Stream content = GetFileContent(sourcePath))
        {
            names = GetUniqueContentControlNames(content);
        }

        XDocument document = CreateXmlDocument("Root", names);
        document.Save(destinationPath);
    }

    /// <summary>
    /// Gets the contents of the file as a stream, don't forget to use this function in a "using" statement.
    /// </summary>
    /// <param name="path">Path of the file to read.</param>
    /// <returns>A <see cref="MemoryStream"/> over a copy of the file's bytes.</returns>
    public Stream GetFileContent(string path)
    {
        // The whole file is copied into memory, so the returned stream does not
        // depend on a handle to the file on disk.
        byte[] contentBytes = File.ReadAllBytes(path);
        return new MemoryStream(contentBytes);
    }

    /// <summary>
    /// Create a XDocument, with a root element based on the given "rootName", containing child elements based on the given "tagnames".
    /// </summary>
    /// <param name="rootName">Name of the root element.</param>
    /// <param name="tagNames">
    /// Names of the (empty) child elements, in output order. Every entry must be a
    /// valid XML element name, otherwise the <see cref="XElement"/> constructor throws.
    /// </param>
    /// <returns>The new document, with an XML declaration for version 1.0 / utf-8.</returns>
    public XDocument CreateXmlDocument(string rootName, List<string> tagNames)
    {
        var document = new XDocument(
            new XDeclaration("1.0", "utf-8", null),
            new XElement(rootName,
                // string.Empty as content yields <Tag></Tag> rather than <Tag />.
                tagNames.Select(x => new XElement(x, string.Empty))));
        return document;
    }

    /// <summary>
    /// Gets all unique content control names in the given document.
    /// </summary>
    /// <param name="content">Stream containing the Word document; opened read-only.</param>
    /// <returns>
    /// The distinct (culture-sensitive, case-insensitive), sorted tag values of the
    /// document's content controls. Content controls without properties, without a
    /// tag, or with an empty/whitespace tag value are skipped.
    /// </returns>
    public List<string> GetUniqueContentControlNames(Stream content)
    {
        var names = new List<string>();
        using (WordprocessingDocument doc =
            WordprocessingDocument.Open(content, false))
        {
            foreach (var cc in doc.ContentControls())
            {
                // FirstOrDefault returns null when the content control has no
                // properties element; skip it instead of dereferencing null.
                SdtProperties props = cc.Elements<SdtProperties>().FirstOrDefault();
                if (props == null)
                {
                    continue;
                }

                Tag tag = props.Elements<Tag>().FirstOrDefault();
                if (tag == null)
                {
                    continue;
                }

                // A tag without a usable value cannot become an XML element name.
                string name = tag.Val;
                if (!string.IsNullOrWhiteSpace(name))
                {
                    names.Add(name);
                }
            }
        }

        // Makes names unique and order by name.
        var uniqueNames = names.Distinct(StringComparer.CurrentCultureIgnoreCase).OrderBy(x => x).ToList();
        return uniqueNames;
    }
}
/// <summary>
/// Extension methods that enumerate the content controls (structured document
/// tags) of a part or of a whole Word document.
/// Code from: http://openxmldeveloper.org/blog/b/openxmldeveloper/archive/2011/04/11/137383.aspx
/// </summary>
public static class ContentControlExtensions
{
    /// <summary>
    /// Returns every content control element below the root element of the given part.
    /// </summary>
    /// <param name="part">The part to search.</param>
    /// <returns>
    /// A lazily evaluated sequence of content control elements; empty when the
    /// part has no root element.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="part"/> is null.</exception>
    public static IEnumerable<OpenXmlElement> ContentControls(
        this OpenXmlPart part)
    {
        if (part == null)
        {
            throw new ArgumentNullException("part");
        }

        OpenXmlElement root = part.RootElement;
        if (root == null)
        {
            return Enumerable.Empty<OpenXmlElement>();
        }

        // SdtElement is the common base of SdtBlock, SdtRun, SdtCell, SdtRow and
        // SdtRunRuby, so content controls wrapping table cells/rows are found too.
        return root
            .Descendants()
            .Where(e => e is SdtElement);
    }

    /// <summary>
    /// Returns the content controls of the main document part, followed by those
    /// of its header parts, footer parts, footnotes part and endnotes part.
    /// </summary>
    /// <param name="doc">The document to search.</param>
    /// <returns>
    /// A lazily evaluated sequence of content control elements; empty when the
    /// document has no main document part.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="doc"/> is null.</exception>
    public static IEnumerable<OpenXmlElement> ContentControls(
        this WordprocessingDocument doc)
    {
        // Validate here, outside the iterator, so the exception is thrown at the
        // call site rather than on first enumeration.
        if (doc == null)
        {
            throw new ArgumentNullException("doc");
        }

        return EnumerateDocumentContentControls(doc);
    }

    // Iterator behind ContentControls(WordprocessingDocument): walks the main part and its related parts in order.
    private static IEnumerable<OpenXmlElement> EnumerateDocumentContentControls(
        WordprocessingDocument doc)
    {
        MainDocumentPart mainPart = doc.MainDocumentPart;
        if (mainPart == null)
        {
            yield break;
        }

        foreach (var cc in mainPart.ContentControls())
        {
            yield return cc;
        }

        foreach (var header in mainPart.HeaderParts)
        {
            foreach (var cc in header.ContentControls())
            {
                yield return cc;
            }
        }

        foreach (var footer in mainPart.FooterParts)
        {
            foreach (var cc in footer.ContentControls())
            {
                yield return cc;
            }
        }

        // Footnotes and endnotes parts are optional.
        if (mainPart.FootnotesPart != null)
        {
            foreach (var cc in mainPart.FootnotesPart.ContentControls())
            {
                yield return cc;
            }
        }

        if (mainPart.EndnotesPart != null)
        {
            foreach (var cc in mainPart.EndnotesPart.ContentControls())
            {
                yield return cc;
            }
        }
    }
}
}

Leave a Reply

Your email address will not be published. Required fields are marked *