C# 将 HTML 转换为 Word
1)HtmlToWordConverter.cs
cs
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using HtmlAgilityPack;
using System.Collections.Generic;
namespace ConsoleApp9
{
/// <summary>
/// Builds a .docx file from HTML fragments using the Open XML SDK and HtmlAgilityPack.
/// Block level: <c>p</c> and <c>h1</c>-<c>h6</c> become paragraphs; any other element is
/// searched recursively for such blocks. Inline level: <c>strong</c> adds bold, <c>em</c> adds
/// italic; any other inline element contributes only its text.
/// </summary>
public class HtmlToWordConverter
{
    /// <summary>
    /// Creates a Word document at <paramref name="docxFilePath"/> containing an optional centered
    /// title followed by the converted content of every HTML fragment.
    /// </summary>
    /// <param name="bodyStrings">HTML fragments, converted in order. Null or empty entries are skipped.</param>
    /// <param name="titleString">Title text; no title paragraph is written when null or empty.</param>
    /// <param name="docxFilePath">Path of the .docx file to create.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="bodyStrings"/> is null.</exception>
    public void ConvertHtmlToWord(List<string> bodyStrings, string titleString, string docxFilePath)
    {
        // Validate before creating the package so a bad call does not leave an empty file behind.
        if (bodyStrings == null)
        {
            throw new System.ArgumentNullException("bodyStrings");
        }

        using (WordprocessingDocument wordDocument = WordprocessingDocument.Create(docxFilePath, WordprocessingDocumentType.Document))
        {
            MainDocumentPart mainPart = wordDocument.AddMainDocumentPart();
            mainPart.Document = new Document();
            Body body = mainPart.Document.AppendChild(new Body());

            // Add centered title
            if (!string.IsNullOrEmpty(titleString))
            {
                Paragraph titleParagraph = new Paragraph(
                    new ParagraphProperties(new Justification() { Val = JustificationValues.Center }),
                    new Run(new Text(titleString)));
                body.Append(titleParagraph);
            }

            // Convert each bodyString to Word content
            foreach (string bodyString in bodyStrings)
            {
                if (string.IsNullOrEmpty(bodyString))
                {
                    continue;
                }

                HtmlDocument htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(bodyString);
                ConvertHtmlNodesToWord(htmlDoc.DocumentNode.ChildNodes, body);
            }
        }
    }

    /// <summary>
    /// Walks <paramref name="nodes"/> and appends one Word paragraph to <paramref name="parent"/>
    /// for every <c>p</c> or <c>h1</c>-<c>h6</c> element found, in document order.
    /// </summary>
    /// <param name="nodes">HTML nodes to scan.</param>
    /// <param name="parent">Open XML element that receives the generated paragraphs.</param>
    private void ConvertHtmlNodesToWord(HtmlNodeCollection nodes, OpenXmlElement parent)
    {
        foreach (HtmlNode node in nodes)
        {
            switch (node.Name)
            {
                case "p": // Paragraph
                {
                    Paragraph para = new Paragraph();
                    ConvertHtmlToRuns(node.ChildNodes, para, null);
                    parent.Append(para);
                    break;
                }
                case "h1": // Heading 1
                case "h2": // Heading 2
                case "h3": // Heading 3
                case "h4": // Heading 4
                case "h5": // Heading 5
                case "h6": // Heading 6
                {
                    // The case labels guarantee the second character is a digit 1-6.
                    int headingLevel = node.Name[1] - '0';
                    Paragraph heading = new Paragraph();
                    ConvertHtmlToRuns(node.ChildNodes, heading, CreateHeadingRunProperties(headingLevel));
                    parent.Append(heading);
                    break;
                }
                // Add more cases for other HTML elements like tables, lists, etc. as needed
                default:
                    // Only unhandled containers are searched further. The children of a converted
                    // block are already in its runs, so descending again would duplicate them.
                    if (node.HasChildNodes)
                    {
                        ConvertHtmlNodesToWord(node.ChildNodes, parent);
                    }
                    break;
            }
        }
    }

    /// <summary>
    /// Creates the run formatting for a heading: bold, colored 2E74B5, and sized
    /// 24 half-points (12 pt) for level 1, shrinking by 2 half-points per level.
    /// </summary>
    /// <param name="headingLevel">Heading level, 1 to 6.</param>
    /// <returns>A new, unattached <see cref="RunProperties"/> instance.</returns>
    private static RunProperties CreateHeadingRunProperties(int headingLevel)
    {
        int halfPoints = 24 - (headingLevel - 1) * 2;

        // The typed setters place each child at its schema position (b, color, sz);
        // appending them in arbitrary order yields markup that fails validation.
        RunProperties properties = new RunProperties();
        properties.Bold = new Bold();
        properties.Color = new DocumentFormat.OpenXml.Wordprocessing.Color() { Val = "2E74B5" }; // You can adjust the color as needed
        properties.FontSize = new FontSize()
        {
            Val = new StringValue(halfPoints.ToString(System.Globalization.CultureInfo.InvariantCulture))
        };
        return properties;
    }

    /// <summary>
    /// Appends one run per text node found under <paramref name="nodes"/> to
    /// <paramref name="paragraph"/>. Each run gets its own copy of the formatting in effect,
    /// so bold/italic apply only to the text inside the corresponding tag.
    /// </summary>
    /// <param name="nodes">Inline HTML nodes to convert.</param>
    /// <param name="paragraph">Paragraph that receives the runs.</param>
    /// <param name="format">Formatting inherited from enclosing elements, or null for none.</param>
    private void ConvertHtmlToRuns(HtmlNodeCollection nodes, Paragraph paragraph, RunProperties format)
    {
        foreach (HtmlNode node in nodes)
        {
            switch (node.Name)
            {
                case "#text": // Text node
                {
                    // InnerText keeps entities such as &amp; verbatim; decode them for Word.
                    string text = HtmlEntity.DeEntitize(node.InnerText);
                    if (string.IsNullOrEmpty(text))
                    {
                        break;
                    }

                    Run run = new Run();
                    if (format != null)
                    {
                        // Run properties must be the first child of the run, and an element
                        // instance can only have one parent, hence the clone.
                        run.Append(format.CloneNode(true));
                    }

                    // Preserve leading/trailing spaces so adjacent runs do not get glued together.
                    run.Append(new Text(text) { Space = SpaceProcessingModeValues.Preserve });
                    paragraph.Append(run);
                    break;
                }
                case "strong": // Strong (bold) text
                {
                    RunProperties boldFormat = CloneFormat(format);
                    boldFormat.Bold = new Bold();
                    ConvertHtmlToRuns(node.ChildNodes, paragraph, boldFormat);
                    break;
                }
                case "em": // Emphasized (italic) text
                {
                    RunProperties italicFormat = CloneFormat(format);
                    italicFormat.Italic = new Italic();
                    ConvertHtmlToRuns(node.ChildNodes, paragraph, italicFormat);
                    break;
                }
                // Add more cases for other HTML formatting elements as needed
                default:
                    ConvertHtmlToRuns(node.ChildNodes, paragraph, format);
                    break;
            }
        }
    }

    /// <summary>
    /// Returns an independent copy of <paramref name="format"/>, or empty properties when it is null.
    /// </summary>
    private static RunProperties CloneFormat(RunProperties format)
    {
        return format == null ? new RunProperties() : (RunProperties)format.CloneNode(true);
    }
}
}
2)Program.cs
cs
using System;
using System.Collections.Generic;
namespace ConsoleApp9
{
/// <summary>
/// Console entry point that writes a sample document with <see cref="HtmlToWordConverter"/>.
/// </summary>
class Program
{
    /// <summary>
    /// Converts two hard-coded HTML fragments into "tests.docx" in the application's base
    /// directory and prints a confirmation message.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // The verbatim strings continue at column 0 on purpose: any leading whitespace
        // on those lines would become part of the HTML text.
        List<string> bodyStrings = new List<string>{
@"<h1>内容1</h1>
<p>第一段内容</p>
<p>第二段内容</p>",
@"<h1>内容2</h1>
<p>第一段内容</p>
<p>第二段内容</p>"};
        string titleString = "HTML测试";

        // Path.Combine inserts a directory separator only when one is missing, so the
        // result is correct whether or not BaseDirectory ends with one.
        string docxFilePath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "tests.docx");

        HtmlToWordConverter converter = new HtmlToWordConverter();
        converter.ConvertHtmlToWord(bodyStrings, titleString, docxFilePath);
        Console.WriteLine("生成word成功");
    }
}
}