C# 将 HTML 转换为 Word 文档

C# 将 HTML 转换为 Word 文档

1)HtmlToWordConverter.cs

cs 复制代码
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using HtmlAgilityPack;
using System.Collections.Generic;

namespace ConsoleApp9
{
    /// <summary>
    /// Converts a small subset of HTML (paragraphs, headings h1-h6 and the
    /// inline elements strong/em) into a WordprocessingML (.docx) document.
    /// Unsupported block elements are skipped, but their descendants are still
    /// searched for supported ones.
    /// </summary>
    public class HtmlToWordConverter
    {
        // Fixed RGB hex color applied to every heading run.
        private const string HeadingColorHex = "2E74B5";

        // Font sizes are written as FontSize values: h1 uses 24 and each
        // deeper heading level is 2 smaller (h6 = 14).
        private const int HeadingOneFontSize = 24;
        private const int HeadingFontSizeStep = 2;

        // HTML treats runs of these characters as a single space. A non-breaking
        // space (e.g. from &nbsp;) is deliberately not part of the set.
        private static readonly System.Text.RegularExpressions.Regex HtmlWhitespaceRun =
            new System.Text.RegularExpressions.Regex(@"[ \t\r\n\f]+");

        /// <summary>
        /// Creates a .docx file containing an optional centered title followed
        /// by the converted content of each HTML fragment, in order.
        /// </summary>
        /// <param name="bodyStrings">HTML fragments to convert; null or empty entries are skipped.</param>
        /// <param name="titleString">Title text; no title paragraph is written when null or empty.</param>
        /// <param name="docxFilePath">Path of the .docx file to create.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="bodyStrings"/> is null.</exception>
        public void ConvertHtmlToWord(List<string> bodyStrings, string titleString, string docxFilePath)
        {
            // Validate before touching the file system so that a bad argument
            // does not leave a half-written document behind.
            if (bodyStrings == null)
            {
                throw new System.ArgumentNullException(nameof(bodyStrings));
            }

            using (WordprocessingDocument wordDocument = WordprocessingDocument.Create(docxFilePath, WordprocessingDocumentType.Document))
            {
                MainDocumentPart mainPart = wordDocument.AddMainDocumentPart();
                mainPart.Document = new Document();
                Body body = mainPart.Document.AppendChild(new Body());

                // Add centered title
                if (!string.IsNullOrEmpty(titleString))
                {
                    Paragraph titleParagraph = new Paragraph();
                    titleParagraph.Append(new ParagraphProperties(new Justification() { Val = JustificationValues.Center }));
                    titleParagraph.Append(new Run(CreatePreservedText(titleString)));
                    body.Append(titleParagraph);
                }

                // Convert each bodyString to Word content
                foreach (string bodyString in bodyStrings)
                {
                    if (string.IsNullOrEmpty(bodyString))
                    {
                        continue;
                    }

                    HtmlDocument htmlDoc = new HtmlDocument();
                    htmlDoc.LoadHtml(bodyString);

                    ConvertHtmlNodesToWord(htmlDoc.DocumentNode.ChildNodes, body);
                }
            }
        }

        /// <summary>
        /// Walks block-level nodes and appends one Word paragraph to
        /// <paramref name="parent"/> for every p / h1-h6 element found.
        /// </summary>
        private void ConvertHtmlNodesToWord(HtmlNodeCollection nodes, OpenXmlElement parent)
        {
            foreach (HtmlNode node in nodes)
            {
                switch (node.Name)
                {
                    case "p": // Paragraph
                    {
                        Paragraph paragraph = new Paragraph();
                        ConvertHtmlToRuns(node.ChildNodes, paragraph, false, false, null, null);
                        parent.Append(paragraph);
                        break;
                    }

                    case "h1": // Heading 1
                    case "h2": // Heading 2
                    case "h3": // Heading 3
                    case "h4": // Heading 4
                    case "h5": // Heading 5
                    case "h6": // Heading 6
                    {
                        // The case labels guarantee the second character is a digit 1-6.
                        int headingLevel = node.Name[1] - '0';
                        int fontSize = HeadingOneFontSize - (headingLevel - 1) * HeadingFontSizeStep;

                        Paragraph heading = new Paragraph();
                        ConvertHtmlToRuns(
                            node.ChildNodes,
                            heading,
                            true,
                            false,
                            HeadingColorHex, // You can adjust the color as needed
                            fontSize.ToString(System.Globalization.CultureInfo.InvariantCulture));
                        parent.Append(heading);
                        break;
                    }

                    // Add more cases for other HTML elements like tables, lists, etc. as needed

                    default:
                        // Only containers that were NOT converted are searched further.
                        // The content of p / h* has already been consumed as inline
                        // text, so descending into it again would duplicate it.
                        if (node.HasChildNodes)
                        {
                            ConvertHtmlNodesToWord(node.ChildNodes, parent);
                        }
                        break;
                }
            }
        }

        /// <summary>
        /// Appends one run per text node to <paramref name="paragraph"/>. The
        /// formatting arguments describe what is inherited from enclosing
        /// elements; strong and em switch on bold / italic for their own
        /// descendants only.
        /// </summary>
        private void ConvertHtmlToRuns(HtmlNodeCollection nodes, Paragraph paragraph, bool bold, bool italic, string colorHex, string fontSize)
        {
            foreach (HtmlNode node in nodes)
            {
                switch (node.Name)
                {
                    case "#text": // Text node
                    {
                        string text = NormalizeText(node.InnerText);
                        if (text.Length == 0)
                        {
                            break;
                        }

                        Run run = new Run();
                        RunProperties properties = CreateRunProperties(bold, italic, colorHex, fontSize);
                        if (properties != null)
                        {
                            // Run properties are added before the text so they are the run's first child.
                            run.Append(properties);
                        }
                        run.Append(CreatePreservedText(text));
                        paragraph.Append(run);
                        break;
                    }

                    case "strong": // Strong (bold) text
                        ConvertHtmlToRuns(node.ChildNodes, paragraph, true, italic, colorHex, fontSize);
                        break;

                    case "em": // Emphasized (italic) text
                        ConvertHtmlToRuns(node.ChildNodes, paragraph, bold, true, colorHex, fontSize);
                        break;

                    // Add more cases for other HTML formatting elements as needed

                    default:
                        // Unknown inline element: keep its text, ignore its formatting.
                        ConvertHtmlToRuns(node.ChildNodes, paragraph, bold, italic, colorHex, fontSize);
                        break;
                }
            }
        }

        /// <summary>
        /// Builds the run properties for the given formatting, or returns null
        /// when no formatting applies. Children are appended in the order
        /// bold, italic, color, size.
        /// </summary>
        private static RunProperties CreateRunProperties(bool bold, bool italic, string colorHex, string fontSize)
        {
            if (!bold && !italic && colorHex == null && fontSize == null)
            {
                return null;
            }

            RunProperties properties = new RunProperties();
            if (bold)
            {
                properties.Append(new Bold());
            }
            if (italic)
            {
                properties.Append(new Italic());
            }
            if (colorHex != null)
            {
                properties.Append(new DocumentFormat.OpenXml.Wordprocessing.Color() { Val = colorHex });
            }
            if (fontSize != null)
            {
                properties.Append(new FontSize() { Val = fontSize });
            }
            return properties;
        }

        /// <summary>
        /// Decodes HTML entities and collapses each run of HTML whitespace
        /// into a single space. Returns an empty string for null or empty input.
        /// </summary>
        private static string NormalizeText(string rawText)
        {
            if (string.IsNullOrEmpty(rawText))
            {
                return string.Empty;
            }

            string decoded = HtmlEntity.DeEntitize(rawText);
            return HtmlWhitespaceRun.Replace(decoded, " ");
        }

        /// <summary>
        /// Creates a text element whose space handling is set to Preserve, so
        /// that spaces at the start or end of the text are kept.
        /// </summary>
        private static Text CreatePreservedText(string value)
        {
            return new Text(value) { Space = SpaceProcessingModeValues.Preserve };
        }
    }
}

2)Program.cs

cs 复制代码
using System;
using System.Collections.Generic;

namespace ConsoleApp9
{
    /// <summary>
    /// Console entry point that converts two sample HTML fragments into
    /// "tests.docx" next to the executable.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the sample input, runs the converter and prints a
        /// confirmation message to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Sample fragments; each one is a heading followed by two paragraphs.
            var htmlFragments = new List<string>();
            htmlFragments.Add(@"<h1>内容1</h1>
            <p>第一段内容</p>
            <p>第二段内容</p>");
            htmlFragments.Add(@"<h1>内容2</h1>
            <p>第一段内容</p>
            <p>第二段内容</p>");

            var documentTitle = "HTML测试";

            // The output file name is appended directly to the application's base directory.
            var outputPath = AppDomain.CurrentDomain.BaseDirectory + "tests.docx";

            new HtmlToWordConverter().ConvertHtmlToWord(htmlFragments, documentTitle, outputPath);

            Console.WriteLine("生成word成功");
        }
    }
}
相关推荐
pingan878710 分钟前
试试 docx.js 一键生成 Word 文档,效果很不错
开发语言·前端·javascript·ecmascript·word
封印师请假去地球钓鱼1 小时前
问题解决|word中单独一页设置横向
word
l1t1 小时前
DeepSeek总结的用 C# 构建 DuckDB 插件说明
前端·数据库·c#·插件·duckdb
iReachers2 小时前
恒盾C#混淆加密大师 1.4.5 最新2026版本发布 (附CSDN下载地址)
c#·c#混淆·c#加密·wpf加密·winform加密
Predestination王瀞潞2 小时前
5.4.1 通信->WWW万维网内容访问标准(W3C):WWW(World Wide Web)基本信息&核心设计目标&现实意义
css·网络·网络协议·html·url·www
小小算法师2 小时前
分节符、分页符、分栏符,一次性讲明白
word
历程里程碑3 小时前
43. TCP -2实现英文查中文功能
java·linux·开发语言·c++·udp·c#·排序算法
月巴月巴白勺合鸟月半3 小时前
一次PDF文件的处理(二)
pdf·c#
封印师请假去地球钓鱼4 小时前
问题解决|Word大纲层级混乱?在word导航窗格中调整目录缩进
word
汽车芯猿4 小时前
Word频繁崩溃?一步解决 Office 加载项冲突
word