上一篇博文(用VBA根据Word中的表格生成具有完全相同表结构的Markdown表或HTML表代码)完成后,拿着代码去第一个实验的Mistral嘚瑟一下,顺便臭一下它的水平不够,然后它臭了我一下:你这个没有处理嵌套表格,建议增加处理嵌套表格的功能。我让它给我生成处理嵌套表格的代码,结果折腾了一天它也没能成功。我一开始也小看了难度,自己鼓捣了一番也没成功。然后把题目出给其它AI,经过多次循循善诱,最接近成功的是最近长思考上线的Kimi------它给出了一个创建临时文档并在其中创建tempRng以复制单元格内容的方案,在我用下面的截图中最后一个魔鬼难度测试案例测试前执行结果基本符合预期(表结构正确,部分单元格文本内容有错误),但是败在魔鬼难度案例之下。我认为创建临时文档并在其中创建tempRng以复制单元格内容不但执行效率低,在用下面的魔法取得rowspan和colspan时也容易出错(有可能用到错误的临时文档中的Selection,也因此导致Kimi给出的代码读取单元格文本内容出错):
vbnet
' 计算合并单元格(仅当前表格内)
With tmpCell
.Select
Selection.SelectRow: rSpan = Selection.Rows.Count
.Select
Selection.SelectColumn: cSpan = Selection.Columns.Count
End With
想了一下,临时Range不过就是用来粘贴单元格内容,完全可以在源文档末尾创建,无需创建新文档,这样的话当前单元格对应的临时Range的结束位置总是源文档末尾,只是由于要使用递归(处理嵌套表格天然适宜用递归),当前单元格对应的临时Range的开始位置可能因为进入下一层递归而发生变化------递归不是天然与堆栈有关联吗?何不在每次递归时将当前单元格对应的临时Range的开始位置压入堆栈,本单元格递归处理完成后将其开始位置取出,然后将该位置直至文末区域删除不就行了吗?将这个思路喂给Kimi,Kimi修改了它上一次给出的代码,在这份代码的基础上我只做了很小的修改就得到了以下结果:
vbnet
Sub TableToHtmlWithNestedTable()
Dim html As String, oTable As Table
Dim numRows As Long, numCols As Long, rowIndex As Long, colIndex As Long
Dim oCell As Word.cell, doc As Document
Application.ScreenUpdating = False
If Not Selection.Information(wdWithInTable) Then
MsgBox "请将光标放在表格内", vbExclamation
Exit Sub
End If
Set doc = ActiveDocument
Set oTable = Selection.Tables(1)
numRows = oTable.Rows.Count
numCols = oTable.Columns.Count
html = "<table>" & vbCrLf
For rowIndex = 1 To numRows
html = html & " <tr>" & vbCrLf
For colIndex = 1 To numCols
On Error Resume Next
Set oCell = oTable.cell(rowIndex, colIndex)
If Err.Number <> 0 Then
On Error GoTo 0
GoTo nextCol
End If
On Error GoTo 0
Dim rSpan As Long, cSpan As Long
With oCell
.Select: Selection.SelectColumn: cSpan = Selection.Columns.Count
.Select: Selection.SelectRow: rSpan = Selection.Rows.Count
End With
Dim cellContent As String, currPos As Long
currPos = 0
cellContent = GetCellContent(doc, oCell, currPos)
Dim tag As String: tag = IIf(rowIndex = 1, "th", "td")
html = html & " <" & tag
If rSpan > 1 Then html = html & " rowspan=""" & rSpan & """"
If cSpan > 1 Then html = html & " colspan=""" & cSpan & """"
html = html & ">" & cellContent & "</" & tag & ">" & vbCrLf
nextCol:
Next colIndex
html = html & " </tr>" & vbCrLf
Next rowIndex
html = html & "</table>" & vbCrLf
Selection.start = oTable.Range.End
Dim bInsert As VbMsgBoxResult
bInsert = MsgBox("是否在表格下方输出HTML代码?", vbQuestion + vbYesNo, "确认")
If bInsert = vbYes Then
Selection.InsertAfter html & vbCrLf
Selection.Range.HighlightColorIndex = wdYellow
End If
With CreateObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
.SetText html: .PutInClipboard
End With
If depthWarning Then
MsgBox "已生成HTML代码(含警告:嵌套表格超过5层)", vbExclamation
Else
MsgBox "已生成HTML代码并复制到剪贴板", vbOKOnly
End If
Application.ScreenUpdating = True
End Sub
Function GetCellContent(doc As Document, oCell As cell, ByRef currPos As Long) As String
Dim html As String, tempStart As Long ' 临时范围起始位置(存入堆栈)
Dim stack() As Long, stackPtr As Long ' 堆栈及指针
Dim cellRng As Range, tempRng As Range ' 单元格范围及临时范围
Dim tblCount As Integer, i As Integer
Dim tmpTbl As Table, tmpCell As cell
Dim numRows As Integer, numCols As Integer
Dim rowIndex As Integer, colIndex As Integer
Dim rSpan As Integer, cSpan As Integer, cellContent As String, tag As String
' 初始化堆栈(存储临时范围起始位置)
stackPtr = 0
ReDim stack(0)
' 1. 复制当前单元格内容(排除末尾回车符)
Set cellRng = doc.Range(oCell.Range.start, oCell.Range.End - 1)
' 2. 在源文档末尾创建临时范围并压栈
Set tempRng = doc.content
tempRng.Collapse wdCollapseEnd
tempStart = tempRng.start
stackPtr = stackPtr + 1
ReDim Preserve stack(stackPtr)
stack(stackPtr) = tempStart ' 压栈
' 3. 粘贴内容到临时范围
If cellRng.End > cellRng.start Then
cellRng.Copy
tempRng.Paste
End If
Set tempRng = doc.Range(tempStart, doc.content.End) ' 更新临时范围
' 4. 处理临时范围内的内容
tblCount = tempRng.Tables.Count
If tblCount > 0 Then
For i = 1 To tblCount
Set tmpTbl = tempRng.Tables(i)
' 处理表格前的文本
If currPos < tmpTbl.Range.start Then
html = html & ProcessText(doc.Range(tempStart + currPos, tmpTbl.Range.start))
currPos = tmpTbl.Range.start - tempStart ' 相对临时范围的位置
End If
' 生成表格标签
html = html & "<table>" & vbCrLf
numRows = tmpTbl.Rows.Count
numCols = tmpTbl.Columns.Count
' 遍历行列
For rowIndex = 1 To numRows
html = html & " <tr>" & vbCrLf
For colIndex = 1 To numCols
' 跳过合并单元格
On Error Resume Next
Set tmpCell = tmpTbl.cell(rowIndex, colIndex)
If Err.Number <> 0 Then
On Error GoTo 0
GoTo nextCol
End If
On Error GoTo 0
' 计算合并单元格(仅当前表格内)
With tmpCell
.Select
Selection.SelectRow: rSpan = Selection.Rows.Count
.Select
Selection.SelectColumn: cSpan = Selection.Columns.Count
End With
' 递归处理单元格内容
currPos = 0
cellContent = GetCellContent(doc, tmpTbl.cell(rowIndex, colIndex), currPos)
' 生成单元格标签
tag = IIf(rowIndex = 1, "th", "td")
html = html & " <" & tag
If rSpan > 1 Then html = html & " rowspan=""" & rSpan & """"
If cSpan > 1 Then html = html & " colspan=""" & cSpan & """"
html = html & ">" & cellContent & "</" & tag & ">" & vbCrLf
nextCol:
Next colIndex
html = html & " </tr>" & vbCrLf
Next rowIndex
html = html & "</table>" & vbCrLf
currPos = tmpTbl.Range.End - tempStart ' 更新相对位置
Next i
' 处理表格后的文本
If currPos < tempRng.End - tempStart Then
html = html & ProcessText(doc.Range(tempStart + currPos, tempRng.End))
currPos = tempRng.End - tempStart
End If
Else
' 无表格时直接取文本
html = html & ProcessText(tempRng)
currPos = tempRng.End - tempStart
End If
' 5. 清理临时内容(从堆栈弹出起始位置并删除)
tempStart = stack(stackPtr)
stackPtr = stackPtr - 1
ReDim Preserve stack(stackPtr)
doc.Range(tempStart, doc.content.End).Delete
' 6. 返回结果
GetCellContent = html
End Function
Function ProcessText(rng As Range) As String
Dim text As String: text = rng.text
text = Replace(text, "&", "&")
text = Replace(text, "<", "<")
text = Replace(text, Chr(13), "<p/>")
text = Replace(text, Chr(11), "<br>")
ProcessText = Trim(text)
End Function
技术要点:
1、处理单元格内容时不能直接处理cell.range(否则连range.tables.count都不能准确取得,这是所有AI在我提示之前都会犯的错误),必须要将单元格中的内容复制到tempRng。
2、在源文档中直接建立tempRng,并将其开始位置保存在堆栈(或者类似的数据结构,我这里其实是用的数组模拟的堆栈)中。
用于测试的表格有以下几个:


魔鬼难度的(当然实际文档中谁设计出这样的表格该被打屁股):

生成的HTML代码粘贴到HTML文件中都能够完美重现Word文档中的表格结构。