在用WebBrowser编程实现网页操作自动化时,常要分析网页Html,例如网页在加载数据时,常会显示"系统处理中,请稍候..",我们需要在数据加载完成后才能继续下一步操作,如何抓取这个信息的网页html元素变化,从而判断数据加载完毕呢?用IE开发者工具是不可能抓取到的,太快了。(当然,设置足够长的延时,也是可以实现的,只是不够科学及稳妥,毕竟有时因为网络原因,数据加载时间可能超过原来设定时间,其次,设置延时过长也导致程序不够友好)
实现的办法:
**1、**先用"系统处理中"查找(泛查找),并在找到html中,再细找缩小html元素范围。
vbnet
bb = FindHtmlElement("系统处理中", ExtendedWebBrowser1.Document, "", "InnerText", false)
**2、**添加一个Timer控件,设定100毫秒。根据 1中找到的元素,进行不断抓取,并将抓到的结果输出到文本。
3、 将2中输出,导入Excel,进行筛选,并从中找到重复次数少的行,便是数据加载、加载完成之间的变化。
vbnet
Private Sub TimerProgress_Tick(sender As Object, e As EventArgs) Handles TimerProgress.Tick
If Gethtmel Then
Dim bb As HtmlElement
bb = FindHtmlElement("all_jzts", ExtendedWebBrowser1.Document, "div", "id", True)
If Not bb Is Nothing Then
'WriteRunLog("Style : " + bb.Style)
WriteRunLog(bb.OuterHtml)
Else
WriteRunLog("all_jzts没找到")
End If
bb = FindHtmlElement("jzts", ExtendedWebBrowser1.Document, "div", "id", True)
If Not bb Is Nothing Then
'WriteRunLog("Style : " + bb.Style)
WriteRunLog(bb.OuterHtml)
Else
WriteRunLog("jzts没找到")
End If
'Gethtmel = False
End If
'系统处理中,请稍候...
Application.DoEvents()
End Sub
vbnet
Function FindHtmlElement(ByVal FindText As String, ByVal doc As HtmlDocument, ByVal cTagName As String, ByVal cGetAttribute As String, Optional ByVal StrictMatching As Boolean = False) As HtmlElement
'cTagName:检索具有指定 html 标记的元素,标记需要输入完整的,缺省时查找所有。
'例如:<input class="button" type="submit" value=提交 style="cursor:hand">,不能只输入"i",需要输入"input"
'cGetAttribute :比较的属性类型,取值为:Id、InnerText、Name、title、classname、value、
'Id、InnerText可以通过GetAttribute获取,也可以通过HtmlElement.Id、HtmlElement.InnerText获取,所以代码简化为用GetAttribute获取。
'doc:WebBrowserExt1.Document
'GetAttribute("classname") '例如显示class="commonTable"的值commonTable
'StrictMatching:True严格匹配FindText
'WriteRunLog("FindHtmlElement开始:" + FindText)
Try
Dim i, k As Integer
FindHtmlElement = Nothing
FindHtmlElementOfDocument = doc
If doc Is Nothing Then '2023.11.15在递归调用中,因为有些iFrames还未真正加载,从而导致传入的doc = doc.Window.Frames.Item(k).Document 为 Nothing ,从而引发异常:未将对象引用设置到对象的实例。
Exit Function
End If
If LCase(cGetAttribute) = "innertext" Then 'InnerText必须严格匹配,否则找到的结果是错误的。
'StrictMatching = True
End If
If cTagName <> "" Then
Dim EE As HtmlElementCollection = doc.GetElementsByTagName(cTagName)
For i = 0 To EE.Count - 1
If InStr(EE.Item(i).GetAttribute(cGetAttribute), FindText) > 0 _
And (Not StrictMatching Or InStr(FindText, EE.Item(i).GetAttribute(cGetAttribute)) > 0) Then
FindHtmlElement = EE.Item(i)
'WriteRunLog("Loop1")
'WriteRunLog("FindHtmlElement结束0")
Exit Function '找到就退出
End If
Next
Else
For i = 0 To doc.All.Count - 1
If InStr(doc.All.Item(i).GetAttribute(cGetAttribute), FindText) > 0 _
And (Not StrictMatching Or InStr(FindText, doc.All.Item(i).GetAttribute(cGetAttribute)) > 0) And (cTagName = "" Or LCase(cTagName) = LCase(doc.All.Item(i).TagName)) Then
FindHtmlElement = doc.All.Item(i)
'WriteRunLog("Loop1")
'WriteRunLog("FindHtmlElement结束0")
Exit Function '找到就退出
End If
Next
End If
'上面没找到,进行递归调用,递归会查找所有嵌套的Frame。
For k = 0 To doc.Window.Frames.Count - 1
'If k = 0 Then
' WriteRunLog("递归调用 doc.Window.Frames.Count:" + doc.Window.Frames.Count.ToString) 'For Test
'End If
'2018.3.14 直接 递归调用
'WriteRunLog("递归调用:" + Str(k))
' WriteRunLog("doc.Window.Frames.Item(k).Name:" + doc.Window.Frames.Item(k).Name)
FindHtmlElementOfDocument = doc.Window.Frames.Item(k).Document
FindHtmlElement = FindHtmlElement(FindText, doc.Window.Frames.Item(k).Document, cTagName, cGetAttribute, StrictMatching)
If Not FindHtmlElement Is Nothing Then '找到就退出循环
'WriteRunLog("FindHtmlElement结束1")
Exit Function
End If
Next
Catch ex As Exception
FindHtmlElement = Nothing
WriteRunLog("FindHtmlElement发生异常:" + ex.Message)
End Try
End Function
Sub WriteRunLog(ByVal MyMsg As String)
'Using w As StreamWriter = File.AppendText("RunLog.txt")
Dim w As StreamWriter
If File.Exists("RunLog.txt") Then
If My.Computer.FileSystem.GetFileInfo("RunLog.txt").Length > 10485760 Then '2017.5.4 文件大于10M,清0
w = File.CreateText("RunLog.txt")
w.Write("文件大于10M,置0从头开始!")
w.Write(Chr(9))
Else
w = File.AppendText("RunLog.txt")
End If
Else
w = File.CreateText("RunLog.txt")
End If
w.Write(Now)
w.Write(Chr(9)) '插入Tab键
w.WriteLine(MyMsg)
w.Flush()
w.Close()
'End Using
End Sub