文章目录
前言
小学语文中,拼音、笔画、部首、组词等是必学、必考内容。家长不能随时辅导怎么办?有VBA,一键爬取网络数据。
本次使用的网站网址为:https://www.putongtianxia.com/。
该网站有个小缺点,不能区分多音字,多音字的拼音只有一个。
一、代码
1.创建数据发送及返回方法
c
Function sendAndget1(url As String, resultA As String) '创建数据发送及返回方法
Dim re As Object
Dim rl As Object
Dim st As Object
On Error Resume Next
Set xmlhttp = CreateObject("msxml2.xmlhttp")
xmlhttp.Open "GET", url, False
xmlhttp.SEND
If xmlhttp.READYSTATE = 4 Then
a = StrConv(xmlhttp.RESPONSEBODY, vbUnicode)
End If
Set re = CreateObject("vbscript.RegExp")
With re
.IgnoreCase = True
.Global = True
.Pattern = "utf-8|gb2312|gbk"
Set rl = .Execute(a)
End With
ch = rl.Item(0)
Set st = CreateObject("adodb.stream")
With st
.Mode = 3
.Type = 1
.Open
.write xmlhttp.RESPONSEBODY
.Position = 0
.Type = 2
.Charset = ch
resultA = .readtext
.Close
End With
End Function
2.汉字转UTF8编码
c
Function strToUtf8(str As String) As String '汉字转UTF8编码
Dim wch As String
Dim uch As String
Dim szRet As String
Dim x As Long
Dim inputLen As Long
Dim nAsc As Long
Dim nAsc2 As Long
Dim nAsc3 As Long
If str = "" Then
strToUtf8 = str
Exit Function
End If
inputLen = Len(str)
For x = 1 To inputLen
wch = Mid(str, x, 1)
nAsc = AscW(wch)
'对于<0的编码 其需要加上65536
If nAsc < 0 Then nAsc = nAsc + 65536
'对于<128位的ASCII的编码则无需更改
If (nAsc And &HFF80) = 0 Then
szRet = szRet & wch
Else
If (nAsc And &HF000) = 0 Then
uch = "%" & Hex(((nAsc \ 2 ^ 6)) Or &HC0) & Hex(nAsc And &H3F Or &H80)
szRet = szRet & uch
Else
uch = "%" & Hex((nAsc \ 2 ^ 12) Or &HE0) & "%" & _
Hex((nAsc \ 2 ^ 6) And &H3F Or &H80) & "%" & _
Hex(nAsc And &H3F Or &H80)
szRet = szRet & uch
End If
End If
Next
strToUtf8 = szRet
End Function
3.拆分数组
c
Function arrResult(str As String, arrSP() As String, arrLi() As String)
Dim arrR() As String
Dim i, j, m, n As Integer
Dim url As String
Dim resultA As String
Dim utfstr As String
utfstr = strToUtf8(str)
url = "https://bishun.putongtianxia.com/" & utfstr & "_bishun"
Call sendAndget1(url, resultA) '调用返回数据方法,根据返回数据截取有用信息
ReDim arrR(Len(resultA))
ReDim arrSP(Len(resultA))
ReDim arrLi(Len(resultA))
arrR = Split(resultA, " ")
j = UBound(arrR) - LBound(arrR) + 1
For i = 0 To j - 1
If arrR(i) Like "*<span>*</span>*" Then
arrSP(m) = arrR(i)
m = m + 1
ElseIf arrR(i) Like "*<li>*</li>*" Then
arrLi(n) = arrR(i)
n = n + 1
End If
Next
End Function
4.获取拼音
c
Function pinyin(str As String) As String
Dim arrSP() As String
Dim arrLi() As String
Dim tmp As String
ReDim arrSP(4)
Call arrResult(str, arrSP(), arrLi())
tmp = Left(arrSP(0), Len(arrSP(0)) - 8)
pinyin = Right(tmp, Len(tmp) - 6)
End Function
5.获取部首
c
Function bushou(str As String) As String
Dim arrSP() As String
Dim arrLi() As String
Dim tmp As String
Dim c As String
Dim i As Integer
ReDim arrSP(4)
Call arrResult(str, arrSP(), arrLi())
For i = 1 To Len(arrSP(3))
c = Mid(arrSP(3), i, 1)
If c Like "*[一-龥]*" Then
bushou = c
End If
Next
End Function
6.获取繁体
c
Function fanti(str As String) As String
Dim arrSP() As String
Dim arrLi() As String
Dim c As String
Dim i As Integer
ReDim arrSP(4)
Call arrResult(str, arrSP(), arrLi())
For i = 1 To Len(arrSP(1))
c = Mid(arrSP(1), i, 1)
If c Like "*[一-龥]*" Then
fanti = c
End If
Next
End Function
7.获取笔画
c
Function bihua(str As String) As String
Dim arrSP() As String
Dim arrLi() As String
Dim c As String
Dim i As Integer
ReDim arrSP(4)
Call arrResult(str, arrSP(), arrLi())
For i = 1 To Len(arrSP(2))
c = Mid(arrSP(2), i, 1)
If IsNumeric(c) Then
bihua = bihua & c
End If
Next
End Function
8.组词
c
Function zuci(str As String) As String
Dim arrSP() As String
Dim arrLi() As String
Dim arrLiLen As Integer
Dim c As String
Dim i, j As Integer
ReDim arrLi(100)
Call arrResult(str, arrSP(), arrLi())
arrLiLen = UBound(arrLi) - LBound(arrLi) + 1
For i = 0 To arrLiLen - 1
If arrLi(i) <> "" Then
For j = 1 To Len(arrLi(i))
c = Mid(arrLi(i), j, 1)
If c Like "*[一-龥]*" Then
zuci = zuci & c
End If
Next
zuci = zuci & "、"
Else
Exit Function
End If
Next
End Function