|
本帖最后由 todaynew 于 2016-6-21 17:51 编辑
Private Function GetText() As Variant
'功能:从税务总局网页提取地址和项目名称
Dim xmlhttp As Object
Dim url As String
Dim txt As String
Dim Arr, i
Dim A()
url = "http://www.chinatax.gov.cn/n810214/n810621/n810668/index.html"
Set xmlhttp = CreateObject("Microsoft.XMLHTTP")
With xmlhttp
.Open "GET", url, False
.send
txt = .responsetext
End With
txt = Mid(txt, InStr(txt, "<dd>"))
txt = Left(txt, InStrRev(txt, "</dd>") + 4)
txt = Replace(txt, "<dd class=" & Chr(34) & "sv_linel" & Chr(34) & "></dd>", "")
Arr = MatchArr(txt, "<.*>")
If Arr(0) = "True" Then
For i = 1 To UBound(Arr)
ReDim Preserve A(1, i - 1)
A(0, i - 1) = Mid(Arr(i), InStr(Arr(i), Chr(34)) + 1)
A(0, i - 1) = Left(A(0, i - 1), InStr(A(0, i - 1), Chr(34)) - 1)
A(0, i - 1) = Replace(A(0, i - 1), "../../..", "http://www.chinatax.gov.cn")
A(1, i - 1) = Mid(Arr(i), InStr(Arr(i), "</span>") + 7)
A(1, i - 1) = Left(A(1, i - 1), InStr(A(1, i - 1), "</a") - 1)
Next
End If
GetText = A
End Function
Public Function MatchArr(ByVal s As String, ByVal match_str As String) As Variant
'引用:Microsoft VBScript Regular Expressions 5.5
'功能:返回与正则表达式中捕获的子字符串
'参数:s--字符串,match_str--正则表达式
'示例:A=MatchArr(str,"sum\(v\(\d+,\d+,\d+\):v\(\d+,\d+,\d+\)\)")
Dim re As New regexp
Dim m As Match
Dim objs As MatchCollection
Dim A()
Dim i As Long, j As Long
re.Pattern = match_str
re.IgnoreCase = True
re.Global = True
ReDim A(0)
A(0) = CStr(re.Test(s))
If re.Test(s) = True Then
Set objs = re.Execute(s)
ReDim Preserve A(objs.Count)
i = 0
For Each m In objs
i = i + 1
A(i) = m.Value
Next
End If
MatchArr = A
Set re = Nothing
Set m = Nothing
Set objs = Nothing
End Function |
评分
-
查看全部评分
|