asp正则获取html文件图片地址
Function getIMG(sString)
    '// Extracts the SRC address of every <img> tag found in an HTML string.
    '//
    '// Parameter:
    '//   sString - HTML source text to scan.
    '// Returns:
    '//   ""  (empty string) when sString is Null; kept for backward compatibility.
    '//   Otherwise a zero-based array of strings. Addresses taken from quoted
    '//   src attributes come first, followed by addresses from unquoted src
    '//   attributes. The array is empty (UBound = -1) when no image is found.
    Dim sReallyDo, regEx, iReallyDo
    Dim oMatches, cMatch

    '// Start with an empty array (UBound = -1).
    iReallyDo = -1
    ReDim aReallyDo(iReallyDo)

    If IsNull(sString) Then
        getIMG = ""
        Exit Function
    End If

    sReallyDo = sString

    Set regEx = New RegExp
    regEx.IgnoreCase = True
    regEx.Global = True

    '// Normalise the HTML: turn line breaks and tabs into spaces, then
    '// collapse runs of spaces so that "\s" separators behave predictably.
    sReallyDo = Replace(sReallyDo, vbCr, " ")
    sReallyDo = Replace(sReallyDo, vbLf, " ")
    sReallyDo = Replace(sReallyDo, vbTab, " ")
    regEx.Pattern = " {2,}"
    sReallyDo = regEx.Replace(sReallyDo, " ")

    '// Put every tag on its own line; "." never matches a line break in the
    '// VBScript RegExp engine, so this keeps "(.*?)" below from spanning tags.
    sReallyDo = Replace(sReallyDo, "><", ">" & vbCrLf & "<")
    sReallyDo = Replace(sReallyDo, "> <", ">" & vbCrLf & "<")

    '// Strip quoted event-handler attributes (onclick, onload, ...).
    '// "on\w+" matches the literal prefix "on"; a character class [on] would
    '// match any word starting with "o" or "n" and delete unrelated markup.
    regEx.Pattern = "\son\w+\s*=\s*([""'])(.*?)\1"
    sReallyDo = regEx.Replace(sReallyDo, "")

    '// Pass 1: src values enclosed in single or double quotes.
    '// [^>]*? keeps the search inside the <img ...> tag itself.
    '// SubMatches(0) is the quote character, SubMatches(1) the address.
    regEx.Pattern = "<img\b[^>]*?\ssrc\s*=\s*([""'])([^""']+)\1"
    Set oMatches = regEx.Execute(sReallyDo)
    For Each cMatch In oMatches
        iReallyDo = iReallyDo + 1
        ReDim Preserve aReallyDo(iReallyDo)
        aReallyDo(iReallyDo) = cMatch.SubMatches(1)
    Next

    '// Pass 2: src values written without quotes. The value ends at the
    '// first whitespace, quote or ">" so it cannot run past the tag.
    regEx.Pattern = "<img\b[^>]*?\ssrc\s*=\s*([^\s""'>]+)"
    Set oMatches = regEx.Execute(sReallyDo)
    For Each cMatch In oMatches
        iReallyDo = iReallyDo + 1
        ReDim Preserve aReallyDo(iReallyDo)
        aReallyDo(iReallyDo) = cMatch.SubMatches(0)
    Next

    Set oMatches = Nothing
    Set regEx = Nothing

    getIMG = aReallyDo
End Function
%>
补充:asp教程,高级应用