当前位置:编程学习 > asp >>

asp正则获取html文件图片地址

'// getIMG: collect the src URLs of every <img> tag found in an HTML string.
'//
'// Parameters:
'//   sString - HTML text to scan; may be Null.
'// Returns:
'//   A zero-based array of URL strings (an empty array, UBound = -1, when no
'//   image is found). URLs from quoted src attributes come first, followed by
'//   URLs from unquoted src attributes; each group is in document order.
'//   For a Null argument an empty string is returned instead of an array
'//   (kept for backward compatibility), so callers should test IsArray().
Function getIMG(sString)
    Dim sReallyDo, regEx, iReallyDo
    Dim oMatches, cMatch

    If IsNull(sString) Then
        getIMG = ""
        Exit Function
    End If

    '// Start with an empty result array (UBound = -1).
    iReallyDo = -1
    ReDim aReallyDo(iReallyDo)

    Set regEx = New RegExp
    regEx.IgnoreCase = True
    regEx.Global = True

    '// Normalise the markup: turn line breaks and tabs into spaces and
    '// collapse runs of spaces, so every tag sits on a single line.
    sReallyDo = sString
    sReallyDo = Replace(sReallyDo, vbCr, " ")
    sReallyDo = Replace(sReallyDo, vbLf, " ")
    sReallyDo = Replace(sReallyDo, vbTab, " ")
    regEx.Pattern = " {2,}"
    sReallyDo = regEx.Replace(sReallyDo, " ")

    '// Put adjacent tags on separate lines.
    sReallyDo = Replace(sReallyDo, "><", ">" & vbCrLf & "<")
    sReallyDo = Replace(sReallyDo, "> <", ">" & vbCrLf & "<")

    '// Strip quoted inline event handlers (onclick, onload, ...) so that a
    '// "src=" appearing inside script text is not mistaken for an attribute.
    '// The name must literally start with "on"; a character class [on] would
    '// also match ordinary words and could delete the real src attribute.
    regEx.Pattern = "\son\w+\s*=\s*([""'])(.*?)\1"
    sReallyDo = regEx.Replace(sReallyDo, "")

    '// Pass 1: src values wrapped in single or double quotes.
    '// [^>]* keeps the match inside one tag; SubMatches(1) is the URL.
    regEx.Pattern = "<img\b[^>]*?\ssrc\s*=\s*([""'])([^""']+)\1[^>]*>"
    Set oMatches = regEx.Execute(sReallyDo)
    For Each cMatch In oMatches
        iReallyDo = iReallyDo + 1
        ReDim Preserve aReallyDo(iReallyDo)
        aReallyDo(iReallyDo) = cMatch.SubMatches(1)
    Next

    '// Pass 2: unquoted src values. The URL stops at whitespace or ">" so the
    '// closing bracket and any following markup are never captured.
    regEx.Pattern = "<img\b[^>]*?\ssrc\s*=\s*([^""'\s>][^\s>]*)[^>]*>"
    Set oMatches = regEx.Execute(sReallyDo)
    For Each cMatch In oMatches
        iReallyDo = iReallyDo + 1
        ReDim Preserve aReallyDo(iReallyDo)
        aReallyDo(iReallyDo) = cMatch.SubMatches(0)
    Next

    Set oMatches = Nothing
    Set regEx = Nothing

    getIMG = aReallyDo
End Function
%>

补充:asp教程,高级应用 
CopyRight © 2012 站长网 编程知识问答 www.zzzyk.com All Rights Reserved
部分技术文章来自网络,