当前位置:编程学习 > JAVA >>

java网页数据抓取

对于加密的网站我还没有去研究,不知道能不能抓取,现在只是对一些没有加密的网站进行网页数据抓取。刚开始写的时候以为很多网站都能抓取,后来才发现很多网站都加密了。本来以为可以通过网页数据检测工具监测出某些地址的数据变化,但实际上只能监测到一些通过js显示的数据,依然不能抓取到加密的网站。唉,这个问题以后再说吧。
 [java]
import java.net.* ; 
import java.io.* ; 
import java.util.regex.* ; 
public class Capture{ 
    public static void main(String args[])throws Exception{ 
        System.out.println("*************************手机号查询************************") ; 
        System.out.println("我的位置是:" + new GrabMobile().grabMobileLocation("15023141745")) ; 
        System.out.println("手机卡类型是:" + new GrabMobile().grabMobileType("15023141745")) ; 
        System.out.println("我的邮编是:" + new GrabMobile().grabMobilePost("15023141745")) ; 
        System.out.println("*************************身份证查询************************") ; 
        System.out.println("我的性别是:" + new GrabIdentity().grabIdentitySex("362203199208243575")) ; 
        System.out.println("我的生日是:" + new GrabIdentity().grabIdentityBirth("362203199208243575")) ; 
        System.out.println("我的家乡是:" + new GrabIdentity().grabIdentityHome("362203199208243575")) ; 
    } 

class GrabMobile{ 
    public String grabMobileLocation(String m)throws Exception{ 
        String strUrl = "http://www.ip138.com:8080/search.asp?action=mobile&mobile=" + m; 
        URL url = new URL(strUrl) ; 
        HttpURLConnection httpUrlCon = (HttpURLConnection)url.openConnection() ; 
        InputStreamReader inRead = new InputStreamReader(httpUrlCon.getInputStream(),"GBK") ; 
        BufferedReader bufRead = new BufferedReader(inRead) ; 
        StringBuffer strBuf = new StringBuffer() ; 
        String line = "" ; 
        while ((line = bufRead.readLine()) != null) { 
            strBuf.append(line); 
        } 
        String strStart = "卡号归属地" ; 
        String strEnd = "卡 类 型"; 
        String strAll = strBuf.toString() ; 
         
        int start = strAll.indexOf(strStart) ; 
         
        int end = strAll.indexOf(strEnd) ; 
         
        String result = strAll.substring(start+42,end-33) ; 
        result = drawChMob(result) ; 
        return result ; 
    } 
    public String grabMobileType(String m)throws Exception{ 
        String strUrl = "http://www.ip138.com:8080/search.asp?action=mobile&mobile=" + m; 
        URL url = new URL(strUrl) ; 
        HttpURLConnection httpUrlCon = (HttpURLConnection)url.openConnection() ; 
        InputStreamReader inRead = new InputStreamReader(httpUrlCon.getInputStream(),"GBK") ; 
        BufferedReader bufRead = new BufferedReader(inRead) ; 
        StringBuffer strBuf = new StringBuffer() ; 
        String line = "" ; 
        while ((line = bufRead.readLine()) != null) { 
            strBuf.append(line); 
        } 
        String strStart = "卡 类 型" ; 
        String strEnd = "<TD align=\"center\">区 号</TD>"; 
        String strAll = strBuf.toString() ; 
         
        int start = strAll.indexOf(strStart) ; 
         
        int end = strAll.indexOf(strEnd) ; 
         
        String result = strAll.substring(start+12,end) ; 
        result = drawChMob(result) ; 
        result = result.substring(1) ; 
        return result ; 
    } 
    public String grabMobilePost(String m)throws Exception{ 
        String strUrl = "http://www.ip138.com:8080/search.asp?action=mobile&mobile=" + m; 
        URL url = new URL(strUrl) ; 
        HttpURLConnection httpUrlCon = (HttpURLConnection)url.openConnection() ; 
        InputStreamReader inRead = new InputStreamReader(httpUrlCon.getInputStream(),"GBK") ; 
        BufferedReader bufRead = new BufferedReader(inRead) ; 
        StringBuffer strBuf = new StringBuffer() ; 
        String line = "" ; 
        while ((line = bufRead.readLine()) != null) { 
            strBuf.append(line); 
        } 
        String strStart = "邮 编" ; 
        String strEnd = "更详细的.."; 
        String strAll = strBuf.toString() ; 
         
        int start = strAll.indexOf(strStart) ; 
         
        int end = strAll.indexOf(strEnd) ; 
   &nb

补充:软件开发 , Java ,
CopyRight © 2012 站长网 编程知识问答 www.zzzyk.com All Rights Reserved
部份技术文章来自网络,