当前位置:编程学习 > php >>

抓取“中国 IC 网”供应商程序

<?php 
/** 
* 抓取“中国 IC 网(http://www.ic37.com)”供应商主程序 
* author Lee. 
* Last modify $Date: 2012-2-9 9:32:21 $ 
* 注:本程序按照编码 GB2312 执行,因为“中国 IC 网”网站是GB2312编码,数据库也得保持一致
*/ 
class ic37 { 
    private $key; // 型号  
    private $pageNum; // 页码  
 
    /**
     * 入口程序
     */ 
    public function go($key) { 
        $this->key = $key; 
        $this->pageNum = $this->getPageNum(); 
        $this->getInfo(); 
    } 
 
    /**
     * 获取供应商 url 链接数组
     * @return ArrayObject
     */ 
    private function getInfo() { 
        if ($this->pageNum==1) { # 处理只有一页的情况 
            $arr = $this->shopAddContact($this->shopUrlMatchReArr($this->getContent())); 
            $this->isAddSuccess($arr); 
        } elseif ($this->pageNum>1) { # 多页 
            for ($i=1; $i<=$this->pageNum; $i++) { 
                $arr = $this->shopAddContact($this->shopUrlMatchReArr($this->getContent($i))); 
                $this->isAddSuccess($arr); 
            } 
        }    
    } 
     
    /**
     * 打印是否添加成功
     * @param ArrayObject $arr
     * @return string
     */ 
    private function isAddSuccess($arr) { 
        foreach ($arr as $k=>$v) { 
            if ($this->execAdd($this->getInfoByShopUrl($v))) { 
                echo 'Add Success!!'; 
            } else { 
                echo 'Add Faild!!'; 
            } 
        } 
    } 
 
    /**
     * 执行添加到数据库
     * @param ArrayObject $infoArr
     * @return Number 受影响的行数
     */ 
    private function execAdd($infoArr) { 
        $mysqli = $this->getDb(); 
        if (!emptyempty($infoArr['company'])) { 
            if (!$this->isExists($mysqli, $infoArr)) { 
                $num = $mysqli->query("INSERT INTO ic37(company,person,phone,mobile,qq,msn,fax,email,address,country,region,zip,web,shopUrl) VALUES ('{$infoArr['company']}','{$infoArr['person']}','{$infoArr['phone']}','{$infoArr['mobile']}','{$infoArr['qq']}','{$infoArr['msn']}','{$infoArr['fax']}','{$infoArr['email']}','{$infoArr['address']}','{$infoArr['country']}','{$infoArr['region']}','{$infoArr['zip']}','{$infoArr['web']}','{$infoArr['shopUrl']}')"); 
                return $num; 
            } else { 
                return false; # 表示数据已经存在 
            } 
        } else { 
            return false; 
        } 
    } 
     
    private function formatStr($str) { 
        $str = trim($str); 
        $str = str_replace(' ', '', $str); 
        $str = str_replace('==联系我们', '', $str); 
        return $str; 
    } 
 
    /**
     * 连接数据库
     */ 
    private function getDb() { 
        $mysqli = new mysqli('localhost', 'root', '1715544', 'weiku'); 
        $mysqli->query('SET NAMES GB2312'); 
        return $mysqli; 
    } 
 
    /**
     * 检查公司是否已经存在
     * @param Resource $mysqli
     * @param ArrayObject $infoArr
     * @return bool
     */ 
    private function isExists($mysqli, $infoArr) { 
        $mysqli->query("SELECT company FROM weiku WHERE company = '{$infoArr['company']}'"); 
        if ($mysqli->affected_rows) { 
            return true; 
        } else { 
            return false; 
        } 
    } 
 
    /**
     * 抓取信息
     * @param $url 
     * @return ArrayObject
     */ 
    private function getInfoByShopUrl($url) { 
        $re = preg_replace('/<a.+>(.*)<\/a>/', '\1', str_replace('</font>', '', str_replace('<font color="#000099">',

补充:Web开发 , php ,
CopyRight © 2012 站长网 编程知识问答 www.zzzyk.com All Rights Reserved
部分技术文章来自网络,