求大家讲解一下c#网页采集器的代码

作业要求：利用C#实现一个网络采集器。功能可参照火车采集器，重点功能在于数据采集，包括网络地址的批量采集，页面内容过滤，采集结果保存等。
代码求大家讲解一下这些部分：button2_Click button1_Click void getinformation refine saveas geturl button2_Click button5_Click
我会把剩余的分全追加了，求大家仔细讲解，我是新手

设计界面截图：

][/img]

主要代码：

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Diagnostics;
using System.Text.RegularExpressions;
using System.Collections;
namespace 网络文本挖掘器
{

    public partial class Form1 : Form
    {
        public static string s1="http://www.2345.com";
        public static string filename;
        public static string context;
        public static string filepath="";

        public Form1()
        {
            InitializeComponent();
        }
        private void textBox1_TextChanged(object sender, EventArgs e)
        {
            s1=textBox1.Text.ToString();
        }


        private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {

        }
        private void button4_Click(object sender, EventArgs e)
        {
            try
            {
                webBrowser1.Navigate(new Uri(s1));
            }
            catch
            {
                MessageBox.Show("无效的网址！！！你的智商令人捉鸡呀！！！");
                s1 = "";
            }
        }
        private void button1_Click(object sender, EventArgs e)
        {

            try
            {  WebClient myWebclient = new WebClient();
                byte[] mydatabuffer = myWebclient.DownloadData(s1);
                string ss= Encoding.UTF8.GetString(mydatabuffer);
                filename = "当前页面";
                saveas(filename, ss);
            }
            catch
            {
                MessageBox.Show("无效的网址！！！你的智商令人捉鸡呀！！！");
                s1 = "";
            }
        }

        private static void getinformation(string url)
        {
            try
            {
                int start1,start2,end1,end2,start3,temp;
                WebClient myWebclient = new WebClient();
                byte[] mydatabuffer = myWebclient.DownloadData(url);
                string ss = Encoding.UTF8.GetString(mydatabuffer);
                start1 = ss.IndexOf(@"CNNVD编号：</td>");
                end1 = ss.IndexOf(@"m", start1, ss.Length - start1);
                start2 = ss.IndexOf(@"cnnvd3_12_24.jpg");
                start3 = ss.IndexOf(@"m", start2, ss.Length - start2);
                temp = start3;
               do
                {
                    end2 = ss.IndexOf(@"<", temp, ss.Length - temp);
                    temp = end2+1;
                }
                while (ss.Substring(end2 + 1, 1) != @"/");
                context=refine(ss.Substring(start3+3,end2-start3-3));
                filename = refine(ss.Substring(end1+3, 16));
            }
            catch
            {
                MessageBox.Show("网址无效！！！智商啊！！！");
            }

        }
        private static string refine(string s)
        {
            string sy;
           sy=s.Replace("<br/>","");

           sy=sy.Replace(" ", "\n");

           return sy;


        }
        private static void saveas(string filename, string context)
        {
            if (filepath == "")
            {
                FileInfo file = new FileInfo(@"D:\\" + filename + ".txt");
                StreamWriter sw = file.AppendText();
                sw.Write(context);
                sw.Close();
                sw.Dispose();
            }
            else
            {
                try
                {
                    FileInfo file = new FileInfo(@filepath + filename + ".txt");
                    StreamWriter sw = file.AppendText();
                    sw.Write(context);
                    sw.Close();
                    sw.Dispose();
                }
                catch
                {
                    MessageBox.Show("文件路径输入错误，存入默认路径！自己复制粘贴去");
                }
            }


        }
        private static string geturl(string s)
        {
            int i = 0;
            int count = 2;
            string temp = s1;
            char[] c = s.ToCharArray();
            for (i = 0; i < s.Length; i++)
            {
                if (c[i] == '/')
                { count--; }
                 if(count==0)
                     break;
            }
            for (int j = i; j < s.Length-1; j++)
            {
                temp += c[j];
            }

            return temp;
        }
        private void button2_Click(object sender, EventArgs e)
        {
            try
            {
                System.Net.WebClient client = new WebClient();
                byte[] page = client.DownloadData(s1);
                string content = System.Text.Encoding.UTF8.GetString(page);
                string regex = "href=[\\\"\\\'](http:\\/\\/|\\.\\/|\\/)?\\w+(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?[\\\"\\\']";
                Regex re = new Regex(regex);

                MatchCollection matches = re.Matches(content);
                System.Collections.IEnumerator enu = matches.GetEnumerator();
                this.comboBox1.Items.Clear();
                this.comboBox2.Items.Clear();
                while (enu.MoveNext() && enu.Current != null)
                {
                    Match match = (Match)(enu.Current);
                    this.comboBox1.Items.Add(match.Value);
                    this.comboBox2.Items.Add(match.Value);
                 }
                MessageBox.Show("网址分析完毕，请通过下拉式菜单选取需要批量采集的范围");
            }
            catch
            {
                MessageBox.Show("无效的网址！！！你的智商令人捉鸡呀！！！");
            }
        }
        private void button3_Click(object sender, EventArgs e)
        {
            this.Close();
            Application.Exit();
        }
        private void button5_Click(object sender, EventArgs e)
        {

            if (this.comboBox1.SelectedIndex >this.comboBox2.SelectedIndex||this.comboBox1.SelectedIndex==-1||this.comboBox2.SelectedIndex==-1)
            {
                MessageBox.Show("开头比结束位置还靠后，你是不是傻呀！");
            }
            else
            {
                for (int i = this.comboBox1.SelectedIndex; i <= this.comboBox2.SelectedIndex; i++)
                {
                    getinformation(geturl(this.comboBox1.Items[i].ToString()));
                    saveas(filename, context);
                }
                MessageBox.Show("采集完成，已存入制定文件夹，请查阅");
            }

        }

        private void textBox2_TextChanged(object sender, EventArgs e)
        {
            filepath = textBox2.Text.ToString();
        }

        private void comboBox1_SelectedIndexChanged(object sender, EventArgs e)
        {

        }

        private void comboBox2_SelectedIndexChanged(object sender, EventArgs e)
        {

        }
    }
}
--------------------编程问答--------------------

--------------------编程问答-------------------- button4_Click 将浏览器转到输入的url
button1_Click 保存当前页面
geturl 提取url，其实就是去掉 http:// （汗） --------------------编程问答-------------------- button3_Click 关闭
button2_Click 提取链接，用了正则表达式
getinformation 从特定网页提取特定数据

总结：代码很烂很烂很烂；而且要么是一个人拼凑粘贴的，要不然是几个人写的，同样的事情，用了不止一种方法实现。写这个代码的人的智商堪比驴子。 --------------------编程问答-------------------- 怎么看都不像我写的，还好，还好 --------------------编程问答-------------------- button2_Click   //提取链接，用了正则表达式
button1_Click  //扒取页面
void getinformation   //应该是匹配页面内容
refine   //过滤特殊字符
saveas   //将拔取的页面存放到本地磁盘  txt
geturl  //提取url
button5_Click  //没什么基本验证。然后调用void getinformation

其实，你看messagebox.show()里的内容也能分析个八九不离十了！

补充：.NET技术 ,  C#