winform 要采集的页面 该怎么做。50分
<script language="JavaScript" src="http://8234567.com/javascript.php?flag=djyzsua"></script>要采集的页面是这个该如何做,请告诉下,在线等 速度结贴50 --------------------编程问答-------------------- 难道是这个?
WebClient wc = new WebClient();--------------------编程问答-------------------- 不是,这个是我的代码,你可以看下,不用WebClient
Encoding enc = Encoding.GetEncoding("GB2312"); // If the decoded text comes out garbled, switch the encoding name (utf-8 / GB2312).
Byte[] pageData = wc.DownloadData("http://8234567.com/"); // Download the resource and get it back as a byte array.
string html = enc.GetString(pageData); // Decode the downloaded bytes into text with the encoding chosen above.
要采集 <script language="JavaScript" src="http://8234567.com/javascript.php?flag=djyzsua"></script> 的内容,并以 js 形式保存。这样的项目我是第一次做,所以想请教高手:该怎么获取我要采集的页面内容?因为其中包含了 JavaScript。
using System;
using System.Collections.Generic;
//using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using Sgml;
using System.Xml;
using System.Data.OleDb;
using System.Collections;
using System.Timers;
namespace CatchDiv
{
class Program
{
// Current date formatted as "month-day"; show() overwrites it on every call, so "1-1" is only the initial placeholder.
static string today = "1-1";
// Not referenced by any code visible in this part of the file.
static string news = "1-1";
// "month-day" values for which show() has already printed its 18:00 message.
static List<string> list = new List<string>();
/// <summary>
/// Timer callback: refreshes <c>today</c>, and unless that date is already recorded in <c>list</c>,
/// downloads the e.jznews.com.cn index page, echoes it to the console and appends it to an
/// HTML file under F:\.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="args">Event data; not used.</param>
public static void show(object sender, EventArgs args)
{
    today = string.Concat(DateTime.Now.Month.ToString(), "-", DateTime.Now.Day.ToString());

    // Dates already recorded are skipped entirely.
    if (list.Contains(today))
    {
        return;
    }

    // The date is only recorded during the 18:00 hour; at any other hour the
    // download below still runs, and will run again on the next call.
    bool isRecordingHour = DateTime.Now.Hour == 18;
    if (isRecordingHour)
    {
        Console.WriteLine("成功添加 " + today + " 日的历史上的today所存在的事件".Replace("today", "今天"));
        list.Add(today);
    }

    // NOTE(review): this blocks the calling thread until a line is entered on the console;
    // confirm that is intended when the method is driven by a timer.
    Console.ReadLine();

    // "id" is passed as the encoding name; presumably it is not a valid one, in which case
    // GetHtml's fallback encoding is what actually gets used — TODO confirm intent.
    string pageSource = GetHtml("http://e.jznews.com.cn/indexmain/", "id");
    Console.WriteLine(pageSource);

    // File name suffix is the current minute followed by the current second (no padding).
    string stamp = DateTime.Now.Minute.ToString() + DateTime.Now.Second.ToString();
    string outputFile = @"F:\e-ms" + stamp + ".html";
    File.AppendAllText(outputFile, pageSource);

    Console.Read();
}
//public static void TimeDoing()
//{
// Timer t = new Timer(10000);
// t.Elapsed = new System.Timers.ElapsedEventHandler(Main);
// t.AutoReset = true;
// t.Enabled = true;
//}
/// <summary>
/// Program entry point: sleeps for three seconds, starts a timer that raises <c>show</c>
/// every 6000 ms, then waits for Enter, downloads one script URL, prints it and appends it
/// to a .js file under F:\.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    System.Threading.Thread.Sleep(3000);

    // Interval is expressed in milliseconds, so the callback fires every six seconds.
    Timer ticker = new Timer { Interval = 6000 };
    ticker.Elapsed += show;
    ticker.Start();

    // Wait for the user to press Enter before doing the one-off download below.
    Console.ReadLine();

    // Fetch address of the BBS script. "src" is passed as the encoding name; presumably it is
    // not a valid one, so GetHtml's fallback encoding would apply — TODO confirm intent.
    string scriptText = GetHtml("http://8234567.com/javascript.php?flag=djyzsua/", "src");
    Console.WriteLine(scriptText);

    // File name suffix is the current minute followed by the current second (no padding).
    string stamp = DateTime.Now.Minute.ToString() + DateTime.Now.Second.ToString();
    string outputFile = string.Concat(@"F:\bbs", stamp, ".js");
    File.AppendAllText(outputFile, scriptText);

    Console.Read();
}
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body decoded as text.
/// </summary>
/// <param name="url">Address of the resource to fetch.</param>
/// <param name="bm">
/// Name of the text encoding used to decode the response (for example "utf-8").
/// If the name cannot be resolved, GB2312 is used instead.
/// </param>
/// <returns>The complete response body as a string.</returns>
/// <exception cref="WebException">The request failed.</exception>
public static string GetHtml(string url, string bm)
{
    WebRequest request = WebRequest.Create(url);

    // Dispose the response, stream and reader deterministically so that every call
    // releases its connection / file handle instead of waiting for finalization.
    using (WebResponse response = request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream, ResolveEncoding(bm)))
    {
        return reader.ReadToEnd();
    }
}

/// <summary>
/// Resolves an encoding by name, falling back to GB2312 when the name is null,
/// unknown or unsupported.
/// </summary>
private static Encoding ResolveEncoding(string bm)
{
    try
    {
        return Encoding.GetEncoding(bm);
    }
    catch (ArgumentException)
    {
        // Null or unrecognised encoding name: keep the original best-effort fallback.
        return Encoding.GetEncoding("GB2312");
    }
    catch (NotSupportedException)
    {
        // Encoding known but not available on this platform: same fallback.
        return Encoding.GetEncoding("GB2312");
    }
}
补充:.NET技术 , ASP.NET