|
2#
楼主 |
发表于 2014-4-17 00:27:25
|
只看该作者
本帖最后由 tianping 于 2014-4-17 00:28 编辑
原代码
包含一小段抓取网页 HTML 的代码(未包含登录)
- using System;
- using System.Collections.Generic;
- using System.ComponentModel;
- using System.Data;
- using System.Drawing;
- using System.Linq;
- using System.Text;
- using System.Threading.Tasks;
- using System.Windows.Forms;
- using System.Text.RegularExpressions;
- using System.IO;
- using System.Net;
namespace NetCapture
{
    /// <summary>
    /// Form that downloads a page into <c>richTextBox1</c> (button2) and renders the
    /// first HTML table found in that text into <c>dataGridView1</c> (button1).
    /// </summary>
    public partial class Form2 : Form
    {
        // All patterns are applied to HTML from which every whitespace character has
        // already been removed (see NoiseRegex), so "." never has to cross a newline.
        // Built once instead of on every click.

        // Whitespace plus closing </a> and </font> tags, stripped before parsing.
        private static readonly Regex NoiseRegex = new Regex(@"(\s)|(</a>)|(</font>)");

        // A whole <table>...</table> element (non-greedy).
        private static readonly Regex TableRegex = new Regex(@"<table[\s\S]*?</table>");

        // A whole <tr>...</tr> element.
        private static readonly Regex RowRegex = new Regex(@"<tr.*?</tr>");

        // Text content of a <th> header cell.
        private static readonly Regex HeadCellRegex = new Regex(@"(?<=<th.*?>)[^>]*?(?=</th>)");

        // Text content of a <td> data cell.
        private static readonly Regex CellRegex = new Regex(@"(?<=<td.*?>)[^>]*?(?=</td>)");

        public Form2()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Parses the HTML in <c>richTextBox1</c> and fills <c>dataGridView1</c> from the
        /// first table: the first row supplies the column headers, every following row
        /// supplies one grid row. Does nothing (beyond clearing the grid columns) when no
        /// usable table is found.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            this.dataGridView1.Columns.Clear();

            // Strip all whitespace and the closing </a> / </font> tags before matching.
            string htmlStr = NoiseRegex.Replace(this.richTextBox1.Text, "");

            // Only the first table is processed.
            Match tableMatch = TableRegex.Match(htmlStr);
            if (!tableMatch.Success)
            {
                return;
            }

            MatchCollection rowMatchs = RowRegex.Matches(tableMatch.Value);
            if (rowMatchs.Count < 1)
            {
                // Table without any <tr>: there is no header row to read.
                return;
            }

            // The first row is treated as the header row.
            MatchCollection columnMatchs = HeadCellRegex.Matches(rowMatchs[0].Value);
            int columnNum = columnMatchs.Count;

            // Header cell 0 is never turned into a grid column (see loop below), so with
            // fewer than two header cells the grid would have no columns and
            // Rows.Add() would throw InvalidOperationException.
            if (columnNum < 2)
            {
                return;
            }

            // Create the grid columns. Index 0 is skipped on purpose, matching the data
            // loop below. NOTE(review): presumably the first column of the source table
            // is not wanted in the grid - confirm against the target page.
            for (int i = 1; i < columnNum; i++)
            {
                DataGridViewTextBoxColumn col = new DataGridViewTextBoxColumn();
                col.Name = "col" + i.ToString();
                col.HeaderText = columnMatchs[i].Value;
                this.dataGridView1.Columns.Add(col);
            }

            // Split every data row into cells and copy them into a new grid row.
            for (int i = 1; i < rowMatchs.Count; i++)
            {
                MatchCollection cellMatchs = CellRegex.Matches(rowMatchs[i].Value);
                int index = this.dataGridView1.Rows.Add();
                DataGridViewRow gridRow = this.dataGridView1.Rows[index];

                // A row may contain fewer <td> cells than the header has <th> cells;
                // copy only what exists and leave the remaining grid cells empty.
                int cellCount = Math.Min(columnNum, cellMatchs.Count);
                for (int j = 1; j < cellCount; j++)
                {
                    // Source cell j lands in grid column j - 1 because cell 0 is skipped.
                    gridRow.Cells[j - 1].Value = cellMatchs[j].Value;
                }
            }
        }

        /// <summary>
        /// Downloads the URL typed into <c>textBox2</c>, decodes the body as gb2312 and
        /// places the text in <c>richTextBox1</c>. Failures are reported in a message box
        /// instead of escaping the event handler.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            try
            {
                WebRequest request = WebRequest.Create(textBox2.Text);

                // using blocks release the response and reader even when reading throws.
                using (WebResponse response = request.GetResponse())
                using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("gb2312")))
                {
                    this.richTextBox1.Text = reader.ReadToEnd();
                }
            }
            catch (UriFormatException ex)
            {
                // Empty or malformed URL text.
                ShowDownloadError(ex);
            }
            catch (NotSupportedException ex)
            {
                // URI scheme has no registered WebRequest handler.
                ShowDownloadError(ex);
            }
            catch (WebException ex)
            {
                // DNS failure, timeout, HTTP error status, ...
                ShowDownloadError(ex);
            }
            catch (IOException ex)
            {
                // Connection dropped while the body was being read.
                ShowDownloadError(ex);
            }
        }

        // Shows the message of a failed download to the user.
        private void ShowDownloadError(Exception ex)
        {
            MessageBox.Show(this, ex.Message);
        }
    }
}
复制代码 |
|