c从网页提取数据.docx
- 文档编号:8996356
- 上传时间:2023-02-02
- 格式:DOCX
- 页数:12
- 大小:18.72KB
c从网页提取数据.docx
《c从网页提取数据.docx》由会员分享,可在线阅读,更多相关《c从网页提取数据.docx(12页珍藏版)》请在冰豆网上搜索。
C#从网页提取数据
下面函数的作用是将DataTable导出到Excel文件:
/// <summary>
/// Exports a DataTable to an Excel workbook. The column names are written to row 1,
/// every data row is written below it (each cell as its ToString() text), and a copy
/// of the workbook is saved as <paramref name="strFileName"/> + ".xls".
/// </summary>
/// <param name="tmpDataTable">Table to export. When null the method returns without doing anything.</param>
/// <param name="strFileName">Target file name without extension; ".xls" is appended.</param>
private void DataTabletoExcel(System.Data.DataTable tmpDataTable, string strFileName)
{
    if (tmpDataTable == null)
    {
        return;
    }

    int rowNum = tmpDataTable.Rows.Count;
    int columnNum = tmpDataTable.Columns.Count;
    int rowIndex = 1;       // Excel rows are 1-based; row 1 holds the header
    int columnIndex = 0;    // incremented before each write, so the first column is 1

    Excel.Application xlApp = new Excel.ApplicationClass();
    Excel.Workbook xlBook = null;
    try
    {
        xlApp.DefaultFilePath = "";
        xlApp.DisplayAlerts = true;
        xlApp.SheetsInNewWorkbook = 1;
        xlBook = xlApp.Workbooks.Add(true);

        // Write the DataTable column names into the first row of the sheet.
        foreach (DataColumn dc in tmpDataTable.Columns)
        {
            columnIndex++;
            xlApp.Cells[rowIndex, columnIndex] = dc.ColumnName;
        }

        // Write the DataTable rows below the header.
        // NOTE(review): both loop headers were truncated in the extracted listing
        // ("for(inti=0;i {"); the bounds are restored from rowNum / columnNum above.
        for (int i = 0; i < rowNum; i++)
        {
            rowIndex++;
            columnIndex = 0;
            for (int j = 0; j < columnNum; j++)
            {
                columnIndex++;
                xlApp.Cells[rowIndex, columnIndex] = tmpDataTable.Rows[i][j].ToString();
            }
        }

        xlBook.SaveCopyAs(strFileName + ".xls");
    }
    finally
    {
        // The application object is never made visible, so without an explicit
        // Close/Quit every call would leave a hidden Excel process behind.
        if (xlBook != null)
        {
            // SaveChanges = false: the copy has already been written by SaveCopyAs.
            xlBook.Close(false, System.Type.Missing, System.Type.Missing);
        }
        xlApp.Quit();
        System.Runtime.InteropServices.Marshal.ReleaseComObject(xlApp);
    }
}

C#code

using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Text;
using System.Net;
using System.IO.Compression;
using System.Web;
using System.Collections;

namespace 控制台测试
{
    class Program
    {
        /// <summary>
        /// Downloads a page, decodes it as UTF-8, and prints one line per regex match,
        /// built from the named groups "qi", "num" (with commas removed) and "kai".
        /// </summary>
        static void Main(string[] args)
        {
            // NOTE(review): the request URL and most of the regular expression were destroyed
            // when this listing was extracted from the document. The surviving fragments are:
            //     getBytes("null,null);
            //     @"{""BonusNumberString"": ""(? ""(? ""(?
            // Both values must be restored before the program can run. The pattern has to
            // define the named groups "qi", "num" and "kai", which are read in the loop below.
            string url = "";        // TODO: restore the original URL
            string pattern = @"";   // TODO: restore the original pattern

            byte[] buffer = getBytes(url, null, null);
            string html = Encoding.UTF8.GetString(buffer);
            MatchCollection mc = Regex.Matches(html, pattern);

            foreach (Match m in mc)
            {
                // NOTE(review): the "" separators may originally have been " " — the
                // extraction stripped whitespace throughout; confirm against the original.
                Console.WriteLine(m.Groups["qi"] + "" + m.Groups["num"].Value.Replace(",", "") + "" + m.Groups["kai"]);
            }

            Console.WriteLine("程序运行结束,按任意键关闭窗口! ");
            Console.ReadKey();
        }

        /// <summary>
        /// Fetches a network resource and returns its body as a byte array.
        /// Sends a POST when <paramref name="postData"/> is not null, otherwise the request
        /// method is left at its default. A body whose Content-Encoding is gzip is
        /// decompressed before it is returned.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="cookie">Cookie container to attach to the request, or null for none.</param>
        /// <param name="postData">Form-encoded body to POST, or null when nothing is posted.</param>
        /// <returns>The response body, decompressed when it was gzip-encoded.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        private static byte[] getBytes(string url, CookieContainer cookie, byte[] postData)
        {
            if (url == null)
            {
                throw new ArgumentNullException("url");
            }

            const int bufferSize = 1024 * 10;   // read in 10K chunks

            // First '/' at or after index 10; the text before it is used as the Referer.
            // Guarded because IndexOf throws when the start index is past the end of the string.
            int c = url.Length > 10 ? url.IndexOf('/', 10) : -1;

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.AllowAutoRedirect = true;
            if (cookie != null)
            {
                request.CookieContainer = cookie;
            }
            request.Referer = (c > 0 ? url.Substring(0, c) : url);
            // NOTE(review): this literal probably lost its spaces during extraction; kept as found.
            request.UserAgent = "Mozilla/4.0(compatible;MSIE6.0;WindowsNT5.1)";
            // Only gzip is advertised because only gzip is decoded below. The original also
            // advertised deflate, so a deflate response would have been returned still compressed.
            request.Headers[HttpRequestHeader.AcceptEncoding] = "gzip";

            if (postData != null)   // data has to be posted
            {
                request.Method = "POST";
                request.ContentType = "application/x-www-form-urlencoded";
                request.ContentLength = postData.Length;
                using (Stream requestStream = request.GetRequestStream())
                {
                    requestStream.Write(postData, 0, postData.Length);
                }
            }

            byte[] data;
            string ce;
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream s = response.GetResponseStream())
            {
                ce = response.Headers[HttpResponseHeader.ContentEncoding];
                // Read until the stream reports end-of-data whether or not the length is known.
                // The original known-length loop never terminated if the connection ended
                // before ContentLength bytes had arrived (Read kept returning 0).
                data = ReadToEnd(s, bufferSize);
            }

            if (string.Equals(ce, "gzip", StringComparison.OrdinalIgnoreCase))  // compressed body: decompress it
            {
                using (MemoryStream ms = new MemoryStream(data))
                using (GZipStream g = new GZipStream(ms, CompressionMode.Decompress))
                {
                    data = ReadToEnd(g, bufferSize);
                }
            }

            return data;    // return the byte array
        }

        // Copies everything that can be read from source into a new byte array, bufferSize bytes at a time.
        private static byte[] ReadToEnd(Stream source, int bufferSize)
        {
            byte[] chunk = new byte[bufferSize];
            using (MemoryStream collected = new MemoryStream())
            {
                int read;
                while ((read = source.Read(chunk, 0, chunk.Length)) > 0)
                {
                    collected.Write(chunk, 0, read);
                }
                return collected.ToArray();
            }
        }
    }
}

运行结果: 20100625-083907232010-06-2519: 50 20100625-082613712010-06-2519: 40 20100625-081309382010-06-2519: 30 20100625-080138602010-06-2519: 20 20100625-079432382010-06-2519: 10 20100625-078681452010-06-2519: 00 20100625-077203032010-06-2518: 50 20100625-076533652010-06-2518: 40 20100625-075803612010-06-2518: 30 20100625-074761212010-06-2518: 20 20100625-073508112010-06-2518: 10 20100625-072360942010-06-2518: 00 20100625-071913812010-06-2517: 50 20100625-070065152010-06-2517: 40 20100625-069689052010-06-2517: 30 20100625-068622752010-06-2517: 20 20100625-067437852010-06-2517: 10 20100625-066435082010-06-2517: 00 20100625-065823072010-06-2516: 50 20100625-064630792010-06-2516: 40 20100625-063171782010-06-2516: 30 20100625-062296232010-06-2516: 20 
20100625-061153362010-06-2516: 10 20100625-060253502010-06-2516: 00 20100625-059478022010-06-2515: 50 20100625-058022212010-06-2515: 40 20100625-057578882010-06-2515: 30 20100625-056410192010-06-2515: 20 20100625-055005002010-06-2515: 10 20100625-054095622010-06-2515: 00 20100625-053003372010-06-2514: 50 20100625-052694352010-06-2514: 40 20100625-051453142010-06-2514: 30 20100625-050450872010-06-2514: 20 20100625-049937432010-06-2514: 10 20100625-048562802010-06-2514: 00 20100625-047112072010-06-2513: 50 20100625-046556422010-06-2513: 40 20100625-045320702010-06-2513: 30 20100625-044676242010-06-2513: 20 20100625-043742272010-06-2513: 10 20100625-042508082010-06-2513: 00 20100625-041636572010-06-2512: 50 20100625-040019642010-06-2512: 40 20100625-039873152010-06-2512: 30 20100625-038867832010-06-2512: 20 20100625-037552632010-06-2512: 10 20100625-036595782010-06-2512: 00 20100625-035183962010-06-2511: 50 20100625-034678262010-06-2511: 40 20100625-033064462010-06-2511: 30 20100625-032439162010-06-2511: 20 20100625-031429442010-06-2511: 10 20100625-030631492010-06-2511: 00 20100625-029047562010-06-2510: 50 20100625-028565872010-06-2510: 40 20100625-027221102010-06-2510: 30 20100625-026522932010-06-2510: 20 20100625-025060402010-06-2510: 10 20100625-024979922010-06-2510: 00 20100625-023049152010-06-2501: 55 20100625-022812202010-06-2501: 50 20100625-021336832010-06-2501: 45 20100625-020411642010-06-2501: 40 20100625-019980972010-06-2501: 35 20100625-018056272010-06-2501: 30 20100625-017126362010-06-2501: 25 20100625-016046672010-06-2501: 20 20100625-015887182010-06-2501: 15 20100625-014590382010-06-2501: 10 20100625-013669442010-06-2501: 05 20100625-012076632010-06-2501: 00 20100625-011181992010-06-2500: 55 20100625-010379632010-06-2500: 50 20100625-009168432010-06-2500: 45 20100625-008310372010-06-2500: 40 20100625-007693502010-06-2500: 35 20100625-006597642010-06-2500: 30 20100625-005793752010-06-2500: 25 20100625-004414392010-06-2500: 20 
20100625-003801382010-06-2500: 15 20100625-002427172010-06-2500: 10 20100625-001835542010-06-2500: 05 20100624-120612802010-06-2500: 00 20100624-119007472010-06-2423: 55 20100624-118614352010-06-2423: 50 20100624-117918162010-06-2423: 45 20100624-116294732010-06-2423: 40 20100624-115283882010-06-2423: 35 20100624-114890152010-06-2423: 30 20100624-113565862010-06-2423: 25 20100624-112920122010-06-2423: 20 20100624-111043332010-06-2423: 15 20100624-110613662010-06-2423: 10 20100624-109302312010-06-2423: 05 20100624-108325482010-06-2423: 00 20100624-107315222010-06-2422: 55 20100624-106347672010-06-2422: 50 20100624-105870772010-06-2422: 45 20100624-104965932010-06-2422: 40 20100624-103073992010-06-2422: 35 20100624-102617732010-06-2422: 30 20100624-101046222010-06-2422: 25 20100624-100753522010-06-2422: 20 20100624-099303352010-06-2422: 15 20100624-098394592010-06-2422: 10 20100624-097281542010-06-2422: 05 20100624-096200112010-06-2422: 00 20100624-095327782010-06-2421: 50 20100624-094382442010-06-2421: 40 20100624-093755902010-06-2421: 30 20100624-092813222010-06-2421: 20 20100624-091685412010-06-2421: 10 20100624-090818552010-06-2421: 00 20100624-089883212010-06-2420: 50 20100624-088952302010-06-2420: 40 20100624-087793022010-06-2420: 30 20100624-086086242010-06-2420: 20 20100624-085870482010-06-2420: 10 20100624-084827102010-06-2420: 00 程序运行结束,按任意键关闭窗口! 
抓取Web网页数据分析(C#)
通过程序自动地读取其它网站网页显示的信息,类似于爬虫程序。比方说我们有一个系统,要提取BaiDu网站上歌曲搜索排名。分析系统再根据得到的数据进行数据分析,为业务提供参考数据。
为了完成以上的需求,我们就需要模拟浏览器浏览网页,得到页面的数据再进行分析,最后把分析的结果,即整理好的数据写入数据库。那么我们的思路就是:
1、发送HttpRequest请求。
2、接收HttpResponse返回的结果,得到特定页面的html源文件。
3、取出包含数据的那一部分源码。
4、根据html源码生成HtmlDocument,循环取出数据。
5、写入数据库。
程序如下:
//根据Url地址得到网页的html源码
private string GetWebContent(string Url)
{
    string strResult = "";
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url); //声明一个HttpWebRequest请求
        request.Timeout = 30000; //设置连接超时时间
        request.Headers.Set("Pragma", "no-cache");
        HttpWebResponse response = (HttpWebResponse)request.GetResponse();
        Stream streamReceive = response.GetResponseStream();
        Encoding encoding = Encoding.GetEncoding("GB2312");
        StreamReader streamReader = new StreamReader(streamReceive, encoding);
        strResult = streamReader.ReadToEnd();
    }
    catch
    {
        MessageBox.Show("出错");
    }
    return strResult;
}
为了使用HttpWebRequest和HttpWebResponse,需添加命名空间引用
using System.Net;
以下是程序具体实现过程:
private void button1_Click(object sender, EventArgs e)
{
    //要抓取的URL地址
    string Url = "
    //得到指定Url的源码
    string str
- 配套讲稿:
如PPT文件的首页显示word图标,表示该PPT已包含配套word讲稿。双击word图标可打开word文档。
- 特殊限制:
部分文档作品中含有的国旗、国徽等图片,仅作为作品整体效果示例展示,禁止商用。设计者仅对作品中独创性部分享有著作权。
- 关 键 词:
- 网页 提取 数据