ASP.NET

it2025-10-28  5

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using HtmlAgilityPack; // HTML parsing library used by the crawlers below
using System.Text;
using DotNet;
using System.Net;
using System.IO;

/// <summary>
/// Code-behind for the default page. On load it runs a small HtmlAgilityPack-based
/// crawler and writes what it finds directly into the HTTP response.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>
    /// Page load handler: runs the knowledge-base crawler on every request.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        kwldg_Reptile();
    }

    /// <summary>
    /// Crawler for the "hot products" links on the hm.jia360.com home page.
    /// For every matching anchor it downloads the anchor's image into
    /// <c>~/upload/link/</c> and writes the title, href and image path to the response.
    /// </summary>
    /// <remarks>
    /// Download errors raised by <see cref="WebClient.DownloadFile(Uri, string)"/> are not
    /// caught here and propagate to the caller, as in the original implementation.
    /// </remarks>
    private void TencentHome_HotProdcut_Reptile()
    {
        // Address to crawl.
        string url = "http://hm.jia360.com/";

        // Fetch and parse the remote document.
        HtmlWeb web = new HtmlWeb();
        HtmlDocument htmldoc = web.Load(url);

        // All <a> elements directly under an <li> inside the tab box container.
        HtmlNodeCollection aCollection = htmldoc.DocumentNode.SelectNodes("//*[starts-with(@class,'tab_box ')]//li/a");

        // SelectNodes yields null (not an empty collection) when nothing matches.
        if (aCollection == null)
        {
            return;
        }

        Uri baseUri = new Uri(url);

        // Make sure the target directory exists before the first download.
        string saveDirectory = Server.MapPath("~/upload/link/");
        Directory.CreateDirectory(saveDirectory);

        // One client for the whole run, disposed when the loop is done.
        using (WebClient wc = new WebClient())
        {
            foreach (HtmlNode item in aCollection)
            {
                string title = item.InnerText;

                // Missing attributes fall back to an empty string instead of throwing.
                string href = item.GetAttributeValue("href", string.Empty);

                // First child <img> that actually carries a src attribute.
                HtmlNode imgNode = item.SelectSingleNode("./img[@src]");
                string imgpath = imgNode == null
                    ? string.Empty
                    : imgNode.GetAttributeValue("src", string.Empty);

                // Resolve relative / protocol-relative sources against the page URL and
                // only download http(s) resources (WebClient cannot fetch e.g. data: URIs).
                Uri imgUri;
                if (imgpath.Length > 0
                    && Uri.TryCreate(baseUri, imgpath, out imgUri)
                    && (imgUri.Scheme == Uri.UriSchemeHttp || imgUri.Scheme == Uri.UriSchemeHttps))
                {
                    // LocalPath excludes the query string, so "a.png?v=1" is saved as "a.png".
                    string fileName = Path.GetFileName(imgUri.LocalPath);
                    if (!string.IsNullOrEmpty(fileName))
                    {
                        wc.DownloadFile(imgUri, Path.Combine(saveDirectory, fileName));
                    }
                }

                WriteEncodedLine(title);
                WriteEncodedLine(href);
                WriteEncodedLine(imgpath);
            }
        }
    }

    /// <summary>
    /// Crawler for the 98keji.com article list (page 1). Writes the text and href of
    /// every matching anchor to the response.
    /// </summary>
    private void kwldg_Reptile()
    {
        // Address to crawl.
        string url = "http://98keji.com/article/article_list.aspx?pn=1";

        // Fetch and parse the remote document.
        HtmlWeb web = new HtmlWeb();
        HtmlDocument htmldoc = web.Load(url);

        // All <a> elements directly under an <li> inside the article list container.
        HtmlNodeCollection aCollection = htmldoc.DocumentNode.SelectNodes("//*[starts-with(@class,'article_list ')]//li/a");

        // SelectNodes yields null (not an empty collection) when nothing matches.
        if (aCollection == null)
        {
            return;
        }

        foreach (HtmlNode item in aCollection)
        {
            WriteEncodedLine(item.InnerText);
            WriteEncodedLine(item.GetAttributeValue("href", string.Empty));
        }
    }

    /// <summary>
    /// Writes one scraped value to the response followed by a line break.
    /// </summary>
    /// <param name="text">Text taken from the remote page; treated as untrusted.</param>
    private void WriteEncodedLine(string text)
    {
        // Decode entities first so text such as "&amp;" is not double-encoded, then
        // HTML-encode so remote markup cannot inject script into this page.
        // NOTE(review): assumes the scraped values still carry their source entities — confirm for the HtmlAgilityPack version in use.
        string decoded = HtmlEntity.DeEntitize(text);
        Response.Write(HttpUtility.HtmlEncode(decoded) + "<br/>");
    }
}
最新回复(0)