页面爬虫(获取其他页面HTML)加载到自己页面示例

2019-11-12 13:30 asp.net教程网 ASP.NET教程

利用页面爬虫（在服务器端获取其他页面的 HTML）并将结果加载到自己的页面中，即可实现所谓的“小偷程序”。具体实现代码如下，感兴趣的朋友可以参考。

代码如下:

	
	//前台 

	<div id="showIframe"></div> 

	$(document).ready(function() { 

	var url = "@Url.Action("GetPageHtml","Catalog")"; 

	$.ajax({ 

	url: url, 

	type: "POST", 

	dataType:"json", 

	data: { url: "http://www.baidu.com" }, 

	error: function () { 

	alert("bbb"); 

	}, 

	success: function (data) { 

	$("#showIframe").append(data); 

	//$("#showIframe div").hide(); 

	//$("#showIframe>#container").show(); 

	//$("#showIframe>#container>#content").show(); 

	//$("#showIframe>#container>#content>.cmsPage").show(); 

	} 

	}); 

	}); 

	//后台 

	//爬虫本质，发送URL请求，返回整个页面HTML 

	/// <summary>
	/// Downloads the page at <paramref name="url"/> on the server and returns its HTML
	/// to the caller as a JSON-encoded string.
	/// </summary>
	/// <param name="url">Absolute http or https address of the page to fetch.</param>
	/// <returns>
	/// The page's HTML wrapped in a <see cref="JsonResult"/>; an empty string when the
	/// address is not a valid http/https URL or the request fails.
	/// </returns>
	[HttpPost]
	public JsonResult GetPageHtml(string url)
	{
		string pageinfo = "";

		// NOTE(review): url is supplied by the client, so this action can be used to make
		// the server request arbitrary hosts (SSRF). Only http/https is accepted here;
		// consider restricting the target to an allow-list of hosts as well.
		Uri target;
		if (!Uri.TryCreate(url, UriKind.Absolute, out target)
			|| (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps))
		{
			return Json(pageinfo);
		}

		try
		{
			HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(target);
			myReq.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
			myReq.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)";

			// Dispose response, stream and reader so the underlying connection is released.
			using (HttpWebResponse myRep = (HttpWebResponse)myReq.GetResponse())
			using (Stream myStream = myRep.GetResponseStream())
			using (StreamReader sr = new StreamReader(myStream, ResolveResponseEncoding(myRep), true))
			{
				pageinfo = sr.ReadToEnd();
			}
		}
		catch (Exception ex)
		{
			// Best-effort by design: the caller receives an empty string on any failure,
			// but the cause is traced instead of being silently discarded.
			System.Diagnostics.Trace.TraceWarning("GetPageHtml failed for {0}: {1}", url, ex.Message);
			pageinfo = "";
		}

		return Json(pageinfo);
	}

	/// <summary>
	/// Picks the text encoding for a response body: the charset declared in the
	/// Content-Type header when it names a known encoding, otherwise UTF-8.
	/// </summary>
	private static Encoding ResolveResponseEncoding(HttpWebResponse response)
	{
		string contentType = response.ContentType;
		bool declaresCharset = !string.IsNullOrEmpty(contentType)
			&& contentType.IndexOf("charset=", StringComparison.OrdinalIgnoreCase) >= 0;

		if (declaresCharset && !string.IsNullOrEmpty(response.CharacterSet))
		{
			try
			{
				return Encoding.GetEncoding(response.CharacterSet);
			}
			catch (ArgumentException)
			{
				// Unknown or malformed charset name: fall through to the UTF-8 default.
			}
		}

		return Encoding.UTF8;
	}