Java读取网页内容并下载图片的实例

2021-01-10 11:38Lovnx JAVA教程

这篇文章主要介绍了Java读取网页内容并下载图片的实例及相关资料，希望本文能帮助大家实现这样的功能，需要的朋友可以参考一下。

Java读取网页内容并下载图片的实例

很多人第一次接触数据采集时可能无从下手，新手尤其容易感到茫然。因此，我在这里分享一下自己的心得，希望和大家一起交流技术；如果有什么不足，还请大家指正。写这篇文章的目的，就是希望大家一起成长。我也相信技术没有高低之分，只有互补；只有分享，才能让彼此更快地成长。

示例代码：

									import java.io.BufferedInputStream;

									import java.io.BufferedReader;

									import java.io.File;

									import java.io.FileNotFoundException;

									import java.io.FileOutputStream;

									import java.io.IOException;

									import java.io.InputStreamReader;

									import java.net.MalformedURLException;

									import java.net.URL;

									import java.util.regex.Matcher;

									import java.util.regex.Pattern;

									/**
									 * Fetches an HTML page, finds the images it references and saves each one under
									 * {@code ./pic/}.
									 *
									 * <p>Image references are located with regular expressions over {@code src=} and
									 * {@code background=} attributes; only {@code .jpg}, {@code .png} and {@code .gif}
									 * files are recognized.
									 */
									public class GetContentPicture {

									  /** Directory that downloaded images are written to, relative to the working directory. */
									  private static final String PICTURE_DIR = "./pic/";

									  /** Charset used to decode fetched pages, fixed so results do not vary by platform. */
									  private static final String PAGE_CHARSET = "UTF-8";

									  /** Size in bytes of the copy buffer used when saving an image. */
									  private static final int BUFFER_SIZE = 8192;

									  /**
									   * Matches an image reference given as a path without scheme or host. Group 3 is the
									   * path with any single leading slash removed.
									   */
									  private static final Pattern RELATIVE_IMAGE = Pattern.compile(
									      "(?x)(src|SRC|background|BACKGROUND)=('|\")/?(([\\w-]+/)*([\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))('|\")");

									  /** Matches an image reference given as a full http(s) URL. Group 3 is that URL. */
									  private static final Pattern ABSOLUTE_IMAGE = Pattern.compile(
									      "(?x)(src|SRC|background|BACKGROUND)=('|\")(https?://([\\w-]+\\.)+[\\w-]+(:[0-9]+)*(/[\\w-]+)*(/[\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))('|\")");

									  /**
									   * Downloads the resource at {@code httpUrl} into {@code ./pic/}, naming the file after
									   * the last path segment of the URL.
									   *
									   * <p>This is best-effort: I/O failures are reported with a stack trace on standard error
									   * and the method returns normally, so one bad image does not stop a batch.
									   *
									   * @param httpUrl absolute URL of the image to download
									   */
									  public void getHtmlPicture(String httpUrl) {
									    System.out.println("取网络图片");

									    // lastIndexOf returns -1 when there is no slash, so "+ 1" keeps the whole string.
									    String fileName = httpUrl.substring(httpUrl.lastIndexOf('/') + 1);
									    if (fileName.isEmpty()) {
									      System.err.println("No file name in image URL: " + httpUrl);
									      return;
									    }

									    File directory = new File(PICTURE_DIR);
									    File target = new File(directory, fileName);
									    try {
									      URL url = new URL(httpUrl);
									      // FileOutputStream does not create missing parent directories.
									      if (!directory.isDirectory() && !directory.mkdirs()) {
									        throw new IOException("Cannot create directory " + directory);
									      }
									      // try-with-resources closes both streams even when the copy fails part-way.
									      try (BufferedInputStream in = new BufferedInputStream(url.openStream());
									          FileOutputStream out = new FileOutputStream(target)) {
									        byte[] buffer = new byte[BUFFER_SIZE];
									        int count;
									        while ((count = in.read(buffer)) != -1) {
									          out.write(buffer, 0, count);
									        }
									      }
									      System.out.println("图片获取成功");
									    } catch (IOException e) {
									      // Covers MalformedURLException and FileNotFoundException as well.
									      e.printStackTrace();
									    }
									  }

									  /**
									   * Reads the document at {@code httpUrl} and returns its text with all line terminators
									   * removed (lines are joined directly, with no separator).
									   *
									   * @param httpUrl absolute URL of the page to read
									   * @return the page text as one line, decoded as UTF-8
									   * @throws IOException if the URL is malformed or the page cannot be read
									   */
									  public String getHtmlCode(String httpUrl) throws IOException {
									    URL pageUrl = new URL(httpUrl);
									    StringBuilder content = new StringBuilder();
									    try (BufferedReader reader =
									        new BufferedReader(new InputStreamReader(pageUrl.openStream(), PAGE_CHARSET))) {
									      String line;
									      while ((line = reader.readLine()) != null) {
									        content.append(line);
									      }
									    }
									    return content.toString();
									  }

									  /**
									   * Fetches the page at {@code url}, prints its text, then prints and downloads every
									   * image it references.
									   *
									   * <p>Path-only references are resolved against the page URL, so a page URL that does
									   * not end in {@code /} still yields valid image URLs.
									   *
									   * @param url absolute URL of the page to scan
									   * @throws IOException if the URL is malformed or the page cannot be read
									   */
									  public void get(String url) throws IOException {
									    String content = this.getHtmlCode(url);
									    System.out.println(content);
									    URL pageUrl = new URL(url);

									    Matcher matcher = RELATIVE_IMAGE.matcher(content);
									    while (matcher.find()) {
									      String path = matcher.group(3);
									      System.out.println(path);
									      // Group 3 omits the optional leading slash; it is restored here so that
									      // root-relative references resolve against the host, not the page directory.
									      boolean rootRelative = content.charAt(matcher.start(3) - 1) == '/';
									      URL imageUrl = new URL(pageUrl, rootRelative ? "/" + path : path);
									      this.getHtmlPicture(imageUrl.toString());
									    }

									    matcher = ABSOLUTE_IMAGE.matcher(content);
									    while (matcher.find()) {
									      String imageUrl = matcher.group(3);
									      System.out.println(imageUrl);
									      this.getHtmlPicture(imageUrl);
									    }
									  }

									  /**
									   * Scans a fixed sample page and downloads its images.
									   *
									   * @param args ignored
									   * @throws IOException if the sample page cannot be read
									   */
									  public static void main(String[] args) throws IOException {
									    String url = "http://www.baidu.com/";
									    GetContentPicture gcp = new GetContentPicture();
									    gcp.get(url);
									  }
									}