服务器之家:专注于服务器技术及软件下载分享
分类导航

PHP教程|ASP.NET教程|Java教程|ASP教程|编程技术|正则表达式|C/C++|IOS|C#|Swift|Android|VB|R语言|JavaScript|易语言|vb.net|

服务器之家 - 编程语言 - Java教程 - Java实现爬取往期所有双色球开奖结果功能示例

Java实现爬取往期所有双色球开奖结果功能示例

2021-05-16 16:43ithouse Java教程

这篇文章主要介绍了Java实现爬取往期所有双色球开奖结果功能,涉及Java网页抓取、正则替换、文件读写等相关操作技巧,需要的朋友可以参考下

本文实例讲述了Java实现爬取往期所有双色球开奖结果功能。分享给大家供大家参考,具体如下:

梦想还是要有的,万一实现了呢?我相信经常买双色球的朋友和我都会有一个疑问,就是往期双色球的开奖结果是什么?我钟意的这一注双色球在往期是否开过一等奖,如果开过的话,基本上可以放弃这一注了,因为历史上应该没有出现过两期双色球开奖完全一致的吧?那么往期的开奖结果是什么呢?我自己用Java写了一个简易的类,爬取所有双色球开奖结果,本来想开发安卓版本的,由于UI等需要时间准备,有缘再开发吧。

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
/**
 * Command-line scraper that walks every result list page under
 * {@code http://kaijiang.zhcw.com/zhcw/html/ssq/list_N.html}, extracts the
 * numbers of each draw row and writes them to {@code 双色球.txt}, one row per
 * line with the numbers separated by single spaces.
 */
public class allballs {
  /** Fixed part of a list page URL in front of the 1-based page index. */
  private static final String LIST_URL_PREFIX = "http://kaijiang.zhcw.com/zhcw/html/ssq/list_";
  /** Fixed part of a list page URL behind the page index. */
  private static final String LIST_URL_SUFFIX = ".html";
  /** Name of the output file, created in the working directory. */
  private static final String OUTPUT_FILE_NAME = "双色球.txt";
  /** Pause between two page requests, to go easy on the remote server. */
  private static final long PAGE_DELAY_MILLIS = 1200;
  /** Connect and read timeout for every request. */
  private static final int TIMEOUT_MILLIS = 6 * 1000;

  /** Digits directly in front of the {@code ">末页} ("last page") link text. */
  private static final Pattern LAST_PAGE_PATTERN = Pattern.compile("(\\d+)\">末页");
  /**
   * One draw row fragment: from the number cell's opening tag to its closing
   * {@code </em></td>}. {@code [\s\S]} is used so the lazy match can span tags
   * and line breaks.
   */
  private static final Pattern TERM_PATTERN =
      Pattern.compile("<td align=\"center\" style=\"padding-left:10px;\">[\\s\\S]+?</em></td>");
  /** A run of digits that forms the whole text between {@code >} and {@code <}. */
  private static final Pattern NUMBER_PATTERN = Pattern.compile(">(\\d+)<");

  /** Accumulates the output text; one CRLF-terminated line per extracted row. */
  private static final StringBuilder results = new StringBuilder();

  /**
   * Entry point: determines the page count from the first list page, scrapes
   * all pages and writes the collected rows to the output file.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    System.out.println("正在获取...");
    results.setLength(0);
    int pageCount = getPageCount(getHtmlString(LIST_URL_PREFIX + 1 + LIST_URL_SUFFIX));
    if (pageCount > 0) {
      fetchAllPages(pageCount);
      writeResults(new File(OUTPUT_FILE_NAME));
    } else {
      System.out.println("结果页数为0");
    }
    System.out.println("完成!");
  }

  /**
   * Downloads pages {@code 1..pageCount} and appends their rows to the buffer.
   * A page that cannot be fetched is reported and skipped. If the thread is
   * interrupted while pausing, the interrupt flag is restored and paging stops
   * so that the rows collected so far can still be written.
   */
  private static void fetchAllPages(int pageCount) {
    for (int page = 1; page <= pageCount; page++) {
      String pageContent = getHtmlString(LIST_URL_PREFIX + page + LIST_URL_SUFFIX);
      if (pageContent != null && !pageContent.isEmpty()) {
        getOneTermContent(pageContent);
      } else {
        System.out.println("第" + page + "页丢失");
      }
      try {
        Thread.sleep(PAGE_DELAY_MILLIS);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        return;
      }
    }
  }

  /** Writes the accumulated rows to {@code file}, replacing any existing content. */
  private static void writeResults(File file) {
    // FileWriter truncates an existing file, so no explicit delete is needed.
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
      writer.write(results.toString());
    } catch (IOException e) {
      System.err.println("Failed to write " + file + ": " + e);
    }
  }

  /**
   * Extracts the total page count from a list page.
   *
   * @param result HTML of a list page; may be {@code null} when the download failed
   * @return the number in front of the first "last page" link, or 0 when the
   *     input is {@code null}, contains no such link, or the number does not
   *     fit into an {@code int}
   */
  private static int getPageCount(String result) {
    if (result == null) {
      return 0;
    }
    Matcher matcher = LAST_PAGE_PATTERN.matcher(result);
    if (!matcher.find()) {
      return 0;
    }
    try {
      return Integer.parseInt(matcher.group(1));
    } catch (NumberFormatException e) {
      return 0;
    }
  }

  /**
   * Downloads the page at {@code targetUrl} and returns its body decoded as UTF-8.
   *
   * @param targetUrl absolute HTTP URL of the page
   * @return the page source with every line terminated by {@code \n}, or
   *     {@code null} when the response status is not 200 or the request fails
   */
  private static String getHtmlString(String targetUrl) {
    HttpURLConnection connection = null;
    try {
      connection = (HttpURLConnection) new URL(targetUrl).openConnection();
      // HttpURLConnection only accepts upper-case method tokens.
      connection.setRequestMethod("POST");
      connection.setRequestProperty("user-agent", "mozilla/4.0 (compatible; msie 7.0; windows 7)");
      connection.setRequestProperty("accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-powerpoint, application/vnd.ms-excel, application/msword, */*");
      connection.setRequestProperty("accept-language", "zh-cn");
      connection.setRequestProperty("ua-cpu", "x86");
      // Only gzip is advertised because it is the only compressed encoding decoded below.
      connection.setRequestProperty("accept-encoding", "gzip");
      connection.setRequestProperty("content-type", "text/html");
      // Keep-alive has no benefit for a scraper; closing eases load on both ends.
      connection.setRequestProperty("connection", "close");
      // No cache: the program does not request the same link frequently.
      connection.setUseCaches(false);
      connection.setConnectTimeout(TIMEOUT_MILLIS);
      connection.setReadTimeout(TIMEOUT_MILLIS);
      connection.setDoOutput(true);
      connection.setDoInput(true);
      connection.setRequestProperty("charset", "utf-8");
      connection.connect();
      if (HttpURLConnection.HTTP_OK != connection.getResponseCode()) {
        return null;
      }
      try (BufferedReader reader = new BufferedReader(
          new InputStreamReader(openBody(connection), StandardCharsets.UTF_8))) {
        StringBuilder builder = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
          builder.append(line).append("\n");
        }
        return builder.toString();
      }
    } catch (IOException | RuntimeException e) {
      // Best effort: callers treat null as "page missing" and carry on.
      System.err.println("Failed to fetch " + targetUrl + ": " + e);
      return null;
    } finally {
      if (connection != null) {
        connection.disconnect();
      }
    }
  }

  /** Returns the response stream, wrapped for decompression when the server sent gzip. */
  private static InputStream openBody(HttpURLConnection connection) throws IOException {
    String encoding = connection.getContentEncoding();
    InputStream body = connection.getInputStream();
    if (encoding != null && encoding.toLowerCase(Locale.ROOT).contains("gzip")) {
      return new GZIPInputStream(body);
    }
    return body;
  }

  /**
   * Finds every draw row fragment in a list page and appends its numbers to
   * the result buffer.
   *
   * @param pageContent HTML of one list page
   */
  private static void getOneTermContent(String pageContent) {
    Matcher matcher = TERM_PATTERN.matcher(pageContent);
    while (matcher.find()) {
      getOneTermNumbers(matcher.group());
    }
  }

  /**
   * Appends all numbers of one row fragment to the result buffer, each
   * followed by a space, and terminates the line with CRLF (also when the
   * fragment contains no number).
   *
   * @param oneTermContent HTML fragment of a single draw row
   */
  private static void getOneTermNumbers(String oneTermContent) {
    Matcher matcher = NUMBER_PATTERN.matcher(oneTermContent);
    while (matcher.find()) {
      results.append(matcher.group(1)).append(" ");
    }
    results.append("\r\n");
  }
}

运行结果:

Java实现爬取往期所有双色球开奖结果功能示例

希望本文所述对大家Java程序设计有所帮助。

原文链接:https://blog.csdn.net/ithouse/article/details/50908296

延伸 · 阅读

精彩推荐