本文实例为大家分享了 Java 抓取邮箱号码的具体代码,供大家参考,具体内容如下:
Java 抓取文件中邮箱号码的具体代码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
|
package reg;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reads a local text/HTML file and prints every e-mail address found in it,
 * one address per line.
 */
public class TestEmail {

    /** File that is scanned when no path is passed on the command line. */
    private static final String DEFAULT_PATH = "D:/1.htm";

    /**
     * E-mail pattern: a local part ending in a word character, {@code @}, a
     * domain ending in a word character, a dot, and a suffix of 2 to 5 word
     * characters. Compiled once instead of on every {@link #getEmail} call.
     */
    private static final Pattern EMAIL_PATTERN =
            Pattern.compile("[\\w\\.-]*\\w+@[\\w\\.-]*\\w+\\.\\w{2,5}");

    /**
     * Prints all e-mail addresses found in a file.
     *
     * @param args optional; {@code args[0]} is the file to scan, otherwise
     *             {@code D:/1.htm} is used
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        try {
            for (String email : getEmail(readText(path))) {
                System.out.println(email);
            }
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers both.
            System.err.println("Could not read " + path + ": " + e);
        }
    }

    /**
     * Reads the whole file into one string.
     *
     * @param path file to read
     * @return the file content, with every line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    private static String readText(String path) throws IOException {
        StringBuilder text = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator; put one back so that the
                // end of one line cannot fuse with an address starting the next.
                text.append(line).append('\n');
            }
        }
        return text.toString();
    }

    /**
     * Collects every substring of {@code str} that matches the e-mail pattern.
     *
     * @param str text to scan; may be {@code null}
     * @return the matches in order of appearance (duplicates are kept); an
     *         empty list when {@code str} is {@code null} or has no match
     */
    public static List<String> getEmail(String str) {
        List<String> emails = new ArrayList<>();
        if (str == null) {
            return emails;
        }
        Matcher matcher = EMAIL_PATTERN.matcher(str);
        while (matcher.find()) {
            emails.add(matcher.group());
        }
        return emails;
    }
}
Java 抓取网页中邮箱号码的具体代码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
|
package reg;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads a web page line by line and prints every e-mail address found in it.
 */
public class Testemail01 {

    /** Page that is scanned when no URL is passed on the command line. */
    private static final String DEFAULT_URL = "http://tieba.baidu.com/p/2366935784";

    /**
     * Regular expression for e-mail addresses. Compiled once instead of once
     * per input line.
     */
    private static final Pattern EMAIL_PATTERN =
            Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+");

    /**
     * Opens {@code domain} as a URL, reads it line by line and prints each
     * e-mail address found to standard output.
     *
     * @param domain the URL to fetch
     * @return an empty string when the page was read without error; otherwise
     *         the {@code toString()} of the exception that stopped the scan
     *         (addresses found before the failure have already been printed)
     */
    public static String getWebCon(String domain) {
        System.out.println("开始抓取邮件地址..(" + domain + ")");
        StringBuilder error = new StringBuilder();
        try {
            java.net.URL url = new java.net.URL(domain);
            // try-with-resources closes the stream even when reading or parsing fails.
            // The reader uses the platform default charset, as the original did.
            try (BufferedReader in =
                    new BufferedReader(new InputStreamReader(url.openStream()))) {
                String line;
                while ((line = in.readLine()) != null) {
                    parse(line);
                }
            }
        } catch (Exception e) {
            // Deliberately broad: any failure is reported through the return value.
            error.append(e.toString());
            System.err.println(e);
        }
        return error.toString();
    }

    /**
     * Scans a web page for e-mail addresses.
     *
     * @param args optional; {@code args[0]} is the URL to scan, otherwise the
     *             built-in default page is used
     */
    public static void main(String[] args) {
        String target = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        // Any error has already been written to System.err by getWebCon.
        Testemail01.getWebCon(target);
    }

    /**
     * Prints every e-mail address that occurs in {@code line}, one per line.
     *
     * @param line a single line of page text
     */
    private static void parse(String line) {
        Matcher matcher = EMAIL_PATTERN.matcher(line);
        while (matcher.find()) {
            System.out.println(matcher.group());
        }
    }
}
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。