之前实现了使用流来将http和ftp的文件下载到本地,也实现了将本地文件上传到hdfs上,那现在就可以做到将
ftp和http的文件转移到hdfs上了,而不用先将ftp和http的文件拷贝到本地再上传到hdfs上了。其实这个东西的原理
很简单,就是使用流,将ftp或http的文件读入到流中,然后将流中的内容传送到hdfs上,这样子就不用让数据存到
本地的硬盘上了,只是让内存来完成这个转移的过程,希望这个工具,能够帮到有这样需求的同学~
这里先附上之前的几个工具的链接:
代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
import java.io.InputStream; import java.io.OutputStream; import java.io.IOException; public class FileTrans { private String head = "" ; private String hostname = "" ; private String FilePath = "" ; private String hdfsFilePath = "" ; private HDFSUtil hdfsutil = null ; private FtpClient ftp; private HttpUtil http; public void setFilePath(String FilePath){ this .FilePath = FilePath; } public String getFilePath(String FilePath){ return this .FilePath; } public void sethdfsFilePath(String hdfsFilePath){ this .hdfsFilePath = hdfsFilePath; } public String gethdfsFilePath(String hdfsFilePath){ return this .hdfsFilePath; } public void setHostName(String hostname){ this .hostname = hostname; } public String getHostName(){ return this .hostname; } public void setHead(String head){ this .head = head; } public String getHead(){ return this .head; } public FileTrans(String head, String hostname, String filepath, String hdfsnode,String hdfsFilepath){ this .head = head; this .hostname = hostname; this .FilePath = filepath; this .hdfsFilePath = hdfsFilepath; if (head.equals( "ftp" ) && hostname != "" ){ this .ftp = new FtpClient( this .hostname); } if ((head.equals( "http" ) || head .equals( "https" )) && hostname != "" ){ String httpurl = head + "://" + hostname + "/" + filepath; this .http = new HttpUtil(httpurl); } if (hdfsnode != "" ){ this .hdfsutil = new HDFSUtil(hdfsnode); } this .hdfsutil.setHdfsPath( this .hdfsFilePath); this .hdfsutil.setFilePath(hdfsutil.getHdfsNode()+hdfsutil.getHdfsPath()); this .hdfsutil.setHadoopSite( "./hadoop-site.xml" ); this .hdfsutil.setHadoopDefault( "./hadoop-default.xml" ); this .hdfsutil.setConfigure( false ); } public static void main(String[] args) throws IOException{ String head = "" ; String hostname = "" ; String filepath = "" ; String hdfsfilepath = "" ; String hdfsnode = "" ; String localpath = "" ; InputStream inStream = null ; int samplelines = 0 ; try { head = args[ 0 ]; //远端服务器类型,http还是ftp hostname = args[ 1 ]; //远端服务器hostname 
filepath = args[ 2 ]; //远端文件路径 hdfsnode = args[ 3 ]; //hdfs的机器名,不带hdfs开头 hdfsfilepath = args[ 4 ]; //hdfs的文件路径 localpath = args[ 5 ]; //如果需要在本地保存一份的话,输入本地的路径,不保存,传入空格或者samplelines传入0 samplelines = Integer.parseInt(args[ 6 ]); //保存在本地的话,保存前N行,如果不保存,填0 } catch (Exception e){ System.out.println( "[FileTrans]:input args error!" ); e.printStackTrace(); } FileTrans filetrans = new FileTrans(head, hostname, filepath, hdfsnode,hdfsfilepath); if (filetrans == null ){ System.out.println( "filetrans null" ); return ; } if (filetrans.ftp == null && head.equals( "ftp" )){ System.out.println( "filetrans ftp null" ); return ; } if (filetrans.http == null && (head.equals( "http" ) || head.equals( "https" ))){ System.out.println( "filetrans ftp null" ); return ; } try { if (head.equals( "ftp" )){ inStream = filetrans.ftp.getStream(filepath); if (samplelines > 0 ){ filetrans.ftp.writeStream(inStream, localpath, samplelines); } } else { inStream = filetrans.http.getStream(head + "://" + hostname + "/" + filepath); if (samplelines > 0 ){ filetrans.http.downLoad(head + "://" + hostname + "/" + filepath, localpath, samplelines); } } filetrans.hdfsutil.upLoad(inStream, filetrans.hdfsutil.getFilePath()); if (head == "ftp" ){ filetrans.ftp.disconnect(); } } catch (IOException e){ System.out.println( "[FileTrans]: file trans failed!" ); e.printStackTrace(); } System.out.println( "[FileTrans]: file trans success!" ); } } |
编译有问题的话,在hadoop工具的那篇文章中有提到,可以参考
注:最好将其他三个工具的文件放在同一个目录下,如果不放在一起,那么请自行引用
这个工具既可以将ftp或者http上的文件转移到hdfs,也能将文件的前N行保存到本地,以便进行分析
以上就是本文所述的全部内容了,希望能够对大家学习java有所帮助。
请您花一点时间将文章分享给您的朋友或者留下评论。我们将会由衷感谢您的支持!