在看KMP算法时,想要简单的统计一下执行时间和性能。
得出的结论是: Java的String的indexOf方法性能最好,其次是KMP算法,其次是传统的BF算法,当然,对比有点牵强,SUN的算法也使用Java来实现、用的看着不像是KMP,还需要详细研究一下。
测试代码如下所示:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
|
package com.test.test.kmp; import java.util.Random; public class KMPTest { public static void main(String[] args) { test(); } // public static void test() { // int allLen = 8000000 ; int subLen = 11 ; int charLen = 4 ; String cl = buildString(charLen); int times = 50 ; // testMatch(allLen, subLen, cl, times); } public static void testMatch( int allLen, int subLen, String cl, int times){ int allBF = 0 ; int allString = 0 ; int allKMP = 0 ; int allGC = 0 ; int allbuild = 0 ; int alltoArray = 0 ; long start = System.currentTimeMillis(); System.out.println( "--------------------------" ); for ( int i = 0 ; i < times; i++) { // 1. 构造字符串的损耗 long buildStart = System.currentTimeMillis(); String sub = buildString(subLen, cl); String all = buildString(allLen, sub) +sub; long buildEnd = System.currentTimeMillis(); allbuild += (buildEnd - buildStart); // 2. toCharArray的损耗 long toArrayStart = System.currentTimeMillis(); char [] allArray = all.toCharArray(); char [] subArray = sub.toCharArray(); long toArrayEnd = System.currentTimeMillis(); alltoArray += (toArrayEnd - toArrayStart); // long startBF = System.currentTimeMillis(); int indexBF = bfIndexOf(all, sub); long endBF = System.currentTimeMillis(); // long timeoutBF = endBF - startBF; allBF += timeoutBF; System.gc(); allGC += (System.currentTimeMillis() - endBF); // long startString = System.currentTimeMillis(); int indexString = stringIndexOf(all, sub); long endString = System.currentTimeMillis(); // long timeoutString = endString - startString; allString += timeoutString; System.gc(); allGC += (System.currentTimeMillis() - endString); // long startKMP = System.currentTimeMillis(); //int indexKMP = kmpIndexOf(all, sub); int indexKMP = KMP_Index(allArray, subArray); long endKMP = System.currentTimeMillis(); // long timeoutKMP = endKMP - startKMP; allKMP += timeoutKMP; System.gc(); allGC += (System.currentTimeMillis() - endKMP); // //System.out.println("all="+all.substring(0,100)+" ......"); //System.out.println("sub="+sub); // //System.out.println("indexBF="+indexBF+";耗时: "+timeoutBF+"ms"); //System.out.println("indexString="+indexString+";耗时: "+timeoutString+"ms"); if (indexBF == indexString && indexKMP == indexString){ //System.out.println("!!!!对比相等。"); } else { throw new RuntimeException( "对比出错." ); } // if (i % 20 == 10 ){ System.out.println(i+ "次bfIndexOf= " +allBF+ "ms" ); System.out.println(i+ "次stringIndexOf= " +allString+ "ms" ); System.out.println(i+ "次KMPIndexOf= " +allKMP+ "ms" ); System.out.println(i+ "次allbuild= " +allbuild+ "ms" ); System.out.println(i+ "次alltoArray= " +alltoArray+ "ms" ); System.out.println(i+ "*3次,GC= " +allGC+ "ms" ); long end = System.currentTimeMillis(); long allTime = end-start; long lossTime = allTime - (allBF+allString+allKMP+allGC); System.out.println(i+ "次,所有总计耗时: " +(allTime)+ "ms; 损耗:" + lossTime + "ms; 损耗比: " +(( 0.0 +lossTime)/allTime * 100 )+ "%" ); System.out.println( "--------------------------" ); } } // System.out.println(times+ "次bfIndexOf;总计耗时: " +allBF+ "ms" ); System.out.println(times+ "次stringIndexOf;总计耗时: " +allString+ "ms" ); System.out.println(times+ "次KMPIndexOf;总计耗时: " +allKMP+ "ms" ); System.out.println(times+ "次allbuild= " +allbuild+ "ms" ); System.out.println(times+ "次alltoArray= " +alltoArray+ "ms" ); System.out.println(times+ "*3次,GC;总计耗时: " +allGC+ "ms" ); long end = System.currentTimeMillis(); long allTime = end-start; long lossTime = allTime - (allBF+allString+allKMP+allGC); System.out.println(times+ "次,所有总计耗时: " +(allTime)+ "ms; 损耗:" + lossTime + "ms; 损耗比: " +(( 0.0 +lossTime)/allTime * 100 )+ "%" ); System.out.println( "--------------------------" ); } // // 构建字符 public static String buildString( int len){ return buildString(len, null ); } public static String buildString( int len, String sub){ // final int TYPE_NUMBER = 0 ; final int TYPE_LOWERCASE = 1 ; final int TYPE_UPPERCASE = 2 ; // long seed = System.nanoTime(); Random random = new Random(seed); // // StringBuilder builder = new StringBuilder(); for ( int i = 0 ; i < len; i++) { // int type = random.nextInt( 3 ); // 0-2 int cvalue = 0 ; if (TYPE_NUMBER == type){ cvalue = '0' + random.nextInt( 10 ); // 0~(n-1) } else if (TYPE_LOWERCASE == type){ cvalue = 'a' + random.nextInt( 26 ); // 0~(n-1) } else if (TYPE_UPPERCASE == type){ cvalue = 'A' + random.nextInt( 26 ); // 0~(n-1) } else { throw new RuntimeException(Random. class .getName()+ "#newxtInt(int);Wrong!" ); } // //cvalue = 'A' + random.nextInt(26);// 0~(n-1) // char c = ( char )cvalue; if ( null != sub && sub.length() > 1 ){ int index = random.nextInt(sub.length()); c = sub.charAt(index); } //String s = String.valueOf(c); builder.append(c); // } // return builder.toString(); } /** * Java自带的方法,内部实现了 * @param all * @param sub * @return */ public static int stringIndexOf(String all, String sub){ // 防御式编程 if ( null == all || null == sub){ return - 1 ; } // 调用Java的String方法 return all.indexOf(sub); } /** * BF算法 * @param all * @param sub * @return */ public static int bfIndexOf(String all, String sub){ // 防御式编程 if ( null == all || null == sub){ return - 1 ; } // int lenAll = all.length(); int lenSub = sub.length(); int i = 0 ; while (i < lenAll) { // 从i下标开始对比 int j = 0 ; while (j < lenSub) { // char all_i = all.charAt(i); char sub_j = sub.charAt(j); if (all_i == sub_j){ // 相等则继续对比下一个字符 i++; j++; // 这个增长很重要 } else { // 否则跳出内层循环 break ; } } // 如果 j 增长到和 lenSub相等,则匹配成功 if (lenSub == j){ return i - lenSub; } else { i = 1 +i - j; // 回溯 i } } // return - 1 ; } /** * KMP 算法 * @param all * @param sub * @return */ public static int kmpIndexOf(String all, String sub){ // char [] allArray = all.toCharArray(); char [] subArray = sub.toCharArray(); // return KMP_Index(allArray, subArray); } /** * 获得字符串的next函数值 * * @param t * 字符串 * @return next函数值 */ public static int [] next( char [] t) { int [] next = new int [t.length]; next[ 0 ] = - 1 ; int i = 0 ; int j = - 1 ; while (i < t.length - 1 ) { if (j == - 1 || t[i] == t[j]) { i++; j++; if (t[i] != t[j]) { next[i] = j; } else { next[i] = next[j]; } } else { j = next[j]; } } return next; } /** * KMP匹配字符串 * * @param allArray * 主串 * @param subArray * 模式串 * @return 若匹配成功,返回下标,否则返回-1 */ public static int KMP_Index( char [] allArray, char [] subArray) { int [] next = next(subArray); int i = 0 ; int j = 0 ; while (i <= allArray.length - 1 && j <= subArray.length - 1 ) { if (j == - 1 || allArray[i] == subArray[j]) { i++; j++; } else { j = next[j]; } } if (j < subArray.length) { return - 1 ; } else return i - subArray.length; // 返回模式串在主串中的头下标 } } |
测试的一个结果如下所示:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
-------------------------- 10次bfIndexOf= 94ms 10次stringIndexOf= 56ms 10次KMPIndexOf= 76ms 10次allbuild= 5870ms 10次alltoArray= 137ms 10*3次,GC= 155ms 10次,所有总计耗时: 6388ms; 损耗:6007ms; 损耗比: 94.03569192235442% -------------------------- 30次bfIndexOf= 365ms 30次stringIndexOf= 222ms 30次KMPIndexOf= 295ms 30次allbuild= 16452ms 30次alltoArray= 395ms 30*3次,GC= 452ms 30次,所有总计耗时: 18184ms; 损耗:16850ms; 损耗比: 92.66388033435987% -------------------------- 50次bfIndexOf;总计耗时: 550ms 50次stringIndexOf;总计耗时: 335ms 50次KMPIndexOf;总计耗时: 450ms 50次allbuild= 26501ms 50次alltoArray= 637ms 50*3次,GC;总计耗时: 734ms 50次,所有总计耗时: 29211ms; 损耗:27142ms; 损耗比: 92.91705179555647% -------------------------- |
从中可以看出来,使用StringBuilder构造字符串,800万*50次append大约消耗了26秒。换算下来每秒钟是1600万次。。。
看来是需要写一个专门计时的函数,本来Junit是有自己的统计的,但是样本不太好做。
如此看来,数据的准备,也就是测试用例的选取很关键,可能需要先生成并写入到文本文件中。