第一步,构建Trie树,定义Node类型:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
/** * Created by zhaoyy on 2017/2/7. */ interface Node { char value(); boolean exists(); boolean isRoot(); Node parent(); Node childOf( char c); Node fail(); void setFail(Node node); void setExists( boolean exists); void add(Node child); List<Node> children(); } |
第二步,实现两种Node,如果词汇全是可打印的ASCII字符,就采用AsciiNode,否则(比如包含汉字),使用基于hash表的MapNode;这两种Node均集成自AbstractNode:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
|
/** * Created by zhaoyy on 2017/2/8. */ abstract class AbstractNode implements Node { private static final char EMPTY = '\0' ; private final char value; private final Node parent; private boolean exists; private Node fail; public AbstractNode(Node parent, char value) { this .parent = parent; this .value = value; this .exists = false ; this .fail = null ; } public AbstractNode() { this ( null , EMPTY); } private static String tab( int n) { StringBuilder builder = new StringBuilder(); for ( int i = 0 ; i < n; i++) { builder.append( '\t' ); } return builder.toString(); } private static String toString(Node node, int depth) { StringBuilder builder = new StringBuilder(); String tab = tab(depth); Node fail = node.fail(); Node parent = node.parent(); builder .append(tab) .append( '<' ) .append(node.value()) .append( " exists=\"" ) .append(node.exists()) .append( '"' ) .append( " parent=\"" ) .append(parent == null ? "null" : parent.value()) .append( '"' ) .append( " fail=\"" ) .append(fail == null ? "null" : fail.value()) .append( "\">\r\n" ); for (Node child : node.children()) builder.append(toString(child, depth + 1 )); builder .append(tab) .append( "</" ) .append(node.value()) .append( ">\r\n" ); return builder.toString(); } @Override public char value() { return value; } @Override public boolean exists() { return exists; } @Override public boolean isRoot() { return value == EMPTY; } @Override public Node parent() { return parent; } @Override public Node fail() { return fail; } @Override public void setFail(Node node) { this .fail = node; } @Override public void setExists( boolean exists) { this .exists = exists; } @Override public String toString() { return toString( this , 0 ); } } ///////////////////////////////////////////////////////////////////////////////////////////// /** * Created by zhaoyy on 2017/2/8. */ final class AsciiNode extends AbstractNode implements Node { private static final char FROM = 32 ; private static final char TO = 126 ; private final Node[] children; public AsciiNode() { super (); this .children = new Node[TO - FROM + 1 ]; } public AsciiNode(Node parent, char value) { super (parent, value); this .children = new Node[TO - FROM + 1 ]; } @Override public Node childOf( char c) { if (c >= FROM && c <= TO) return children[( int ) c - FROM]; else return null ; } @Override public void add(Node child) { int index = ( int ) child.value(); children[index - FROM] = child; } @Override public List<Node> children() { List<Node> nodes = new ArrayList<Node>(); for (Node child : children) if (child != null ) nodes.add(child); return nodes; } } ////////////////////////////////////////////////////////////////////////////////////////////// /** * Created by zhaoyy on 2017/2/8. */ final class MapNode extends AbstractNode implements Node { private final Map<Character, Node> children; public MapNode() { super (); this .children = new HashMap<Character, Node>(); } public MapNode(Node parent, char value) { super (parent, value); this .children = new HashMap<Character, Node>(); } @Override public Node childOf( char c) { return children.get(c); } @Override public void add(Node child) { children.put(child.value(), child); } @Override public List<Node> children() { List<Node> nodes = new ArrayList<Node>(); nodes.addAll(children.values()); return nodes; } } |
第三步,
首先定义一个Node构造器:
1
2
3
4
5
6
7
8
9
|
/** * Created by zhaoyy on 2017/2/8. */ public interface NodeMaker { Node make(Node parent, char value); Node makeRoot(); } |
然后构建AC自动机,实现创建及查找方法:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
/** * Created by zhaoyy on 2017/2/7. */ public final class WordTable { private final Node root; private WordTable(Collection<? extends CharSequence> words, NodeMaker maker) { Node root = buildTrie(words, maker); setFailNode(root); this .root = root; } public static WordTable compile(Collection<? extends CharSequence> words) { if (words == null || words.isEmpty()) throw new IllegalArgumentException(); final NodeMaker maker; if (isAscii(words)) maker = new NodeMaker() { @Override public Node make(Node parent, char value) { return new AsciiNode(parent, value); } @Override public Node makeRoot() { return new AsciiNode(); } }; else maker = new NodeMaker() { @Override public Node make(Node parent, char value) { return new MapNode(parent, value); } @Override public Node makeRoot() { return new MapNode(); } }; return new WordTable(words, maker); } private static boolean isAscii(Collection<? extends CharSequence> words) { for (CharSequence word : words) { int len = word.length(); for ( int i = 0 ; i < len; i++) { int c = ( int ) word.charAt(i); if (c < 32 || c > 126 ) return false ; } } return true ; } private static Node buildTrie(Collection<? extends CharSequence> sequences, NodeMaker maker) { Node root = maker.makeRoot(); for (CharSequence sequence : sequences) { int len = sequence.length(); Node current = root; for ( int i = 0 ; i < len; i++) { char c = sequence.charAt(i); Node node = current.childOf(c); if (node == null ) { node = maker.make(current, c); current.add(node); } current = node; if (i == len - 1 ) node.setExists( true ); } } return root; } private static void setFailNode( final Node root) { root.setFail( null ); Queue<Node> queue = new LinkedList<Node>(); queue.add(root); while (!queue.isEmpty()) { Node parent = queue.poll(); Node temp; for (Node child : parent.children()) { if (parent.isRoot()) child.setFail(root); else { temp = parent.fail(); while (temp != null ) { Node node = temp.childOf(child.value()); if (node != null ) { child.setFail(node); break ; } temp = temp.fail(); } if (temp == null ) child.setFail(root); } queue.add(child); } } } public boolean findAnyIn(CharSequence cs) { int len = cs.length(); Node node = root; for ( int i = 0 ; i < len; i++) { Node next = node.childOf(cs.charAt(i)); if (next == null ) { next = node.fail(); if (next == null ) { node = root; continue ; } } if (next.exists()) return true ; } return false ; } public List<MatchInfo> search(CharSequence cs) { if (cs == null || cs.length() == 0 ) return Collections.emptyList(); List<MatchInfo> result = new ArrayList<MatchInfo>(); int len = cs.length(); Node node = root; for ( int i = 0 ; i < len; i++) { Node next = node.childOf(cs.charAt(i)); if (next == null ) { next = node.fail(); if (next == null ) { node = root; continue ; } } if (next.exists()) { MatchInfo info = new MatchInfo(i, next); result.add(info); node = root; continue ; } node = next; } return result; } @Override public String toString() { return root.toString(); } } |
定义一个保存查找结果的实体:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
/** * Created by zhaoyy on 2017/2/7. */ public final class MatchInfo { private final int index; private final String word; public MatchInfo( int index, String word) { this .index = index; this .word = word; } public MatchInfo( int index, Node node) { StringBuilder builder = new StringBuilder(); while (node != null ) { if (!node.isRoot()) builder.append(node.value()); node = node.parent(); } String word = builder.reverse().toString(); this .index = index + 1 - word.length(); this .word = word; } public int getIndex() { return index; } public String getWord() { return word; } @Override public String toString() { return index + ":" + word; } } |
第四步,调用Demo:
1
2
3
4
5
6
|
public static void main(String[] args) { List<String> list = Arrays.asList( "say" , "her" , "he" , "she" , "shr" , "alone" ); WordTable table = WordTable.compile(list); System.out.println(table); System.out.println(table.search( "1shesaynothingabouthislivinghimalone" )); } |
以下是输出结果:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
< exists = "false" parent = "null" fail = "null" > < s exists = "false" parent = " " fail = " " > < a exists = "false" parent = "s" fail = "a" > < y exists = "true" parent = "a" fail = " " > </ y > </ a > < h exists = "false" parent = "s" fail = "h" > < e exists = "true" parent = "h" fail = "e" > </ e > < r exists = "true" parent = "h" fail = " " > </ r > </ h > </ s > < h exists = "false" parent = " " fail = " " > < e exists = "true" parent = "h" fail = " " > < r exists = "true" parent = "e" fail = " " > </ r > </ e > </ h > < a exists = "false" parent = " " fail = " " > & l t;l exists = "false" parent = "a" fail = " " > < o exists = "false" parent = "l" fail = " " > < n exists = "false" parent = "o" fail = " " > < e exists = "true" parent = "n" fail = " " > </ e > </ n > </ o > & l t;/l> </ a > </ > [1:she, 4:say, 31:alone] |
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:https://my.oschina.net/u/2541538/blog/833284