UVa 1449 - Dominating Patterns

contents

  1. Problem
  2. Input
  3. Output
  4. Sample Input
  5. Sample Output
  6. Solution

Problem

The archaeologists are going to decipher a very mysterious "language". Now, they know many language patterns; each pattern can be treated as a string of English letters (lower case only). As a substring, a pattern may appear more than once in a large text string (which also consists only of lower-case English letters).

What matters most is which patterns are the dominating patterns. A dominating pattern is a pattern whose number of appearances is not less than that of any other pattern.

It is your job to find the dominating pattern(s) and the number of times they appear.

Input

The entire input contains multiple cases. The first line of each case is an integer $N$, the number of patterns, $1 \le N \le 150$. Each of the following $N$ lines contains one pattern, whose length is in the range $[1, 70]$. The rest of the case is one line containing a large string as the text to search, whose length is up to $10^6$.

At the end of the input file, the number `0` indicates the end of the input.

Output

For each input case, first output the number of times the dominating pattern(s) appear, then output the dominating pattern(s) themselves. If there is more than one dominating pattern, output them on separate lines, keeping their input order.

Sample Input

2
aba
bab
ababababac
6
beta
alpha
haha
delta
dede
tata
dedeltalphahahahototatalpha
0

Sample Output

4
aba
2
alpha
haha

Solution

題目描述:

給 N 個字串,問在 S 字串中哪些字串出現最多次。

題目解法:

對 N 個字串建造 AC 自動機,將 S 丟入自動機,計算每一個字串的出現次數。

#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <queue>
#include <map>
#define maxKind 26
using namespace std;
// One state of the Aho-Corasick automaton: a trie node plus its failure link.
struct Node{
    Node *fail;           // failure link; starts out as NULL
    Node *next[maxKind];  // one child slot per letter; NULL when the edge is absent
    int cnt;              // how many inserted patterns end at this node
    int who;              // id stored by the most recent pattern ending here (0 if none)

    // Creates an empty state: no failure link, no children, no pattern.
    Node() : fail(NULL), cnt(0), who(0) {
        for(int k = 0; k < maxKind; ++k)
            next[k] = NULL;
    }
};
/**
 * Inserts one pattern into the trie rooted at `root`.
 *
 * @param str   NUL-terminated pattern. Only the letters 'a'..'z' have a slot
 *              in Node::next, so a pattern containing any other character is
 *              rejected as a whole and the trie is left untouched.
 * @param root  Root of the trie; missing nodes on the path are created with new.
 * @param who   Id stored on the node where the pattern ends. Inserting the
 *              same text again overwrites the id and increments cnt.
 */
void build_Trie(const char* str, Node *root, int who) {
    // Validate up front: previously an out-of-range character left the child
    // index uninitialized/stale, and rejecting here avoids a partial path.
    for(int i = 0; str[i]; i++) {
        if(str[i] < 'a' || str[i] > 'z')
            return;
    }
    Node *p = root;
    for(int i = 0; str[i]; i++) {
        int idx = str[i] - 'a';
        if(p->next[idx] == NULL)
            p->next[idx] = new Node();
        p = p->next[idx];
    }
    // Mark the terminal node of this pattern.
    p->cnt++;
    p->who = who;
}
/**
 * Computes the failure link of every node reachable from `root`.
 *
 * Nodes are visited breadth-first, so a parent's failure link is already set
 * when its children are processed. The root's own link is NULL; a child whose
 * failure chain runs off the end falls back to the root.
 */
void build_AC_automation(Node *root) {
    root->fail = NULL;
    queue<Node*> pending;
    for(pending.push(root); !pending.empty(); pending.pop()) {
        Node *cur = pending.front();
        for(int c = 0; c < maxKind; ++c) {
            Node *child = cur->next[c];
            if(child != NULL) {
                pending.push(child);
                // Walk the parent's failure chain until some state has an
                // outgoing edge on the same letter.
                Node *cand = cur->fail;
                while(cand != NULL && cand->next[c] == NULL)
                    cand = cand->fail;
                child->fail = (cand == NULL) ? root : cand->next[c];
            }
        }
    }
}
/**
 * Releases every node reachable from `root` through the child links.
 *
 * The nodes are created with `new`, so they must be released with `delete`;
 * the previous `free()` call mismatched the allocator.
 *
 * @param root  Root of the trie, allocated with new; NULL is accepted and
 *              ignored. The pointer is dangling after the call.
 */
void free_AC_automation(Node *root) {
    if(root == NULL)
        return;
    queue<Node*> Q;
    Q.push(root);
    while(!Q.empty()) {
        Node *tn = Q.front();
        Q.pop();
        // Queue the children before the node itself is destroyed.
        for(int i = 0; i < maxKind; i++) {
            if(tn->next[i] != NULL)
                Q.push(tn->next[i]);
        }
        delete tn;
    }
}
/**
 * Runs the text through the automaton and counts pattern occurrences.
 *
 * @param str   NUL-terminated text to scan.
 * @param root  Root of the automaton; failure links must already have been
 *              computed (see build_AC_automation), since they are followed here.
 * @param cnt   Output counters: cnt[who] is incremented once for every
 *              occurrence of the pattern stored with id `who`. The caller
 *              must size and zero the array.
 */
void query(const char* str, Node *root, int cnt[]) {
    Node *tn = root;
    for(int i = 0; str[i]; i++) {
        // No stored pattern can contain a character outside 'a'..'z', so such
        // a character breaks every partial match: restart from the root.
        // (Previously the child index was left uninitialized/stale here.)
        if(str[i] < 'a' || str[i] > 'z') {
            tn = root;
            continue;
        }
        int idx = str[i] - 'a';
        // Fall back along failure links until a state accepts this letter.
        while(tn->next[idx] == NULL && tn != root)
            tn = tn->fail;
        tn = tn->next[idx];
        if(tn == NULL)
            tn = root;
        // Every state on the failure chain is a suffix of the text read so
        // far; report each one that ends a pattern.
        for(Node *p = tn; p != root; p = p->fail) {
            if(p->cnt > 0)
                cnt[p->who]++;
        }
    }
}
// buf holds the text of the current case (up to 1048575 characters plus the
// terminator); pattern[i] holds the i-th pattern of the current case.
char buf[1048576], pattern[256][256];

/**
 * Reads test cases until a pattern count of 0 (or end of input). For each
 * case it prints the highest occurrence count, followed by every pattern
 * reaching that count, in input order.
 */
int main() {
    int n;
    while(scanf("%d", &n) == 1 && n) {
        // cnt[] is indexed by i+1, so at most 255 patterns fit; anything
        // outside that range is treated as malformed input.
        if(n < 0 || n > 255)
            break;
        Node *root = new Node();
        // first[i] is the index of the earliest pattern with the same text as
        // pattern i. Identical patterns share one trie node, which can carry
        // only one id, so only the first copy is inserted and later copies
        // reuse its counter.
        int first[256];
        bool ok = true;
        for(int i = 0; i < n; i++) {
            // Field width keeps an over-long token from overflowing pattern[i].
            if(scanf("%255s", pattern[i]) != 1) {
                ok = false;
                break;
            }
            first[i] = i;
            for(int j = 0; j < i; j++) {
                if(strcmp(pattern[j], pattern[i]) == 0) {
                    first[i] = j;
                    break;
                }
            }
            if(first[i] == i)
                build_Trie(pattern[i], root, i+1);
        }
        if(ok && scanf("%1048575s", buf) != 1)
            ok = false;
        if(!ok) {
            // Truncated input: release the partially built trie and stop.
            free_AC_automation(root);
            break;
        }
        build_AC_automation(root);
        int cnt[256] = {};
        query(buf, root, cnt);
        free_AC_automation(root);
        int maxMatch = 0;
        for(int i = 0; i < n; i++) {
            maxMatch = max(maxMatch, cnt[first[i]+1]);
        }
        printf("%d\n", maxMatch);
        for(int i = 0; i < n; i++) {
            if(cnt[first[i]+1] == maxMatch)
                printf("%s\n", pattern[i]);
        }
    }
    return 0;
}