UVa 1502 - GRE Words

contents

  1. 1. Problem
  2. 2. Sample Input
  3. 3. Sample Output
  4. 4. Solution

Problem

依序背單字,下一個單字中要出現當前單字為 substring 的情況,每一個單字都有各自的權重,求背一次能得到的最高權重總合為多少。

如範例輸入背的順序為 a -> ab -> abb -> abbab 答案為 1 + 2 + 3 + 8 = 14

Sample Input

1
2
3
4
5
6
7
1
5
a 1
ab 2
abb 3
baba 5
abbab 8

Sample Output

1
Case #1: 14

Solution

這一題的測資有問題,輸入中出現非小寫字母,導致存取發生錯誤,部分程序沒有考慮這一點,居然就通過了?
一群人 Invalid Memory Access 通過,而我因寫法比較不同成了 RE 下亡魂,測試數據格式錯誤啊!坑爹啊,害我掛載好多 assert 抓哪裡溢出 …

原本想說是一道有趣的 AC 自動機複習,將 fail 指針 (suffix link) 整理成另一棵樹,對其 fail tree 壓樹 (走訪),套上線段樹等數據結構,就能支持多字串匹配的數據查找。當匹配時將會一直攀爬 fail 指針,最後爬到 root,中間經過的節點都是匹配的字串,也因此單看 fail 指針,也肯定是 tree。

在走訪時,會匹配 fail link 上的所有節點 (都是其 suffix string)。dp[i] 表示使用第 i 個字串為背單字序列結尾的最大權重,則 dp[i] = max(dp[j]) + w,其中滿足 j < i 且 word[j] 是 word[i] 的 substring。

窮舉第 i 字符串的後綴匹配節點,找尋匹配節點藉由 fail link 到 root 上節點的最大值 (max(dp[j]))。

利用線段樹完成區間最大值更新、單點查詢 (找尋節點到 root 的最大值,更新一個點的權重時,相當於作區間最大值更新,藉由壓樹其子樹被表示成區間)。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
// NOTES: has error test data, there are some characters which aren't lowercase letters.
// FUCK
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <queue>
#include <map>
#include <algorithm>
#include <assert.h>
#include <assert.h>
#pragma comment( linker, "/STACK:1024000000,1024000000")
using namespace std;
const int MAXCHAR = 26;
const int MAXNODE = 1048576;
class ACmachine {
public:
struct Node {
Node *next[MAXCHAR], *fail;
int cnt, val, id, nid;
void init() {
fail = NULL;
cnt = val = 0;
nid = id = -1;
memset(next, 0, sizeof(next));
}
} nodes[MAXNODE];
Node *root;
int size;
vector<int> fg[MAXNODE];
Node* getNode() {
Node *p = &nodes[size++];
p->init(), p->nid = size-1;
assert(size < MAXNODE);
fg[p->nid].clear();
return p;
}
void init() {
size = 0;
root = getNode();
}
int toIndex(char c) {
assert(c - 'a' >= 0 && c - 'a' < MAXCHAR);
return c - 'a';
}
void dfs(Node *p, Node *q) {
for (int i = 0; i < MAXCHAR; i++) {
if (q->next[i]) {
Node *u = p->next[i], *v = q->next[i];
if (u == NULL)
p->next[i] = getNode(), u = p->next[i];
u->cnt |= v->cnt;
dfs(u, v);
}
}
}
void merge(const ACmachine &other) {
dfs(root, other.root);
}
void insert(const char str[], int sid) {
Node *p = root;
for (int i = 0, idx; str[i]; i++) {
idx = toIndex(str[i]);
if (p->next[idx] == NULL)
p->next[idx] = getNode();
p = p->next[idx];
}
p->cnt = 1;
if (sid >= 0) p->id = sid;
}
int find(const char str[]) {
Node *p = root;
for (int i = 0, idx; str[i]; i++) {
idx = toIndex(str[i]);
if (p->next[idx] == NULL)
p->next[idx] = getNode();
p = p->next[idx];
}
return p->cnt;
}
void build() { // AC automation
queue<Node*> Q;
Node *u, *p;
Q.push(root), root->fail = NULL;
while (!Q.empty()) {
u = Q.front(), Q.pop();
if (u != root) {
assert(u->fail != NULL);
assert(u->nid != u->fail->nid);
fg[u->nid].push_back(u->fail->nid);
fg[u->fail->nid].push_back(u->nid);
}
for (int i = 0; i < MAXCHAR; i++) {
if (u->next[i] == NULL)
continue;
Q.push(u->next[i]);
p = u->fail;
while (p != NULL && p->next[i] == NULL)
p = p->fail;
if (p == NULL || p->next[i] == NULL)
u->next[i]->fail = root;
else
u->next[i]->fail = p->next[i];
u->next[i]->val = u->next[i]->fail->val + u->next[i]->cnt;
u->next[i]->id = u->next[i]->fail->id | u->next[i]->id;
}
}
}
int query(const char str[]) {
Node *u = root, *p;
int matched = 0;
for (int i = 0, idx; str[i]; i++) {
idx = toIndex(str[i]);
while (u->next[idx] == NULL && u != root)
u = u->fail;
u = u->next[idx];
u = (u == NULL) ? root : u;
p = u;
matched += p->val;
}
return matched;
}
void free() {
return ;
// owner memory pool version
queue<Node*> Q;
Q.push(root);
Node *u;
while (!Q.empty()) {
u = Q.front(), Q.pop();
for (int i = 0; i < MAXCHAR; i++) {
if (u->next[i] != NULL) {
Q.push(u->next[i]);
}
}
delete u;
}
}
};
const int MAXN = 1048576;
class SegmentTree {
public:
struct Node {
long long mx;
pair<int, long long> label;
void init() {
mx = -1LL<<60;
label = make_pair(0, 0);
}
} nodes[MAXN];
void pushDown(int k, int l, int r) {
int mid = (l + r)/2;
if (nodes[k].label.first) {
maxUpdate(k<<1, l, mid, nodes[k].label.second);
maxUpdate(k<<1|1, mid+1, r, nodes[k].label.second);
nodes[k].label = make_pair(0, 0); // cancel
}
}
void pushUp(int k) {
nodes[k].mx = max(nodes[k<<1].mx, nodes[k<<1|1].mx);
}
void build(int k, int l, int r) {
nodes[k].init();
if (l == r) {
nodes[k].mx = 0;
return ;
}
int mid = (l + r)/2;
build(k<<1, l, mid);
build(k<<1|1, mid+1, r);
pushUp(k);
}
// operator, assign > add
void maxUpdate(int k, int l, int r, long long val) {
if (nodes[k].label.first)
val = max(val, nodes[k].label.second);
nodes[k].label = make_pair(1, val);
nodes[k].mx = max(nodes[k].mx, val);
}
void assign(int k, int l, int r, int x, int y, int val) {
if (x <= l && r <= y) {
maxUpdate(k, l, r, val);
return;
}
pushDown(k, l, r);
int mid = (l + r)/2;
if (x <= mid)
assign(k<<1, l, mid, x, y, val);
if (y > mid)
assign(k<<1|1, mid+1, r, x, y, val);
pushUp(k);
}
// query
long long r_mx;
void qinit() {
r_mx = -1LL<<60;
}
void query(int k, int l, int r, int x, int y) {
if (x <= l && r <= y) {
r_mx = max(r_mx, nodes[k].mx);
return ;
}
pushDown(k, l, r);
int mid = (l + r)/2;
if (x <= mid)
query(k<<1, l, mid, x, y);
if (y > mid)
query(k<<1|1, mid+1, r, x, y);
}
};
ACmachine ac;
SegmentTree tree;
vector<string> words;
vector<int> wvals;
char s[1048576];
int bPos[MAXN], ePos[MAXN], inIdx;
void prepare(int u, int p) {
bPos[u] = ++inIdx;
for (int i = 0; i < ac.fg[u].size(); i++) {
if (ac.fg[u][i] == p) continue;
prepare(ac.fg[u][i], u);
}
ePos[u] = inIdx;
}
void solve() {
for (int i = 0; i < ac.size; i++)
bPos[i] = ePos[i] = 0;
inIdx = 0;
for (int i = 0; i < ac.size; i++) {
if (bPos[i] == 0) {
prepare(i, -1); // interval [1, inIdx]
}
}
assert(inIdx < MAXN/2);
tree.build(1, 1, inIdx);
long long ret = 0;
for (int i = 0; i < words.size(); i++) {
ACmachine::Node *u = ac.root;
int idx;
long long dpval = 0;
for (int j = 0; j < words[i].length(); j++) {
idx = ac.toIndex(words[i][j]);
while (u->next[idx] == NULL && u != ac.root)
u = u->fail;
u = u->next[idx];
u = (u == NULL) ? ac.root : u;
tree.qinit();
tree.query(1, 1, inIdx, bPos[u->nid], bPos[u->nid]);
dpval = max(dpval, tree.r_mx);
}
dpval += wvals[i];
tree.assign(1, 1, inIdx, bPos[u->nid], ePos[u->nid], dpval);
ret = max(ret, dpval);
}
printf("%lld\n", ret);
}
int main() {
// freopen("in.txt", "r+t", stdin);
// freopen("out.txt", "w+t", stdout);
int testcase, cases = 0;
int N, val;
scanf("%d", &testcase);
while (testcase--) {
scanf("%d", &N);
ac.init();
words.clear(), wvals.clear();
for (int i = 0; i < N; i++) {
scanf("%s %d", s, &val);
ac.insert(s, i);
words.push_back(s), wvals.push_back(val);
}
ac.build();
printf("Case #%d: ", ++cases);
solve();
ac.free();
}
return 0;
}
/*
999
4
abbaaa 49
bba 41
ba 19
bbba 20
8
abbaaa 49
bba 41
ba 19
bbba 20
abbbabaaa 21
b 47
ababba 26
a 0
3
abaab 35
ab 9
abaabaaabbabababaaab 35
8
aabbbbbababbaaaab 14
abaabbaaaaaabbaabbba 13
babaaababaaaaabababa 17
abaab 35
ab 9
abaabaaabbabababaaab 35
bbbaaa 31
aabab 34
32
aa 19
bababbabbbaabaaaab 40
aabbbaaaaaa 48
ababaaabbabbbaabbb 38
a 42
bbbbbaabbbaa 10
baabbbababaab 41
aabbabbabba 28
baaab 3
aaaaaaaaa 15
bbbabbababaaabab 16
aab 18
baaab 2
abbbab 2
bababa 4
bbabbbbbaabbaaaababb 35
bbbaabba 34
bbbb 24
bbbb 17
babbbabbababababaa 9
ababaaaa 38
bbbb 3
bba 31
aabaabaaa 34
aabbbbbaababaa 41
aabbbaabbbabaababbbb 45
abbbaabbababab 37
ababbaabb 3
bbabbbbbaba 8
a 10
babbabaabbbbab 34
bbababaaaaaaaaa 5
*/