原题题面
The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of “ababab” is 3 and “ababa” is 1.
Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.
输入格式
The input consists of multiple test cases. Each test case contains exactly one line, which gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.
The last test case is followed by a line containing a ‘#’.
输出格式
For each test case, print a line containing the test case number( beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.
输入样例
ccabababc daabbccaa #
输出样例
Case 1: ababab Case 2: aa
题面分析
对于一个字符串,找出其连续的重复次数最多的子串,如果有多个请输出字典序最小的那个。 首先关于后缀数组的所有理论请看这篇论文->国家集训队2009论文集——后缀数组 我们暴力枚举可能的长度L,然后再s[1],s[1+L],s[1+2L]… 的里面匹配他们的最长公共前缀和最长公共后缀,记为K。那么重复出现的次数就是K/L+1,我们找到最大的保存下来,并同时记录当时对应的L。 但这样找到的结果可能有多个,因此我们通过枚举**SA[i]**来匹配,这样找到的第一个结果一定是字典序最小的。 其他的细节看代码和论文即可。
AC代码(516ms)
#include <bits/stdc++.h>
using namespace std
;
const int maxn
=1e5;
#define ll long long
namespace ST
{
const int logn
=30;
int f
[maxn
+10][logn
], Log
[maxn
+10];
void init_log()
{
Log
[0]=-1;
Log
[1]=0;
Log
[2]=1;
for(int i
=3; i
<maxn
; i
++)
{
Log
[i
]=Log
[i
/2]+1;
}
}
void build(int n
, int *a
)
{
for(int i
=1; i
<=n
; i
++)
{
f
[i
][0]=a
[i
];
}
init_log();
for(int j
=1; j
<=logn
; j
++)
{
for(int i
=1; i
+(1<<j
)-1<=n
; i
++)
{
f
[i
][j
]=min(f
[i
][j
-1], f
[i
+(1<<(j
-1))][j
-1]);
}
}
}
int query(int l
, int r
)
{
if (l
>r
)
swap(l
, r
);
l
++;
int s
=Log
[r
-l
+1];
return min(f
[l
][s
], f
[r
-(1<<s
)+1][s
]);
}
}
namespace SA
{
int SA
[maxn
+10];
int Rank
[maxn
+10];
int Height
[maxn
+10];
int tax
[maxn
+10], tp
[maxn
+10];
int len
;
char s
[maxn
+10];
int m
;
void RSort()
{
for(int i
=0; i
<=m
; i
++) tax
[i
]=0;
for(int i
=1; i
<=len
; i
++) tax
[Rank
[tp
[i
]]]++;
for(int i
=1; i
<=m
; i
++) tax
[i
]+=tax
[i
-1];
for(int i
=len
; i
>=1; i
--) SA
[tax
[Rank
[tp
[i
]]]--]=tp
[i
];
}
int cmp(int *f
, int x
, int y
, int w
)
{
return f
[x
]==f
[y
] && f
[x
+w
]==f
[y
+w
];
}
void GetSA()
{
for(int i
=1; i
<=len
; i
++)
{
Rank
[i
]=s
[i
], tp
[i
]=i
;
}
RSort();
for(int w
=1, p
=1, i
; p
<len
; w
+=w
, m
=p
)
{
for(p
=0, i
=len
-w
+1; i
<=len
; i
++)
tp
[++p
]=i
;
for(i
=1; i
<=len
; i
++)
if (SA
[i
]>w
)
tp
[++p
]=SA
[i
]-w
;
RSort(), swap(Rank
, tp
), Rank
[SA
[1]]=p
=1;
for(i
=2; i
<=len
; i
++)
Rank
[SA
[i
]]=cmp(tp
, SA
[i
], SA
[i
-1], w
) ? p
: ++p
;
}
}
void GetHeight()
{
int j
, k
=0;
for(int i
=1; i
<=len
; Height
[Rank
[i
++]]=k
)
for(k
=k
? k
-1 : k
, j
=SA
[Rank
[i
]-1]; s
[i
+k
]==s
[j
+k
]; ++k
);
}
void PrintSA()
{
for(int i
=1; i
<=len
; i
++)
{
printf("%d ", SA
[i
]);
}
printf("\n");
}
void PrintHeight()
{
for(int i
=1; i
<=len
; i
++)
{
printf("%d ", Height
[i
]);
}
printf("\n");
}
void PrintRank()
{
for(int i
=1; i
<=len
; i
++)
{
printf("%d ", Rank
[i
]);
}
printf("\n");
}
}
bool check(int mid
, int m
)
{
int minx
=SA
::SA
[1];
int cnt
=1;
for(int i
=1; i
<=SA
::len
; i
++)
{
if (SA
::Height
[i
]<mid
)
{
cnt
=1;
minx
=SA
::SA
[i
];
}
else
{
cnt
++;
minx
=min(minx
, SA
::SA
[i
]);
if (cnt
>=m
)
return 1;
}
}
return 0;
}
char a
[maxn
+10];
int answer
[maxn
+10];
void solve()
{
int tot
=0;
while(~scanf("%s", a
+1) && a
[1]!='#')
{
int len
=strlen(a
+1);
for(int i
=1; i
<=len
; i
++)
{
SA
::s
[i
]=a
[i
];
}
SA
::s
[len
+1]=0;
SA
::len
=len
;
SA
::m
=200;
SA
::GetSA();
SA
::GetHeight();
ST
::build(len
, SA
::Height
);
int mx
=-1, cnt
=0;
for(int l
=1; l
<=len
; l
++)
{
for(int pos
=1; pos
+l
<=len
; pos
+=l
)
{
int LCP
=ST
::query(SA
::Rank
[pos
], SA
::Rank
[pos
+l
]);
int res
=LCP
/l
+1;
int k
=pos
-(l
-LCP
%l
);
if (k
>=1 && LCP
%l
&& ST
::query(SA
::Rank
[k
], SA
::Rank
[k
+l
]))
{
res
++;
}
if (res
>mx
)
{
mx
=res
;
cnt
=0;
answer
[++cnt
]=l
;
}
else if (res
==mx
)
{
answer
[++cnt
]=l
;
}
}
}
int anslen
=0, anspos
=1;
for(int i
=1; i
<=len
&& !anslen
; i
++)
{
for(int j
=1; j
<=cnt
; j
++)
{
if (ST
::query(SA
::Rank
[SA
::SA
[i
]], SA
::Rank
[SA
::SA
[i
]+answer
[j
]])>=(mx
-1)*answer
[j
])
{
anslen
=answer
[j
];
anspos
=SA
::SA
[i
];
break;
}
}
}
printf("Case %d: ", ++tot
);
for(int i
=anspos
; i
<=anspos
+anslen
*mx
-1; i
++)
{
printf("%c", SA
::s
[i
]);
}
printf("\n");
}
}
int main()
{
#ifdef ACM_LOCAL
freopen("in.txt", "r", stdin);
freopen("out.txt", "w", stdout);
long long test_index_for_debug
=1;
char acm_local_for_debug
;
while(cin
>>acm_local_for_debug
)
{
cin
.putback(acm_local_for_debug
);
if (test_index_for_debug
>100)
{
throw runtime_error("Check the stdin!!!");
}
auto start_clock_for_debug
=clock();
solve();
auto end_clock_for_debug
=clock();
cout
<<"\nTest "<<test_index_for_debug
<<" successful"<<endl
;
cerr
<<"Test "<<test_index_for_debug
++<<" Run Time: "
<<double(end_clock_for_debug
-start_clock_for_debug
)/CLOCKS_PER_SEC
<<"s"<<endl
;
cout
<<"--------------------------------------------------"<<endl
;
}
#else
solve();
#endif
return 0;
}
后记
破题写了一天,终于完完全全搞懂,后缀数组写的我快得脑血栓了。 DrGilbert 2020.10.20