Given an array of strings words
, return the smallest string that contains each string in words
as a substring. If there are multiple valid strings of the smallest length, return any of them.
You may assume that no string in words
is a substring of another string in words.
文章目录
Example 1:
Input: words = ["alex","loves","leetcode"]
Output: "alexlovesleetcode"
Explanation: All permutations of "alex","loves","leetcode" would also be accepted.
Example 2:
Input: words = ["catg","ctaagt","gcta","ttca","atgcatc"]
Output: "gctaagttcatgcatc"
问题理解
我们需要找到一个最短的字符串S,使得给定的字符串数组A中的所有字符串都是S的子串。例如:
- 输入:["alex","loves","leetcode"]
- 输出:"alexlovesleetcode"(也可以是其他排列组合)
分步解法
第一步:计算字符串间的重叠部分
目标: 对于每对字符串A[i]
和A[j]
,计算A[j]
可以重叠在A[i]
后面的最大字符数。
实现:
cpp
int calcOverlap(const string& a, const string& b) {
int max_len = min(a.length(), b.length());
for (int k = max_len; k >= 1; --k) {
if (a.substr(a.length() - k) == b.substr(0, k)) {
return k;
}
}
return 0;
}
示例:
calcOverlap("alex", "loves") → 1 ("x"和"l"不匹配,"e"和"l"不匹配,"l"和"l"匹配)
calcOverlap("loves", "leetcode") → 2 ("es"和"le"不匹配,"es"和"le"不匹配,"oves"和"leet"不匹配,"ves"和"eet"不匹配)
第二步:初始化动态规划表
定义状态:
dp[mask][i]
:使用mask
代表的字符串集合,且最后一个字符串是A[i]时的最短超级字符串长度parent[mask][i]
:记录前一个字符串的索引
初始化:
cpp
vector<vector<int>> dp(1 << n, vector<int>(n, INT_MAX));
vector<vector<int>> parent(1 << n, vector<int>(n, -1));
// 只包含一个字符串的情况
for (int i = 0; i < n; ++i) {
dp[1 << i][i] = A[i].length();
}
解释: 当mask
只有第i
位为1时,最短长度就是A[i]
本身的长度
第三步:状态转移
转移方程:
对于每个状态mask
和每个可能的最后一个字符串i
:
- 找到不包含
i
的前一个状态prev_mask
- 遍历
prev_mask
中的所有可能的前一个字符串j - 计算新长度
= dp[prev_mask][j] + len(A[i]) - overlap[j][i]
- 如果新长度更优,则更新
dp
表和parent
表
实现:
cpp
for (int mask = 1; mask < (1 << n); ++mask) {
for (int i = 0; i < n; ++i) {
if (!(mask & (1 << i))) continue;
int prev_mask = mask ^ (1 << i);
if (prev_mask == 0) continue;
for (int j = 0; j < n; ++j) {
if (prev_mask & (1 << j)) {
int new_len = dp[prev_mask][j] + A[i].length() - overlap[j][i];
if (new_len < dp[mask][i]) {
dp[mask][i] = new_len;
parent[mask][i] = j;
}
}
}
}
}
第四步:找到最优解
步骤:
- 找到包含所有字符串的状态
mask = (1<<n)-1
- 在这个状态下找到长度最小的
dp[mask][i]
- 记录最后一个字符串的索引
last
cpp
int mask = (1 << n) - 1;
int last = min_element(dp[mask].begin(), dp[mask].end()) - dp[mask].begin();
第五步:回溯构建结果字符串
步骤:
- 从最后一个字符串开始,通过parent表回溯路径
- 反转路径得到正确的顺序
- 按照路径顺序拼接字符串,利用预先计算的重叠部分
cpp
vector<int> path;
int curr_mask = mask;
int curr = last;
while (curr_mask) {
path.push_back(curr);
int prev = parent[curr_mask][curr];
if (prev == -1) break;
curr_mask ^= (1 << curr);
curr = prev;
}
reverse(path.begin(), path.end());
string result = A[path[0]];
for (int i = 1; i < path.size(); ++i) {
int prev = path[i-1];
int curr = path[i];
int overlap_len = overlap[prev][curr];
result += A[curr].substr(overlap_len);
}
C++ 完整实现
cpp
#include <vector>
#include <string>
#include <algorithm>
#include <climits>
using namespace std;
class Solution {
public:
string shortestSuperstring(vector<string>& A) {
int n = A.size();
// 计算重叠矩阵
vector<vector<int>> overlap(n, vector<int>(n, 0));
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
if (i != j) {
overlap[i][j] = calcOverlap(A[i], A[j]);
}
}
}
// DP表:dp[mask][i] 表示使用mask代表的字符串集合,最后一个字符串是i时的最短长度
vector<vector<int>> dp(1 << n, vector<int>(n, INT_MAX));
vector<vector<int>> parent(1 << n, vector<int>(n, -1));
// 初始化:只包含一个字符串的情况
for (int i = 0; i < n; ++i) {
dp[1 << i][i] = A[i].length();
}
// 状态转移
for (int mask = 1; mask < (1 << n); ++mask) {
for (int i = 0; i < n; ++i) {
if (!(mask & (1 << i))) continue;
int prev_mask = mask ^ (1 << i);
if (prev_mask == 0) continue;
for (int j = 0; j < n; ++j) {
if (prev_mask & (1 << j)) {
int new_len = dp[prev_mask][j] + A[i].length() - overlap[j][i];
if (new_len < dp[mask][i]) {
dp[mask][i] = new_len;
parent[mask][i] = j;
}
}
}
}
}
// 找到最优解
int mask = (1 << n) - 1;
int last = min_element(dp[mask].begin(), dp[mask].end()) - dp[mask].begin();
int min_len = dp[mask][last];
// 回溯构建结果字符串
string result;
int curr_mask = mask;
int curr = last;
vector<int> path;
while (curr_mask) {
path.push_back(curr);
int prev = parent[curr_mask][curr];
if (prev == -1) break;
curr_mask ^= (1 << curr);
curr = prev;
}
reverse(path.begin(), path.end());
// 构建最终字符串
result = A[path[0]];
for (int i = 1; i < path.size(); ++i) {
int prev = path[i-1];
int curr = path[i];
int overlap_len = overlap[prev][curr];
result += A[curr].substr(overlap_len);
}
return result;
}
private:
// 计算两个字符串的最大重叠长度
int calcOverlap(const string& a, const string& b) {
int max_len = min(a.length(), b.length());
for (int k = max_len; k >= 1; --k) {
if (a.substr(a.length() - k) == b.substr(0, k)) {
return k;
}
}
return 0;
}
};
复杂度分析
-
时间复杂度: O ( n 2 ⋅ 2 n ) O(n²·2ⁿ) O(n2⋅2n)
- 外层循环 2 n 2ⁿ 2n个状态
- 内层循环 n n n个字符串
- 每个状态转移最多考虑 n n n种可能
-
空间复杂度: O ( n ⋅ 2 n ) O(n·2ⁿ) O(n⋅2n)
- 存储DP表和parent表