Solution
Given $n$ ($n \leq 24$) strings, each of length $m \leq 8$, stored in an array of strings $Str[\,]$, you pick some letters from them to form the phrase $helloleetcode$ (we call this phrase KeyString).
Because the problem data is so small, a greedy algorithm is not the way to go; consider DFS or state-compression (bitmask) DP instead.
First of all, the main procedure uses a `map< int, int> mp_cost[ 25]`,
where `mp_cost[ i][ st] = c`
means that, for the string $Str[i]$, $st$ is a state describing a subsequence (subset of letters) of $Str[i]$ that is also a subsequence (subset of letters) of $KeyString$, and $c$ is the cost recorded for picking it.
Each string has at most $2^8$ valid states (a valid state is one that is not only a subsequence of $Str[i]$ but also a subsequence of $KeyString$. For instance, the subsequence $xz$ of $xyz$ is not a valid state, because it is not a subsequence of $KeyString$).
For all $24$ strings we have $[2^8\ \text{states}]\,[2^8\ \text{states}]\,\dots$; if we use Brute-DFS, the complexity is $(2^8)^{24}$.
But for any $i$ and a state $st$, there are many ways to form $st$ using some states of the first $i$ strings, and then $st + back = KeyString$, where $back$ is built from some combination of the states of the remaining strings.
If you use Brute-DFS, then each time you reach $st$ within the first $i$ strings, you have to calculate $back$ all over again, so the computation of $back$ is repeated and redundant.
Therefore, use $Dp[i][st]$ to represent $back$. Note that $Dp$ does not describe the front part $st$, but the back part. Concretely, it means: when we have already obtained $st$ within the first $i$ strings (and will then use the remaining strings $[i+1, i+2, \dots]$ to turn $st$ into $KeyString$), the value of $Dp$ is the minimal $cost$ of doing so. So, the answer is $Dp[0][0]$.
This is very different from the usual definition, in which $Dp[i][j]$ means the cost of the first $i$ strings; here it means the cost of the remaining strings, excluding the first $i$.
If we use the usual DP definition here, i.e. $Dp[i][st] + stt \to Dp[i+1][st + stt]$, then the complexity is $24 \cdot 2^{11}\ (\text{all possible states of KeyString}) \cdot 2^{8}\ (\text{all states of the } i\text{-th string})$, which is also workable. (Because many of the $2^{11}$ states are invalid, DFS is slightly better.)
// Forward ("usual") DP variant: Dp[ i][ st] holds the cheapest cost found for reaching
// the packed state st using only strings 0..i.
// Filling every byte with 0x7F makes each entry compare equal to 0x7F7F7F7F, which is
// used below as the "unreachable" marker (this presumes Dp is an array of 4-byte ints
// -- its declaration is not shown here).
memset( Dp, 0x7F, sizeof( Dp));
Dp[ 0][ 0] = 0; //< picking nothing from string[ 0] is allowed and costs 0 -- very important
// Seed row 0 with every recorded subset of string[ 0].
for( auto & [ st, cost] : Mp_cost[ 0]){
Dp[ 0][ st] = cost;
}
for( int i = 1; i < N; ++i){
memcpy( Dp[ i], Dp[ i - 1], sizeof( Dp[ i])); //< picking nothing from string[ i]: inherit row i - 1
// Combine each recorded subset of string[ i] with each reachable earlier state.
for( auto & [ st, cost] : Mp_cost[ i]){
for( int pre_st = 0; pre_st < Full_state; ++pre_st){
// Skip unreachable predecessors (also keeps the sentinel out of the addition below)
// and pairs that Check_valid rejects.
if( ( Dp[i - 1][ pre_st] != 0x7F7F7F7F)
&& Check_valid( st, pre_st)){
// Reads come from row i - 1 only, so a subset of string[ i] is applied at most once.
Dp[ i][ st + pre_st] = min( Dp[ i][ st + pre_st], Dp[ i - 1][ pre_st] + cost);
}
}
}
}
// -1 when the full state was never reached, otherwise its recorded cost.
return Dp[ N - 1][ Full_state] == 0x7F7F7F7F ? -1 : Dp[ N - 1][ Full_state];
The length of each string is at most $8$, so we can enumerate all of its valid subsequences (i.e., subsets of KeyStr), calculate the cost of each one, and record it in a map (that is, $[key] \to [value]$ means $[\text{a phrase}] \to [cost]$).
So, firstly, you should hash a phrase to a number (state).
If the letters of a phrase are pairwise distinct, the hash is simple (e.g., for the phrase $xyz$, you can map $x \to Bit_0,\ y \to Bit_1,\ z \to Bit_2$).
However, our phrase contains repeated letters, like $xxxyyz$; i.e., we have to map a multiset (not a set) to a number.
The technique for mapping a multiset to a number is as follows.
KeyString is “helloleetcode”; the occurrence count of each letter is
e: 4, l: 3, o: 2, h: 1, t: 1, c: 1, d: 1
The occurrence count of letter `e` is $4$ (binary $100$), which needs $3$ bits; we use $Bit_0, Bit_1, Bit_2$ to record the count of `e`.
The occurrence count of letter `l` is $3$ (binary $11$), which needs $2$ bits; we use $Bit_3, Bit_4$ to represent `l`.
Finally, for a number, its bit layout is $[012\ (e)]\ [34\ (l)]\ [56\ (o)]\ [7\ (h)]\ [8\ (t)]\ [9\ (c)]\ [10\ (d)]$.
Given a number $st$, $st \ \&\ 7$ is the count of `e`, $(st >> 3) \ \&\ 3$ is the count of `l`, and so on.
There are some notes for this technique:
- We call a state *valid* if it is a subset of KeyString.
  Given two valid states $a, b$, the operation $a + b$ may be erroneous when, for some letter, the sum of its counts in $a$ and $b$ is greater than its count in $KeyString$.
  For instance, $a = [1100\dots0],\ b = [0100\dots0]$ (bits listed starting from $Bit_0$; the counts of letter `e` in $a, b$ are $3, 2$, respectively): the sum is $5$, which exceeds the limit $4$.
- Given a state $st$ (which represents a multiset), if we want to add a letter to this $st$ (provided the count of this letter in $st$ is not full), this corresponds to the operation $st \mathrel{+}= (1 << B)$, where $B$ is the number of bits that lie to the left of this letter's field in the layout; we call $B$ the power of this letter.
  e.g., $B_e = 0,\ B_l = 3,\ B_o = 5,\ \dots$ according to $[012\ (e)]\ [34\ (l)]\ [56\ (o)]\ [7\ (h)]\ [8\ (t)]\ [9\ (c)]\ [10\ (d)]$.
- If two states $a, b$ can be added (that is, for any letter $x$, the count of $x$ in $a$ plus the count of $x$ in $b$ is not greater than the limit of $x$ in $KeyString$), then you can simply compute $a + b$ to merge the two multisets.
Proof:
According to the property above, $b$ equals $C_e \cdot P_e + C_l \cdot P_l + \dots$, where $C_x$ is the count of $x$ and $P_x$ is the power of $x$ (adding $P_x$ to a state means adding one letter $x$ to the original multiset, on the premise that the count of $x$ in the original state is not full).
Then we can execute $a = a + P_e$ exactly $C_e$ times (according to the property above, these operations are all valid), and likewise $a = a + P_x$ exactly $C_x$ times for every other letter $x$.
Now, the first step is, for a string $str$ (of length at most $8$), to get all of its possible states. There are two methods:
- Relying on a greedy rule: if we take $XYZ$ from $abXcYdeZf$, the minimal cost is obtained with the order $Z \to X \to Y$, because $Z$ is closer to the edge than $X$.
- Because the length is at most $8$, we can use Brute-DFS (complexity $8!$). For the same example of getting $XYZ$ from $abXcYdeZf$, we try the orders $XYZ, XZY, YXZ, YZX, \dots$ (e.g., for $YXZ$: $dfs(abXcYdeZf) \to dfs(abXcdeZf) \to dfs(abcdeZf) \to dfs(abcdef)$).
One important thing: suppose we have already found that the cost of $st = [XY]$ is $10$, and now we reach $st = [XY]$ again, but this time its cost is $11$. Can we ignore this case? The answer is no! Different choices of $[XY]$ leave the remaining string in different shapes (e.g., from $aXbXcYd$ you can get the same $[XY]$ while leaving either $abXcd$ or $aXbcd$). In other words, an optimal choice for $[XYZ]$ does not mean that the choice made for its sub-step $[XY]$ was optimal. (Since the length is small, just brute-force it.)
Code
// Evaluates to true when the associative container `_map` has an entry for `_key`.
// `_map` is parenthesized so that expressions such as `*ptr` expand correctly.
// Note: `_map` is evaluated twice, so do not pass an expression with side effects.
#define MAP_CONTAIN_( _map, _key) ((_map).find( _key) != (_map).end())
/*
helloleetcode
e: 4, l: 3, o: 2, h: 1, t: 1, c: 1, d: 1
bit: [012 (e)] [34 (l)] [56 (o)] [7 (h)] [8 (t)] [9 (c)] [10 (d)]
*/
/*
 * Finds the minimal total cost of collecting the letters of "helloleetcode"
 * from a list of words.
 *
 * A multiset of collected letters is packed into one int: every letter owns a
 * small bit field that stores how many copies have been collected so far
 * (e: bits 0-2, l: bits 3-4, o: bits 5-6, h/t/c/d: bits 7/8/9/10).
 *
 * Preconditions (not checked): at most 25 words, each at most 8 characters.
 */
class Solution {
public:
    /* Marker for "the full state cannot be reached from here". */
    static constexpr int kInf = 0x3F3F3F3F;

    /* Mp_cost[ i][ st]: cheapest cost found for collecting the multiset st from word i. */
    std::unordered_map< int, int> Mp_cost[ 25];
    /* letter -> { value of one copy (1 << shift), max copies, shift, field mask }. */
    std::unordered_map< char, std::vector< int> > Mp_info;
    int N;          /* number of words in the current call */
    int Full_state; /* packed state holding every letter of "helloleetcode" */
    int Ans;        /* raw result of the last search (kInf when impossible) */
    /* Dp[ i][ st]: memoized result of Dfs( i, st); -1 means "not computed yet". */
    int Dp[ 25][ 1 << 11];
    //--

    /*
     * Tries to add one copy of letter _c to the packed multiset _st.
     * Returns true and updates _st on success; returns false (leaving _st
     * untouched) when _c is not a tracked letter or its field is already full.
     */
    bool Bit_add( int & _st, char _c){
        const auto it = Mp_info.find( _c);
        if( it == Mp_info.end()){ return false;}
        const std::vector< int> & info = it->second;
        const int have = ( _st >> info[ 2]) & info[ 3];
        if( have >= info[ 1]){ return false;}
        _st += info[ 0];
        return true;
    }

    /*
     * Greedy pre-processing of word _ind: for every non-empty subset of
     * positions whose letters form a valid packed state, compute a removal cost
     * and keep the smallest cost per state in Mp_cost[ _ind].
     *
     * Removing a character costs (characters still on its left) *
     * (characters still on its right); the greedy rule always removes the
     * selected character that is closest to either end (first one on ties).
     */
    void Pre_handle( int _ind, const std::string & _str){
        const int n = static_cast< int>( _str.size());
        for( int mask = 1; mask < (1 << n); ++mask){
            // Pack the selected letters; give up on this subset as soon as one letter
            // is untracked or would exceed its limit.
            int st = 0;
            bool ok = true;
            for( int j = 0; j < n && ok; ++j){
                if( (mask >> j) & 1){ ok = Bit_add( st, _str[ j]);}
            }
            if( !ok || st == 0){ continue;}

            // Local scratch buffer (a function-static one would be shared mutable state).
            bool remaining[ 8];
            for( bool & flag : remaining){ flag = true;}

            int cost = 0;
            while( true){
                int pick = -1;
                int best_edge = 0x3F; // larger than any possible distance to an end
                int pick_cost = 0;
                for( int j = 0; j < n; ++j){
                    if( !( (mask >> j) & 1) || !remaining[ j]){ continue;}
                    int left = 0, right = 0;
                    for( int z = 0; z < j; ++z){ if( remaining[ z]){ ++left;}}
                    for( int z = j + 1; z < n; ++z){ if( remaining[ z]){ ++right;}}
                    const int edge = std::min( left, right);
                    if( edge < best_edge){
                        best_edge = edge;
                        pick = j;
                        pick_cost = left * right;
                    }
                }
                if( pick == -1){ break;} // every selected character has been removed
                remaining[ pick] = false;
                cost += pick_cost;
            }
            Record_cost( _ind, st, cost);
        }
    }

    /*
     * Brute-force alternative to Pre_handle: tries every removal order of the
     * tracked letters of word _ind and records the cheapest cost per reached
     * state. Deliberately has no pruning on (state, cost): the same state can
     * leave different remaining strings behind.
     */
    void Pre_handle_2( int _ind, const std::string & _str){
        std::function< void( std::string, int, int)> dfs =
            [&]( std::string rest, int state, int total) -> void{
            const int len = static_cast< int>( rest.size());
            for( int i = 0; i < len; ++i){
                int st = state;
                if( !Bit_add( st, rest[ i])){ continue;}
                const int cost = total + i * (len - i - 1);
                Record_cost( _ind, st, cost);
                dfs( rest.substr( 0, i) + rest.substr( i + 1), st, cost);
            }
        };
        dfs( _str, 0, 0);
    }

    /*
     * Returns true when the packed multisets _a and _b can be merged with a
     * plain addition, i.e. no letter would exceed its limit.
     */
    bool Check_valid( int _a, int _b){
        for( const auto & entry : Mp_info){
            const std::vector< int> & info = entry.second;
            const int limit = info[ 1];
            const int c1 = ( _a >> info[ 2]) & info[ 3];
            const int c2 = ( _b >> info[ 2]) & info[ 3];
            assert( c1 <= limit);
            assert( c2 <= limit);
            if( c1 + c2 > limit){ return false;}
        }
        return true;
    }

    /*
     * Minimal cost of completing the packed state _pre to Full_state using only
     * words _ind, _ind + 1, ..., N - 1; kInf when that is impossible.
     */
    int Dfs( int _ind, int _pre){
        if( _ind == N){
            return _pre == Full_state ? 0 : kInf;
        }
        if( Dp[ _ind][ _pre] != -1){ return Dp[ _ind][ _pre];}

        // Taking nothing from the current word is always an option.
        int best = std::min( static_cast< int>( kInf), Dfs( _ind + 1, _pre));
        for( const auto & entry : Mp_cost[ _ind]){
            const int st = entry.first;
            const int cost = entry.second;
            if( !Check_valid( _pre, st)){ continue;}
            const int rest = Dfs( _ind + 1, _pre + st);
            if( rest != kInf){ // never add on top of the "impossible" marker
                best = std::min( best, cost + rest);
            }
        }
        return Dp[ _ind][ _pre] = best;
    }

    /*
     * Entry point: returns the minimal total cost of collecting all letters of
     * "helloleetcode" from the words in A, or -1 when it cannot be done.
     * All per-call state is rebuilt, so the same object can be reused.
     */
    int Leetcode( std::vector< std::string> & A) {
        Mp_info[ 'e'] = {1 << 0, 4, 0, 7};
        Mp_info[ 'l'] = {1 << 3, 3, 3, 3};
        Mp_info[ 'o'] = {1 << 5, 2, 5, 3};
        Mp_info[ 'h'] = {1 << 7, 1, 7, 1};
        Mp_info[ 't'] = {1 << 8, 1, 8, 1};
        Mp_info[ 'c'] = {1 << 9, 1, 9, 1};
        Mp_info[ 'd'] = {1 << 10, 1, 10, 1};
        Full_state = 0;
        for( const auto & entry : Mp_info){
            Full_state += entry.second[ 1] * entry.second[ 0];
        }
        //--
        std::memset( Dp, -1, sizeof( Dp));
        // Drop subsets recorded by an earlier call; otherwise they would leak into this one.
        for( auto & table : Mp_cost){ table.clear();}
        N = static_cast< int>( A.size());
        for( int i = 0; i < N; ++i){
            Pre_handle( i, A[ i]);
        }
        Ans = Dfs( 0, 0);
        return Ans == kInf ? -1 : Ans;
    }

private:
    /* Stores _cost for state _st of word _ind unless a cheaper cost is already known. */
    void Record_cost( int _ind, int _st, int _cost){
        std::unordered_map< int, int> & table = Mp_cost[ _ind];
        const auto it = table.find( _st);
        if( it == table.end()){
            table.emplace( _st, _cost);
        }
        else if( _cost < it->second){
            it->second = _cost;
        }
    }
};