Skip to content

Commit 3c5224b

Browse files
committed
💾 Feat: LCS algorithm, now you can get the longest sub-sequences
1 parent 9a7fa72 commit 3c5224b

File tree

10 files changed

+256
-0
lines changed

10 files changed

+256
-0
lines changed

‎.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
## Platform ignores
2+
.DS_Store
3+
14
## Ignore for Rider files
25
.idea/
36

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<TargetFramework>net8.0</TargetFramework>
4+
<ImplicitUsings>enable</ImplicitUsings>
5+
<Nullable>enable</Nullable>
6+
<IsPackable>false</IsPackable>
7+
<IsTestProject>true</IsTestProject>
8+
</PropertyGroup>
9+
<ItemGroup>
10+
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.6.0" />
11+
<PackageReference Include="MSTest.TestAdapter" Version="3.0.4" />
12+
<PackageReference Include="MSTest.TestFramework" Version="3.0.4" />
13+
<PackageReference Include="coverlet.collector" Version="6.0.0" />
14+
</ItemGroup>
15+
<ItemGroup>
16+
<ProjectReference Include="..\Common.Algorithm.Core\Common.Algorithm.Core.csproj" />
17+
</ItemGroup>
18+
</Project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
global using Microsoft.VisualStudio.TestTools.UnitTesting;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
using System.Diagnostics;
2+
using System.Text;
3+
using Common.Algorithm.Core.Text.Distance.Calculators;
4+
5+
namespace Common.Algorithm.Core.Test.Text.Distance.Calculators;
6+
7+
/// <summary>
/// Unit tests for the <see cref="LCS"/> distance calculator.
/// </summary>
[TestClass]
public class Test_LCS
{
    /// <summary>
    /// Runs the calculator on the pair ("ABCBDAB", "BDCABA") and checks both the
    /// reported distance (4) and the set of matched sub-sequences ("BCBA", "BDAB").
    /// </summary>
    [TestMethod]
    public void TestGetDistanceInfo()
    {
        var calculator = new LCS();
        var info = calculator.GetDistanceInfo(inputs: ["ABCBDAB", "BDCABA"]);

        Assert.AreEqual(4, info.Distance);
        Assert.IsNotNull(info.LcsInfo);
        Assert.IsNotNull(info.LcsInfo.LcsMatchedSubSequences);

        // Dump the raw result to the debug output to ease diagnosing a failure.
        var dump = new StringBuilder();
        foreach (var (length, sequences) in info.LcsInfo.LcsMatchedSubSequences)
        {
            dump.AppendLine($"+ {length}: ");
            foreach (var item in sequences)
            {
                dump.AppendLine($" - {item}");
            }
        }
        Debug.WriteLine(dump.ToString());

        // Compare order-independent textual forms of the expected and actual dictionaries.
        var expectedMap = new Dictionary<int, List<string>>
        {
            { 4, new List<string> { "BCBA", "BDAB" } },
        };
        var expected = expectedMap.Assertable();
        var actual = info.LcsInfo.LcsMatchedSubSequences.Assertable();

        Debug.WriteLine(expected);
        Debug.WriteLine(actual);
        Assert.AreEqual(expected, actual);
    }
}
34+
35+
/// <summary>
/// Helpers that make LCS results easy to compare in assertions.
/// </summary>
public static class LcsTestUtils
{
    /// <summary>
    /// Renders a length-to-sequences dictionary as a single string whose content does
    /// not depend on the insertion order of the keys or of the sequences.
    /// </summary>
    /// <param name="dict">Dictionary to render.</param>
    /// <returns>
    /// One line per key in ascending key order, formatted as <c>key: s1,s2;</c> with the
    /// sequences sorted, lines separated by <c>'\n'</c>. An empty dictionary yields an
    /// empty string.
    /// </returns>
    public static string Assertable(this Dictionary<int, List<string>> dict)
    {
        var lines = new List<string>();

        foreach (var entry in dict.OrderBy(e => e.Key))
        {
            var sortedSequences = entry.Value.OrderBy(text => text);
            var joined = string.Join(',', sortedSequences);
            lines.Add($"{entry.Key}: {joined};");
        }

        return string.Join('\n', lines);
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<TargetFramework>net8.0</TargetFramework>
5+
<ImplicitUsings>enable</ImplicitUsings>
6+
<Nullable>enable</Nullable>
7+
</PropertyGroup>
8+
9+
</Project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
namespace Common.Algorithm.Core.Text.Distance;
2+
3+
/// <summary>
/// Container for the per-algorithm settings passed to a distance calculator.
/// </summary>
public class CalculationOptions
{
    /// <summary>
    /// Settings for the LCS algorithm; <c>null</c> when none were supplied.
    /// </summary>
    public LcsOptions? LcsOptions { get; set; }
}
7+
8+
/// <summary>
/// Settings specific to the LCS algorithm.
/// </summary>
public class LcsOptions
{
    /// <summary>
    /// Whether only the longest sub-sequences should be reported. Defaults to <c>true</c>.
    /// </summary>
    public bool ContainsOnlyLongestSubSequences { get; set; } = true;
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
namespace Common.Algorithm.Core.Text.Distance.Calculators;
2+
3+
/// <summary>
/// Longest-common-subsequence (LCS) calculator for exactly two strings.
/// </summary>
/// <remarks>
/// The reported distance is the LCS length. The calculator keeps no state between
/// calls; every call works on local data only.
/// </remarks>
public class LCS : IDistanceCalculator
{
    // Back-pointer values stored in the direction matrix.
    private const int Diagonal = 1; // characters matched: step to (row - 1, col - 1)
    private const int Up = 2;       // step to (row - 1, col)
    private const int Left = 3;     // step to (row, col - 1)

    /// <summary>
    /// Computes the LCS length of the two inputs together with matched sub-sequences
    /// of that length.
    /// </summary>
    /// <param name="inputs">Exactly two strings to compare.</param>
    /// <param name="options">
    /// Calculation options. Accepted for interface compatibility; this implementation
    /// does not consult them and always reports only the longest sub-sequences.
    /// </param>
    /// <returns>
    /// A <see cref="DistanceInfo"/> whose <c>Distance</c> is the LCS length, whose
    /// <c>OriginalInputs</c> is <paramref name="inputs"/>, and whose <c>LcsInfo</c>
    /// holds the matched sub-sequences keyed by that length.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="inputs"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="inputs"/> does not contain exactly two items.
    /// </exception>
    public DistanceInfo GetDistanceInfo(List<string> inputs, CalculationOptions? options = null)
    {
        ArgumentNullException.ThrowIfNull(inputs);

        if (inputs.Count != 2)
        {
            throw new ArgumentOutOfRangeException(
                nameof(inputs),
                "There should be only two inputs"
            );
        }

        var lcsInfo = GetLcsInfo(inputs[0], inputs[1]);

        return new DistanceInfo
        {
            OriginalInputs = inputs,
            // The result dictionary has exactly one entry, keyed by the LCS length.
            Distance = lcsInfo.LcsMatchedSubSequences!.Keys.First(),
            LcsInfo = lcsInfo,
        };
    }

    /// <summary>
    /// Builds the LCS tables for <paramref name="a"/> and <paramref name="b"/> and
    /// collects the distinct sub-sequences of maximal length found by backtracking
    /// from the last row at each column.
    /// </summary>
    /// <returns>
    /// An <see cref="LcsInfo"/> with a single dictionary entry: LCS length mapped to the
    /// distinct sub-sequences, in the order they were first found (right-most column
    /// first). When the LCS is empty the entry is <c>0 -> [""]</c>.
    /// </returns>
    private static LcsInfo GetLcsInfo(string a, string b)
    {
        // Rows follow the shorter string and columns the longer one; on equal
        // lengths `a` provides the rows.
        var (rowText, colText) = a.Length > b.Length ? (b, a) : (a, b);
        var height = rowText.Length;
        var width = colText.Length;

        var lengths = new int[height + 1, width + 1];
        var directions = new int[height + 1, width + 1];

        for (var row = 1; row <= height; ++row)
        {
            for (var col = 1; col <= width; ++col)
            {
                if (rowText[row - 1] == colText[col - 1])
                {
                    lengths[row, col] = lengths[row - 1, col - 1] + 1;
                    directions[row, col] = Diagonal;
                }
                else if (lengths[row - 1, col] >= lengths[row, col - 1])
                {
                    // Ties prefer moving up, which determines which sequences are found.
                    lengths[row, col] = lengths[row - 1, col];
                    directions[row, col] = Up;
                }
                else
                {
                    lengths[row, col] = lengths[row, col - 1];
                    directions[row, col] = Left;
                }
            }
        }

        var lcsLength = lengths[height, width];
        var sequences = new List<string>();
        var seen = new HashSet<string>();

        for (var col = width; col >= 1; --col)
        {
            // A backtrack from (height, col) yields exactly lengths[height, col]
            // characters, so shorter prefixes can be skipped without tracing them.
            if (lengths[height, col] != lcsLength)
            {
                continue;
            }

            var candidate = Backtrack(col);
            if (seen.Add(candidate))
            {
                sequences.Add(candidate);
            }
        }

        // Both inputs empty: there is no column to trace, but the LCS is still the
        // empty string (same result as when only one input is empty).
        if (sequences.Count == 0)
        {
            sequences.Add(string.Empty);
        }

        return new LcsInfo
        {
            LcsMatchedSubSequences = new Dictionary<int, List<string>>
            {
                { lcsLength, sequences },
            },
        };

        // Follows the back-pointers from (height, startCol) to the table border and
        // returns the matched characters in left-to-right order.
        string Backtrack(int startCol)
        {
            var buffer = new char[lengths[height, startCol]];
            var next = buffer.Length; // filled from the end because we walk backwards
            var row = height;
            var col = startCol;

            while (row > 0 && col > 0)
            {
                switch (directions[row, col])
                {
                    case Diagonal:
                        buffer[--next] = rowText[row - 1];
                        --row;
                        --col;
                        break;
                    case Up:
                        --row;
                        break;
                    default: // Left
                        --col;
                        break;
                }
            }

            return new string(buffer);
        }
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
namespace Common.Algorithm.Core.Text.Distance;
2+
3+
/// <summary>
/// Result of a distance calculation.
/// </summary>
public class DistanceInfo
{
    /// <summary>
    /// The inputs the calculation was run on; <c>null</c> when not set.
    /// </summary>
    public List<string>? OriginalInputs { get; set; }

    /// <summary>
    /// The numeric result of the calculation. The LCS calculator stores the length of
    /// the longest common sub-sequence here.
    /// </summary>
    public double Distance { get; set; }

    /// <summary>
    /// LCS-specific details; <c>null</c> when not set.
    /// </summary>
    public LcsInfo? LcsInfo { get; set; }
}
11+
12+
/// <summary>
/// Details produced by the LCS algorithm.
/// </summary>
public class LcsInfo
{
    /// <summary>
    /// The matched sub-sequences found by the LCS algorithm, keyed by their length.
    /// </summary>
    /// <example>
    /// When calculating the distance of <c>(abbabbc, abbac)</c>, the result would be:
    /// 5, ['abbac']
    /// 4, ['abba', 'abbc', 'bbac']
    /// ...
    ///
    /// Only the longest sub-sequences are included when that option is requested.
    /// </example>
    public Dictionary<int, List<string>>? LcsMatchedSubSequences { get; set; }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
namespace Common.Algorithm.Core.Text.Distance;
2+
3+
/// <summary>
/// Contract for algorithms that compute a distance (or similarity measure) over a
/// list of input strings.
/// </summary>
public interface IDistanceCalculator
{
    /// <summary>
    /// Runs the calculation on <paramref name="inputs"/>.
    /// </summary>
    /// <param name="inputs">The strings to compare.</param>
    /// <param name="options">
    /// Optional calculation settings; pass <c>null</c> (or omit) to use the
    /// implementation's defaults.
    /// </param>
    /// <returns>The calculation result.</returns>
    DistanceInfo GetDistanceInfo(
        List<string> inputs,
        CalculationOptions? options = null
    );
}

‎Common.Algorithm.sln

+38
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,49 @@ Microsoft Visual Studio Solution File, Format Version 12.00
33
# Visual Studio Version 17
44
VisualStudioVersion = 17.1.32228.430
55
MinimumVisualStudioVersion = 10.0.40219.1
6+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common.Algorithm.Core", "Common.Algorithm.Core\Common.Algorithm.Core.csproj", "{7515FDE9-F1E0-4F53-8956-1F451034B7E7}"
7+
EndProject
8+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common.Algorithm.Core.Test", "Common.Algorithm.Core.Test\Common.Algorithm.Core.Test.csproj", "{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}"
9+
EndProject
610
Global
711
GlobalSection(SolutionProperties) = preSolution
812
HideSolutionNode = FALSE
913
EndGlobalSection
1014
GlobalSection(ExtensibilityGlobals) = postSolution
1115
SolutionGuid = {25933558-031B-4AE0-ACDA-96635BF768C3}
1216
EndGlobalSection
17+
GlobalSection(SolutionConfigurationPlatforms) = preSolution
18+
Debug|Any CPU = Debug|Any CPU
19+
Debug|x64 = Debug|x64
20+
Debug|x86 = Debug|x86
21+
Release|Any CPU = Release|Any CPU
22+
Release|x64 = Release|x64
23+
Release|x86 = Release|x86
24+
EndGlobalSection
25+
GlobalSection(ProjectConfigurationPlatforms) = postSolution
26+
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
27+
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|Any CPU.Build.0 = Debug|Any CPU
28+
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|x64.ActiveCfg = Debug|Any CPU
29+
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|x64.Build.0 = Debug|Any CPU
30+
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|x86.ActiveCfg = Debug|Any CPU
31+
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|x86.Build.0 = Debug|Any CPU
32+
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|Any CPU.ActiveCfg = Release|Any CPU
33+
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|Any CPU.Build.0 = Release|Any CPU
34+
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|x64.ActiveCfg = Release|Any CPU
35+
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|x64.Build.0 = Release|Any CPU
36+
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|x86.ActiveCfg = Release|Any CPU
37+
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|x86.Build.0 = Release|Any CPU
38+
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
39+
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|Any CPU.Build.0 = Debug|Any CPU
40+
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|x64.ActiveCfg = Debug|Any CPU
41+
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|x64.Build.0 = Debug|Any CPU
42+
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|x86.ActiveCfg = Debug|Any CPU
43+
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|x86.Build.0 = Debug|Any CPU
44+
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|Any CPU.ActiveCfg = Release|Any CPU
45+
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|Any CPU.Build.0 = Release|Any CPU
46+
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|x64.ActiveCfg = Release|Any CPU
47+
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|x64.Build.0 = Release|Any CPU
48+
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|x86.ActiveCfg = Release|Any CPU
49+
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|x86.Build.0 = Release|Any CPU
50+
EndGlobalSection
1351
EndGlobal

0 commit comments

Comments
 (0)