BettingArbitrageBot/Arbitrage/StringDistance.cs

175 lines
4.9 KiB
C#
Raw Permalink Normal View History

2022-12-07 12:48:00 +11:00
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Arbitrage
{
public static class StringDistance
{
/// <summary>
/// Compute the distance between two strings.
/// </summary>
public static int LevenshteinDistance(string s, string t)
{
int n = s.Length;
int m = t.Length;
int[,] d = new int[n + 1, m + 1];
// Step 1
if (n == 0)
{
return m;
}
if (m == 0)
{
return n;
}
// Step 2
for (int i = 0; i <= n; d[i, 0] = i++)
{
}
for (int j = 0; j <= m; d[0, j] = j++)
{
}
// Step 3
for (int i = 1; i <= n; i++)
{
//Step 4
for (int j = 1; j <= m; j++)
{
// Step 5
int cost = (t[j - 1] == s[i - 1]) ? 0 : 1;
// Step 6
d[i, j] = Math.Min(
Math.Min(d[i - 1, j] + 1, d[i, j - 1] + 1),
d[i - 1, j - 1] + cost);
}
}
// Step 7
return d[n, m];
}
public static int LengthUnbiasedLevenshteinDistance(string s, string t)
{
int levenshteinDist = LevenshteinDistance(s, t);
return levenshteinDist - Math.Abs(s.Length - t.Length);
}
public static bool TryFindLongestCommonSubstringSlice(string str1, string str2, out int start, out int end)
{
int[,] num = new int[str1.Length, str2.Length];
int maxlen = 0;
int lastSubsBegin = 0;
StringBuilder subStrBuilder = new StringBuilder();
for (int i = 0; i < str1.Length; i++)
{
for (int j = 0; j < str2.Length; j++)
{
if (str1[i] != str2[j])
{
num[i, j] = 0;
}
else
{
if ((i == 0) || (j == 0))
num[i, j] = 1;
else
num[i, j] = 1 + num[i - 1, j - 1];
if (num[i, j] > maxlen)
{
maxlen = num[i, j];
int thisSubsBegin = i - num[i, j] + 1;
if (lastSubsBegin == thisSubsBegin)
{
subStrBuilder.Append(str1[i]);
}
else
{
lastSubsBegin = thisSubsBegin;
subStrBuilder.Length = 0;
subStrBuilder.Append(str1.Substring(lastSubsBegin, (i + 1) - lastSubsBegin));
}
}
}
}
}
if (maxlen > 0)
{
// Inclusive
start = lastSubsBegin;
end = lastSubsBegin + maxlen - 1;
return true;
}
else
{
start = 0;
end = 0;
return false;
}
}
public static double NumberOfSubstringMovementsWithLengthPenalty(string s, string t)
{
s = s.Trim().ToLower();
t = t.Trim().ToLower();
int sLen = s.Length;
int tLen = t.Length;
string shortestString;
string longestString;
if (sLen > tLen)
{
shortestString = t;
longestString = s;
}
else
{
shortestString = s;
longestString = t;
}
int shortestStringLen = shortestString.Length;
List<string> substrings = new List<string>();
while (shortestString.Length > 0)
{
int start;
int end;
if (TryFindLongestCommonSubstringSlice(shortestString, longestString, out start, out end))
{
substrings.Add(shortestString.Substring(start, end - start + 1));
shortestString = shortestString.Remove(start, end - start + 1).Trim();
}
else
{
substrings.Add(shortestString);
break;
}
}
if (substrings.Count == 1)
{
return 0;
}
return substrings.Count / (float)shortestStringLen;
}
}
}