gf/text/gstr/gstr_similartext.go

54 lines
1.4 KiB
Go
Raw Normal View History

2021-01-17 21:46:25 +08:00
// Copyright GoFrame Author(https://goframe.org). All Rights Reserved.
2019-02-02 14:22:32 +08:00
//
// This Source Code Form is subject to the terms of the MIT License.
// If a copy of the MIT was not distributed with this file,
// You can obtain one at https://github.com/gogf/gf.
2019-02-02 14:22:32 +08:00
package gstr
2019-04-06 21:31:01 +08:00
// SimilarText calculates the similarity between two strings and returns the
// number of matching bytes, using the same recursive strategy as PHP's
// similar_text: locate the longest common segment, then repeat on the parts
// to the left and to the right of it.
// See http://php.net/manual/en/function.similar-text.php.
//
// The comparison is byte-wise, so a multi-byte UTF-8 character contributes
// one match per identical byte.
//
// If percent is not nil, it receives the similarity as a percentage, computed
// as sim*200/(len(first)+len(second)). When both strings are empty the
// function returns 0 and sets *percent to 0.
func SimilarText(first, second string, percent *float64) int {
	// similarText returns the number of matching bytes between a and b.
	// It works on string slices directly, which share the backing data of
	// the inputs, so the recursion does not allocate.
	var similarText func(a, b string) int
	similarText = func(a, b string) int {
		var longest, posA, posB int

		// Find the longest common segment of the two strings. The strict
		// comparison keeps the first segment found when several share the
		// same length.
		for i := 0; i < len(a); i++ {
			for j := 0; j < len(b); j++ {
				n := 0
				for i+n < len(a) && j+n < len(b) && a[i+n] == b[j+n] {
					n++
				}
				if n > longest {
					longest, posA, posB = n, i, j
				}
			}
		}
		if longest == 0 {
			return 0
		}

		sum := longest
		// Recurse on the parts before the common segment, if both exist.
		if posA > 0 && posB > 0 {
			sum += similarText(a[:posA], b[:posB])
		}
		// Recurse on the parts after the common segment, if both exist.
		if endA, endB := posA+longest, posB+longest; endA < len(a) && endB < len(b) {
			sum += similarText(a[endA:], b[endB:])
		}
		return sum
	}

	total := len(first) + len(second)
	if total == 0 {
		// Nothing to compare: report zero similarity instead of leaving the
		// caller's variable untouched.
		if percent != nil {
			*percent = 0
		}
		return 0
	}

	sim := similarText(first, second)
	if percent != nil {
		// Multiply in floating point so the intermediate cannot overflow int.
		*percent = float64(sim) * 200 / float64(total)
	}
	return sim
}