gstr_similar.go 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. // Copyright GoFrame Author(https://goframe.org). All Rights Reserved.
  2. //
  3. // This Source Code Form is subject to the terms of the MIT License.
  4. // If a copy of the MIT was not distributed with this file,
  5. // You can obtain one at https://github.com/gogf/gf.
  6. package gstr
  7. // Levenshtein calculates Levenshtein distance between two strings.
  8. // costIns: Defines the cost of insertion.
  9. // costRep: Defines the cost of replacement.
  10. // costDel: Defines the cost of deletion.
  11. // See http://php.net/manual/en/function.levenshtein.php.
  12. func Levenshtein(str1, str2 string, costIns, costRep, costDel int) int {
  13. var maxLen = 255
  14. l1 := len(str1)
  15. l2 := len(str2)
  16. if l1 == 0 {
  17. return l2 * costIns
  18. }
  19. if l2 == 0 {
  20. return l1 * costDel
  21. }
  22. if l1 > maxLen || l2 > maxLen {
  23. return -1
  24. }
  25. tmp := make([]int, l2+1)
  26. p1 := make([]int, l2+1)
  27. p2 := make([]int, l2+1)
  28. var c0, c1, c2 int
  29. var i1, i2 int
  30. for i2 := 0; i2 <= l2; i2++ {
  31. p1[i2] = i2 * costIns
  32. }
  33. for i1 = 0; i1 < l1; i1++ {
  34. p2[0] = p1[0] + costDel
  35. for i2 = 0; i2 < l2; i2++ {
  36. if str1[i1] == str2[i2] {
  37. c0 = p1[i2]
  38. } else {
  39. c0 = p1[i2] + costRep
  40. }
  41. c1 = p1[i2+1] + costDel
  42. if c1 < c0 {
  43. c0 = c1
  44. }
  45. c2 = p2[i2] + costIns
  46. if c2 < c0 {
  47. c0 = c2
  48. }
  49. p2[i2+1] = c0
  50. }
  51. tmp = p1
  52. p1 = p2
  53. p2 = tmp
  54. }
  55. c0 = p1[l2]
  56. return c0
  57. }
  58. // SimilarText calculates the similarity between two strings.
  59. // See http://php.net/manual/en/function.similar-text.php.
  60. func SimilarText(first, second string, percent *float64) int {
  61. var similarText func(string, string, int, int) int
  62. similarText = func(str1, str2 string, len1, len2 int) int {
  63. var sum, max int
  64. pos1, pos2 := 0, 0
  65. // Find the longest segment of the same section in two strings
  66. for i := 0; i < len1; i++ {
  67. for j := 0; j < len2; j++ {
  68. for l := 0; (i+l < len1) && (j+l < len2) && (str1[i+l] == str2[j+l]); l++ {
  69. if l+1 > max {
  70. max = l + 1
  71. pos1 = i
  72. pos2 = j
  73. }
  74. }
  75. }
  76. }
  77. if sum = max; sum > 0 {
  78. if pos1 > 0 && pos2 > 0 {
  79. sum += similarText(str1, str2, pos1, pos2)
  80. }
  81. if (pos1+max < len1) && (pos2+max < len2) {
  82. s1 := []byte(str1)
  83. s2 := []byte(str2)
  84. sum += similarText(string(s1[pos1+max:]), string(s2[pos2+max:]), len1-pos1-max, len2-pos2-max)
  85. }
  86. }
  87. return sum
  88. }
  89. l1, l2 := len(first), len(second)
  90. if l1+l2 == 0 {
  91. return 0
  92. }
  93. sim := similarText(first, second, l1, l2)
  94. if percent != nil {
  95. *percent = float64(sim*200) / float64(l1+l2)
  96. }
  97. return sim
  98. }
  99. // Soundex calculates the soundex key of a string.
  100. // See http://php.net/manual/en/function.soundex.php.
  101. func Soundex(str string) string {
  102. if str == "" {
  103. panic("str: cannot be an empty string")
  104. }
  105. table := [26]rune{
  106. '0', '1', '2', '3', // A, B, C, D
  107. '0', '1', '2', // E, F, G
  108. '0', // H
  109. '0', '2', '2', '4', '5', '5', // I, J, K, L, M, N
  110. '0', '1', '2', '6', '2', '3', // O, P, Q, R, S, T
  111. '0', '1', // U, V
  112. '0', '2', // W, X
  113. '0', '2', // Y, Z
  114. }
  115. last, code, small := -1, 0, 0
  116. sd := make([]rune, 4)
  117. // build soundex string
  118. for i := 0; i < len(str) && small < 4; i++ {
  119. // ToUpper
  120. char := str[i]
  121. if char < '\u007F' && 'a' <= char && char <= 'z' {
  122. code = int(char - 'a' + 'A')
  123. } else {
  124. code = int(char)
  125. }
  126. if code >= 'A' && code <= 'Z' {
  127. if small == 0 {
  128. sd[small] = rune(code)
  129. small++
  130. last = int(table[code-'A'])
  131. } else {
  132. code = int(table[code-'A'])
  133. if code != last {
  134. if code != 0 {
  135. sd[small] = rune(code)
  136. small++
  137. }
  138. last = code
  139. }
  140. }
  141. }
  142. }
  143. // pad with "0"
  144. for ; small < 4; small++ {
  145. sd[small] = '0'
  146. }
  147. return string(sd)
  148. }