123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158 |
- // Copyright GoFrame Author(https://goframe.org). All Rights Reserved.
- //
- // This Source Code Form is subject to the terms of the MIT License.
- // If a copy of the MIT was not distributed with this file,
- // You can obtain one at https://github.com/gogf/gf.
- package gstr
// Levenshtein calculates the Levenshtein distance between two strings, i.e.
// the cheapest sequence of single-byte edits that turns str1 into str2.
//
// costIns: Defines the cost of insertion.
// costRep: Defines the cost of replacement.
// costDel: Defines the cost of deletion.
//
// The comparison is byte-wise, so multi-byte UTF-8 characters count once per
// differing byte. If one of the strings is empty the result is the other
// string's length multiplied by the matching cost. Otherwise, if either string
// is longer than 255 bytes, -1 is returned.
//
// See http://php.net/manual/en/function.levenshtein.php.
func Levenshtein(str1, str2 string, costIns, costRep, costDel int) int {
	const maxLen = 255

	l1, l2 := len(str1), len(str2)
	if l1 == 0 {
		return l2 * costIns
	}
	if l2 == 0 {
		return l1 * costDel
	}
	if l1 > maxLen || l2 > maxLen {
		return -1
	}

	// Only two rows of the DP matrix are ever needed: the one for the
	// previous byte of str1 and the one currently being filled in.
	prev := make([]int, l2+1)
	curr := make([]int, l2+1)
	for j := range prev {
		// Turning "" into str2[:j] takes j insertions.
		prev[j] = j * costIns
	}

	for i := 0; i < l1; i++ {
		// Turning str1[:i+1] into "" takes one more deletion than the row above.
		curr[0] = prev[0] + costDel
		for j := 0; j < l2; j++ {
			// Replacement (free when the bytes already match).
			best := prev[j]
			if str1[i] != str2[j] {
				best += costRep
			}
			if del := prev[j+1] + costDel; del < best {
				best = del
			}
			if ins := curr[j] + costIns; ins < best {
				best = ins
			}
			curr[j+1] = best
		}
		// The freshly computed row becomes the previous row of the next pass.
		prev, curr = curr, prev
	}
	return prev[l2]
}
// SimilarText calculates the similarity between two strings and returns the
// number of matching bytes.
//
// The count is obtained by locating the longest common segment of the two
// strings and then repeating the search on the parts to the left and to the
// right of that segment. The comparison is byte-wise.
//
// If percent is not nil it receives the similarity as a percentage, computed
// as 200 * matches / (len(first) + len(second)). It is set to 0 when both
// strings are empty.
//
// See http://php.net/manual/en/function.similar-text.php.
func SimilarText(first, second string, percent *float64) int {
	// similarText returns the summed length of the common segments of a and b.
	var similarText func(a, b string) int
	similarText = func(a, b string) int {
		// Find the first longest run of bytes shared by a and b.
		var best, pos1, pos2 int
		for i := 0; i < len(a); i++ {
			for j := 0; j < len(b); j++ {
				l := 0
				for i+l < len(a) && j+l < len(b) && a[i+l] == b[j+l] {
					l++
				}
				// Strictly greater: on ties the earliest match wins.
				if l > best {
					best, pos1, pos2 = l, i, j
				}
			}
		}
		if best == 0 {
			return 0
		}

		sum := best
		// Recurse on what precedes the segment in both strings ...
		if pos1 > 0 && pos2 > 0 {
			sum += similarText(a[:pos1], b[:pos2])
		}
		// ... and on what follows it. Slicing a string does not copy it.
		if pos1+best < len(a) && pos2+best < len(b) {
			sum += similarText(a[pos1+best:], b[pos2+best:])
		}
		return sum
	}

	total := len(first) + len(second)
	if total == 0 {
		// Nothing to compare; still write the out-parameter so the caller
		// never reads a stale value (and avoid dividing by zero below).
		if percent != nil {
			*percent = 0
		}
		return 0
	}

	sim := similarText(first, second)
	if percent != nil {
		*percent = float64(sim*200) / float64(total)
	}
	return sim
}
// Soundex calculates the soundex key of a string.
//
// The key is always four bytes long: the first ASCII letter of str in upper
// case, followed by up to three digits encoding the consonants that follow,
// right-padded with '0'. Bytes that are not ASCII letters are ignored. If str
// contains no ASCII letter at all the key is "0000".
//
// Soundex panics if str is empty.
//
// See http://php.net/manual/en/function.soundex.php.
func Soundex(str string) string {
	if str == "" {
		panic("str: cannot be an empty string")
	}

	const (
		// vowel marks letters that are never written to the key but do
		// separate two consonants carrying the same digit.
		vowel = 0
		// skipped marks H and W, which are ignored altogether: consonants
		// with the same digit on either side of them are coded only once.
		skipped = -1
	)
	table := [26]int{
		vowel, '1', '2', '3', // A, B, C, D
		vowel, '1', '2', // E, F, G
		skipped,                        // H
		vowel, '2', '2', '4', '5', '5', // I, J, K, L, M, N
		vowel, '1', '2', '6', '2', '3', // O, P, Q, R, S, T
		vowel, '1', // U, V
		skipped, '2', // W, X
		vowel, '2', // Y, Z
	}

	var (
		key  [4]byte
		n    int // number of key bytes written so far
		last int // table code of the previous significant letter
	)
	// build soundex string
	for i := 0; i < len(str) && n < len(key); i++ {
		c := str[i]
		if 'a' <= c && c <= 'z' {
			c -= 'a' - 'A'
		}
		if c < 'A' || c > 'Z' {
			continue
		}

		code := table[c-'A']
		if n == 0 {
			// The first letter is kept verbatim; its code is remembered so
			// that an immediately following equal-coded consonant is dropped.
			key[0] = c
			n = 1
			last = code
			continue
		}
		if code == skipped || code == last {
			continue
		}
		last = code
		if code != vowel {
			key[n] = byte(code)
			n++
		}
	}

	// pad with "0"
	for ; n < len(key); n++ {
		key[n] = '0'
	}
	return string(key[:])
}
|