gcharset.go 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. // Copyright GoFrame Author(https://goframe.org). All Rights Reserved.
  2. //
  3. // This Source Code Form is subject to the terms of the MIT License.
  4. // If a copy of the MIT was not distributed with this file,
  5. // You can obtain one at https://github.com/gogf/gf.
  6. // Package gcharset implements character-set conversion functionality.
  7. //
  8. // Supported Character Set:
  9. //
  10. // Chinese : GBK/GB18030/GB2312/Big5
  11. //
  12. // Japanese: EUCJP/ISO2022JP/ShiftJIS
  13. //
  14. // Korean : EUCKR
  15. //
  16. // Unicode : UTF-8/UTF-16/UTF-16BE/UTF-16LE
  17. //
  18. // Other : macintosh/IBM*/Windows*/ISO-*
  19. package gcharset
  20. import (
  21. "bytes"
  22. "context"
  23. "io/ioutil"
  24. "golang.org/x/text/encoding"
  25. "golang.org/x/text/encoding/ianaindex"
  26. "golang.org/x/text/transform"
  27. "github.com/gogf/gf/v2/errors/gcode"
  28. "github.com/gogf/gf/v2/errors/gerror"
  29. "github.com/gogf/gf/v2/internal/intlog"
  30. )
  // charsetAlias maps alternative charset spellings to the name that is
  // actually used for the encoding lookup. Keys are matched literally, which
  // is why both the upper-case and the lower-case spellings are listed.
  //
  // NOTE(review): "GB2312"/"gb2312" are redirected to "HZ-GB-2312" instead of
  // being looked up under their own name -- confirm this is intended.
  var charsetAlias = map[string]string{
  	"GB2312":   "HZ-GB-2312",
  	"gb2312":   "HZ-GB-2312",
  	"HZGB2312": "HZ-GB-2312",
  	"hzgb2312": "HZ-GB-2312",
  }
  40. // Supported returns whether charset `charset` is supported.
  41. func Supported(charset string) bool {
  42. return getEncoding(charset) != nil
  43. }
  44. // Convert converts `src` charset encoding from `srcCharset` to `dstCharset`,
  45. // and returns the converted string.
  46. // It returns `src` as `dst` if it fails converting.
  47. func Convert(dstCharset string, srcCharset string, src string) (dst string, err error) {
  48. if dstCharset == srcCharset {
  49. return src, nil
  50. }
  51. dst = src
  52. // Converting `src` to UTF-8.
  53. if srcCharset != "UTF-8" {
  54. if e := getEncoding(srcCharset); e != nil {
  55. tmp, err := ioutil.ReadAll(
  56. transform.NewReader(bytes.NewReader([]byte(src)), e.NewDecoder()),
  57. )
  58. if err != nil {
  59. return "", gerror.Wrapf(err, `convert string "%s" to utf8 failed`, srcCharset)
  60. }
  61. src = string(tmp)
  62. } else {
  63. return dst, gerror.NewCodef(gcode.CodeInvalidParameter, `unsupported srcCharset "%s"`, srcCharset)
  64. }
  65. }
  66. // Do the converting from UTF-8 to `dstCharset`.
  67. if dstCharset != "UTF-8" {
  68. if e := getEncoding(dstCharset); e != nil {
  69. tmp, err := ioutil.ReadAll(
  70. transform.NewReader(bytes.NewReader([]byte(src)), e.NewEncoder()),
  71. )
  72. if err != nil {
  73. return "", gerror.Wrapf(err, `convert string from utf8 to "%s" failed`, dstCharset)
  74. }
  75. dst = string(tmp)
  76. } else {
  77. return dst, gerror.NewCodef(gcode.CodeInvalidParameter, `unsupported dstCharset "%s"`, dstCharset)
  78. }
  79. } else {
  80. dst = src
  81. }
  82. return dst, nil
  83. }
  84. // ToUTF8 converts `src` charset encoding from `srcCharset` to UTF-8 ,
  85. // and returns the converted string.
  86. func ToUTF8(srcCharset string, src string) (dst string, err error) {
  87. return Convert("UTF-8", srcCharset, src)
  88. }
  89. // UTF8To converts `src` charset encoding from UTF-8 to `dstCharset`,
  90. // and returns the converted string.
  91. func UTF8To(dstCharset string, src string) (dst string, err error) {
  92. return Convert(dstCharset, "UTF-8", src)
  93. }
  94. // getEncoding returns the encoding.Encoding interface object for `charset`.
  95. // It returns nil if `charset` is not supported.
  96. func getEncoding(charset string) encoding.Encoding {
  97. if c, ok := charsetAlias[charset]; ok {
  98. charset = c
  99. }
  100. enc, err := ianaindex.MIB.Encoding(charset)
  101. if err != nil {
  102. intlog.Errorf(context.TODO(), `%+v`, err)
  103. }
  104. return enc
  105. }