123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115 |
- // Copyright GoFrame Author(https://goframe.org). All Rights Reserved.
- //
- // This Source Code Form is subject to the terms of the MIT License.
- // If a copy of the MIT was not distributed with this file,
- // You can obtain one at https://github.com/gogf/gf.
- // Package gcharset implements character-set conversion functionality.
- //
- // Supported Character Set:
- //
- // Chinese : GBK/GB18030/GB2312/Big5
- //
- // Japanese: EUCJP/ISO2022JP/ShiftJIS
- //
- // Korean : EUCKR
- //
- // Unicode : UTF-8/UTF-16/UTF-16BE/UTF-16LE
- //
- // Other : macintosh/IBM*/Windows*/ISO-*
- package gcharset
- import (
- "bytes"
- "context"
- "io/ioutil"
- "golang.org/x/text/encoding"
- "golang.org/x/text/encoding/ianaindex"
- "golang.org/x/text/transform"
- "github.com/gogf/gf/v2/errors/gcode"
- "github.com/gogf/gf/v2/errors/gerror"
- "github.com/gogf/gf/v2/internal/intlog"
- )
// charsetAlias maps alternative charset spellings accepted by this package to
// the charset name that is handed to the encoding index for lookup.
// Keys are matched exactly, which is why both the upper-case and the
// lower-case spelling of every alias are listed.
//
// NOTE(review): "GB2312" usually denotes the 8-bit EUC-CN encoding, which is
// not the same as the 7-bit "HZ-GB-2312" encoding it is aliased to here —
// confirm that this alias is intentional.
var charsetAlias = map[string]string{
	// Spellings without the dashes.
	"HZGB2312": "HZ-GB-2312",
	"hzgb2312": "HZ-GB-2312",

	// Plain GB2312 spellings.
	"GB2312": "HZ-GB-2312",
	"gb2312": "HZ-GB-2312",
}
- // Supported returns whether charset `charset` is supported.
- func Supported(charset string) bool {
- return getEncoding(charset) != nil
- }
- // Convert converts `src` charset encoding from `srcCharset` to `dstCharset`,
- // and returns the converted string.
- // It returns `src` as `dst` if it fails converting.
- func Convert(dstCharset string, srcCharset string, src string) (dst string, err error) {
- if dstCharset == srcCharset {
- return src, nil
- }
- dst = src
- // Converting `src` to UTF-8.
- if srcCharset != "UTF-8" {
- if e := getEncoding(srcCharset); e != nil {
- tmp, err := ioutil.ReadAll(
- transform.NewReader(bytes.NewReader([]byte(src)), e.NewDecoder()),
- )
- if err != nil {
- return "", gerror.Wrapf(err, `convert string "%s" to utf8 failed`, srcCharset)
- }
- src = string(tmp)
- } else {
- return dst, gerror.NewCodef(gcode.CodeInvalidParameter, `unsupported srcCharset "%s"`, srcCharset)
- }
- }
- // Do the converting from UTF-8 to `dstCharset`.
- if dstCharset != "UTF-8" {
- if e := getEncoding(dstCharset); e != nil {
- tmp, err := ioutil.ReadAll(
- transform.NewReader(bytes.NewReader([]byte(src)), e.NewEncoder()),
- )
- if err != nil {
- return "", gerror.Wrapf(err, `convert string from utf8 to "%s" failed`, dstCharset)
- }
- dst = string(tmp)
- } else {
- return dst, gerror.NewCodef(gcode.CodeInvalidParameter, `unsupported dstCharset "%s"`, dstCharset)
- }
- } else {
- dst = src
- }
- return dst, nil
- }
- // ToUTF8 converts `src` charset encoding from `srcCharset` to UTF-8 ,
- // and returns the converted string.
- func ToUTF8(srcCharset string, src string) (dst string, err error) {
- return Convert("UTF-8", srcCharset, src)
- }
- // UTF8To converts `src` charset encoding from UTF-8 to `dstCharset`,
- // and returns the converted string.
- func UTF8To(dstCharset string, src string) (dst string, err error) {
- return Convert(dstCharset, "UTF-8", src)
- }
- // getEncoding returns the encoding.Encoding interface object for `charset`.
- // It returns nil if `charset` is not supported.
- func getEncoding(charset string) encoding.Encoding {
- if c, ok := charsetAlias[charset]; ok {
- charset = c
- }
- enc, err := ianaindex.MIB.Encoding(charset)
- if err != nil {
- intlog.Errorf(context.TODO(), `%+v`, err)
- }
- return enc
- }
|