增加mahonia第三方字符集编码转换包,完善gcharset及gxml包编码

This commit is contained in:
John 2018-06-07 08:37:24 +08:00
parent 3af59aff40
commit adf59ef9f2
4 changed files with 23 additions and 24 deletions

View File

@ -1,14 +1,13 @@
// Copyright 2017 gf Author(https://gitee.com/johng/gf). All Rights Reserved.
// Copyright 2018 gf Author(https://gitee.com/johng/gf). All Rights Reserved.
//
// This Source Code Form is subject to the terms of the MIT License.
// If a copy of the MIT was not distributed with this file,
// You can obtain one at https://gitee.com/johng/gf.
// @author wenzi1
// @date 20180604
//gcharset
//@author wenzi1
//@date 20180604
//字符集转换方法.
//使用mahonia实现的字符集转换方法支持的字符集包括常见的utf8/UTF-16/UTF-16LE/macintosh/big5/gbk/gb18030,支持的全量字符集可以参考mahonia包
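// Package gcharset provides character-set conversion functions.
// The conversions are implemented with mahonia. Commonly used charsets include utf8/UTF-16/UTF-16LE/macintosh/big5/gbk/gb18030; see the mahonia package for the full list of supported charsets.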
package gcharset
import (
@ -18,7 +17,7 @@ import (
)
//2个字符集之间的转换
// 2个字符集之间的转换
func Convert(dstCharset string, srcCharset string, src string) (dst string, err error) {
s := mahonia.GetCharset(srcCharset)
if s == nil {
@ -43,12 +42,12 @@ func Convert(dstCharset string, srcCharset string, src string) (dst string, err
return dst, nil
}
// ToUTF8 converts src from the charset named by charset into UTF-8.
// It delegates to Convert with "UTF-8" as the destination charset and
// returns Convert's result and error unchanged.
func ToUTF8(charset string, src string) (dst string, err error) {
	dst, err = Convert("UTF-8", charset, src)
	return dst, err
}
// UTF8To converts the UTF-8 string src into the charset named by charset.
// It delegates to Convert with "UTF-8" as the source charset and returns
// Convert's result and error unchanged.
func UTF8To(charset string, src string) (dst string, err error) {
	dst, err = Convert(charset, "UTF-8", src)
	return dst, err
}

View File

@ -1,8 +1,7 @@
package gcharset_test
package gcharset
import (
"testing"
"gitee.com/wenzi1/gf/g/encoding/gcharset"
)
@ -53,7 +52,7 @@ var testData = []struct {
func TestDecode(t *testing.T) {
for _, data := range testData {
str := ""
str, err := gcharset.Convert("UTF-8", data.otherEncoding, data.other)
str, err := Convert("UTF-8", data.otherEncoding, data.other)
if err != nil {
t.Errorf("Could not create decoder for %v", err)
continue
@ -69,7 +68,7 @@ func TestDecode(t *testing.T) {
func TestEncode(t *testing.T) {
for _, data := range testData {
str := ""
str, err := gcharset.Convert(data.otherEncoding, "UTF-8", data.utf8)
str, err := Convert(data.otherEncoding, "UTF-8", data.utf8)
if err != nil {
t.Errorf("Could not create decoder for %v", err)
continue
@ -87,7 +86,7 @@ func TestConvert(t *testing.T) {
dstCharset := "gbk"
dst := "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed"
str, err := gcharset.Convert(dstCharset, srcCharset, src)
str, err := Convert(dstCharset, srcCharset, src)
if err != nil {
t.Errorf("convert error. %v", err)
return

View File

@ -15,11 +15,12 @@ import (
"github.com/axgle/mahonia"
"errors"
"fmt"
"strings"
)
// Decode parses XML content into a map.
// It first runs the XML charset pre-processing step on xmlbyte and then
// returns whatever mxj.NewMapXml(xmlbyte) returns.
// NOTE(review): the error returned by the pre-processing call is discarded
// here; confirm that this best-effort behavior is intended.
// NOTE(review): both Prepare and prepare are called below; this looks like the
// pre- and post-rename lines of a diff shown together — confirm which is live.
func Decode(xmlbyte []byte) (map[string]interface{}, error) {
Prepare(xmlbyte)
prepare(xmlbyte)
return mxj.NewMapXml(xmlbyte)
}
@ -34,7 +35,7 @@ func EncodeWithIndent(v map[string]interface{}, rootTag...string) ([]byte, error
// XML格式内容直接转换为JSON格式内容
func ToJson(xmlbyte []byte) ([]byte, error) {
Prepare(xmlbyte)
prepare(xmlbyte)
mv, err := mxj.NewMapXml(xmlbyte)
if err == nil {
return mv.Json()
@ -43,11 +44,11 @@ func ToJson(xmlbyte []byte) ([]byte, error) {
}
}
//XML字符集预处理
//@author wenzi1
//@date 20180604
func Prepare(xmlbyte []byte) error {
patten := "<\\?xml\\s+version\\s*=.*?\\s+encoding\\s*=\\s*[\\'|\"](.*?)[\\'|\"]\\s*\\?\\s*>"
// XML字符集预处理
// @author wenzi1
// @date 20180604
func prepare(xmlbyte []byte) error {
patten := `<\?xml.*encoding\s*=\s*['|"](.*?)['|"].*\?>`
charsetReader := func(charset string, input io.Reader) (io.Reader, error) {
reader := mahonia.GetCharset(charset)
if reader == nil {
@ -66,8 +67,8 @@ func Prepare(xmlbyte []byte) error {
return errors.New(fmt.Sprintf("not support charset:%s", matchStr[1]))
}
if charset.Name != "UTF-8" {
mxj.CustomDecoder = &xml.Decoder{Strict:false,CharsetReader:charsetReader}
if !strings.EqualFold(charset.Name, "UTF-8") {
mxj.CustomDecoder = &xml.Decoder{Strict : false, CharsetReader : charsetReader}
}
return nil
}

View File

@ -6,7 +6,7 @@ import (
)
func main() {
a , e := gregx.MatchString(`(.+):(\d+),{0,1}(\d*),{0,1}(.*)`, "127.0.0.1:12333")
a , e := gregx.MatchString(`<\?xml.*encoding\s*=\s*['|"](.*?)['|"].*\?>`, `<?xml version= '1.0' encoding = "utf-8" ?>`)
fmt.Println(e)
for k, v := range a {
fmt.Printf("%d:%v\n", k, v)