mirror of
https://gitee.com/johng/gf.git
synced 2024-12-02 12:17:53 +08:00
增加mahonia第三方字符集编码转换包,完善gcharset及gxml包编码
This commit is contained in:
parent
3af59aff40
commit
adf59ef9f2
@ -1,14 +1,13 @@
|
||||
// Copyright 2017 gf Author(https://gitee.com/johng/gf). All Rights Reserved.
|
||||
// Copyright 2018 gf Author(https://gitee.com/johng/gf). All Rights Reserved.
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the MIT License.
|
||||
// If a copy of the MIT was not distributed with this file,
|
||||
// You can obtain one at https://gitee.com/johng/gf.
|
||||
// @author wenzi1
|
||||
// @date 20180604
|
||||
|
||||
//gcharset
|
||||
//@author wenzi1
|
||||
//@date 20180604
|
||||
//字符集转换方法.
|
||||
//使用mahonia实现的字符集转换方法,支持的字符集包括常见的utf8/UTF-16/UTF-16LE/macintosh/big5/gbk/gb18030,支持的全量字符集可以参考mahonia包
|
||||
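// Package gcharset provides character set conversion functions.
|
||||
// The conversions are implemented with mahonia. Commonly used supported charsets include utf8/UTF-16/UTF-16LE/macintosh/big5/gbk/gb18030; see the mahonia package for the full list of supported charsets.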
|
||||
package gcharset
|
||||
|
||||
import (
|
||||
@ -18,7 +17,7 @@ import (
|
||||
)
|
||||
|
||||
|
||||
//2个字符集之间的转换
|
||||
// Convert converts src from the srcCharset character set to the dstCharset character set.
|
||||
func Convert(dstCharset string, srcCharset string, src string) (dst string, err error) {
|
||||
s := mahonia.GetCharset(srcCharset)
|
||||
if s == nil {
|
||||
@ -43,12 +42,12 @@ func Convert(dstCharset string, srcCharset string, src string) (dst string, err
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
//指定字符集转UTF8
|
||||
// ToUTF8 converts src from the given charset to UTF-8.
// It is a thin wrapper that calls Convert with "UTF-8" as the destination
// charset and returns Convert's result and error unchanged.
|
||||
func ToUTF8(charset string, src string) (dst string, err error) {
|
||||
return Convert("UTF-8", charset, src)
|
||||
}
|
||||
|
||||
//UTF8转指定字符集
|
||||
// UTF8To converts src from UTF-8 to the given charset.
// It is a thin wrapper that calls Convert with "UTF-8" as the source
// charset and returns Convert's result and error unchanged.
|
||||
func UTF8To(charset string, src string) (dst string, err error) {
|
||||
return Convert(charset, "UTF-8", src)
|
||||
}
|
@ -1,8 +1,7 @@
|
||||
package gcharset_test
|
||||
package gcharset
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"gitee.com/wenzi1/gf/g/encoding/gcharset"
|
||||
)
|
||||
|
||||
|
||||
@ -53,7 +52,7 @@ var testData = []struct {
|
||||
func TestDecode(t *testing.T) {
|
||||
for _, data := range testData {
|
||||
str := ""
|
||||
str, err := gcharset.Convert("UTF-8", data.otherEncoding, data.other)
|
||||
str, err := Convert("UTF-8", data.otherEncoding, data.other)
|
||||
if err != nil {
|
||||
t.Errorf("Could not create decoder for %v", err)
|
||||
continue
|
||||
@ -69,7 +68,7 @@ func TestDecode(t *testing.T) {
|
||||
func TestEncode(t *testing.T) {
|
||||
for _, data := range testData {
|
||||
str := ""
|
||||
str, err := gcharset.Convert(data.otherEncoding, "UTF-8", data.utf8)
|
||||
str, err := Convert(data.otherEncoding, "UTF-8", data.utf8)
|
||||
if err != nil {
|
||||
t.Errorf("Could not create decoder for %v", err)
|
||||
continue
|
||||
@ -87,7 +86,7 @@ func TestConvert(t *testing.T) {
|
||||
dstCharset := "gbk"
|
||||
dst := "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed"
|
||||
|
||||
str, err := gcharset.Convert(dstCharset, srcCharset, src)
|
||||
str, err := Convert(dstCharset, srcCharset, src)
|
||||
if err != nil {
|
||||
t.Errorf("convert error. %v", err)
|
||||
return
|
||||
|
@ -15,11 +15,12 @@ import (
|
||||
"github.com/axgle/mahonia"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Decode parses XML content into a map[string]interface{}.
// It runs the charset pre-processing step on xmlbyte first and then passes
// the bytes to mxj.NewMapXml, whose result and error are returned as-is.
// NOTE(review): the error returned by the pre-processing step is discarded here.
|
||||
func Decode(xmlbyte []byte) (map[string]interface{}, error) {
|
||||
Prepare(xmlbyte)
|
||||
prepare(xmlbyte)
|
||||
return mxj.NewMapXml(xmlbyte)
|
||||
}
|
||||
|
||||
@ -34,7 +35,7 @@ func EncodeWithIndent(v map[string]interface{}, rootTag...string) ([]byte, error
|
||||
|
||||
// XML格式内容直接转换为JSON格式内容
|
||||
func ToJson(xmlbyte []byte) ([]byte, error) {
|
||||
Prepare(xmlbyte)
|
||||
prepare(xmlbyte)
|
||||
mv, err := mxj.NewMapXml(xmlbyte)
|
||||
if err == nil {
|
||||
return mv.Json()
|
||||
@ -43,11 +44,11 @@ func ToJson(xmlbyte []byte) ([]byte, error) {
|
||||
}
|
||||
}
|
||||
|
||||
//XML字符集预处理
|
||||
//@author wenzi1
|
||||
//@date 20180604
|
||||
func Prepare(xmlbyte []byte) error {
|
||||
patten := "<\\?xml\\s+version\\s*=.*?\\s+encoding\\s*=\\s*[\\'|\"](.*?)[\\'|\"]\\s*\\?\\s*>"
|
||||
// prepare performs XML charset pre-processing based on the encoding declared in the XML header.
|
||||
// @author wenzi1
|
||||
// @date 20180604
|
||||
func prepare(xmlbyte []byte) error {
|
||||
patten := `<\?xml.*encoding\s*=\s*['|"](.*?)['|"].*\?>`
|
||||
charsetReader := func(charset string, input io.Reader) (io.Reader, error) {
|
||||
reader := mahonia.GetCharset(charset)
|
||||
if reader == nil {
|
||||
@ -66,8 +67,8 @@ func Prepare(xmlbyte []byte) error {
|
||||
return errors.New(fmt.Sprintf("not support charset:%s", matchStr[1]))
|
||||
}
|
||||
|
||||
if charset.Name != "UTF-8" {
|
||||
mxj.CustomDecoder = &xml.Decoder{Strict:false,CharsetReader:charsetReader}
|
||||
if !strings.EqualFold(charset.Name, "UTF-8") {
|
||||
mxj.CustomDecoder = &xml.Decoder{Strict : false, CharsetReader : charsetReader}
|
||||
}
|
||||
return nil
|
||||
}
|
@ -6,7 +6,7 @@ import (
|
||||
)
|
||||
|
||||
func main() {
|
||||
a , e := gregx.MatchString(`(.+):(\d+),{0,1}(\d*),{0,1}(.*)`, "127.0.0.1:12333")
|
||||
a , e := gregx.MatchString(`<\?xml.*encoding\s*=\s*['|"](.*?)['|"].*\?>`, `<?xml version= '1.0' encoding = "utf-8" ?>`)
|
||||
fmt.Println(e)
|
||||
for k, v := range a {
|
||||
fmt.Printf("%d:%v\n", k, v)
|
||||
|
Loading…
Reference in New Issue
Block a user