1340691923@qq.com ebbf4120bf 第一次提交
2022-01-26 16:40:50 +08:00

91 lines
1.8 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package util
import (
"fmt"
"unsafe"
"golang.org/x/text/encoding/simplifiedchinese"
)
// IsGBK reports whether data is structurally consistent with GBK encoding.
//
// Bytes 0x00-0x7f are accepted as single-byte, ASCII-compatible characters.
// Any other byte must be a lead byte in 0x81-0xfe followed by a trail byte in
// 0x40-0xfe other than 0x7f. Input that ends in the middle of a two-byte
// sequence is rejected. Empty input is reported as valid.
//
// This is a byte-range check only: it does not verify that each pair maps to
// an assigned character, so text in another encoding can happen to pass.
func IsGBK(data []byte) bool {
	for i := 0; i < len(data); {
		if data[i] <= 0x7f {
			// Single-byte range shared with ASCII.
			i++
			continue
		}
		// Anything above 0x7f has to start a two-byte sequence, so a trail
		// byte must be present before it is inspected.
		if i+1 >= len(data) {
			return false
		}
		lead, trail := data[i], data[i+1]
		if lead < 0x81 || lead > 0xfe {
			return false
		}
		if trail < 0x40 || trail > 0xfe || trail == 0x7f {
			return false
		}
		i += 2
	}
	return true
}
// preNUm returns the number of consecutive 1 bits at the most significant end
// of data, i.e. how many 1 bits precede the first 0 bit when data is written
// as eight binary digits. The result is in the range 0-8.
func preNUm(data byte) int {
	// %08b zero-pads to eight digits. Plain %b drops leading zeros, which
	// would make a value such as 0b0000_0101 appear to start with a 1 bit.
	digits := fmt.Sprintf("%08b", data)
	n := 0
	for n < len(digits) && digits[n] == '1' {
		n++
	}
	return n
}
// StrToBytes returns a byte slice that views the bytes of s without copying.
// The slice has length and capacity len(s).
//
// The result shares memory with s. Strings are immutable, so the returned
// slice must be treated as read-only; writing through it is undefined
// behaviour.
//
// The conversion relies on the runtime layout of string and slice headers
// (data pointer, length[, capacity]).
func StrToBytes(s string) []byte {
	type stringHeader struct {
		data unsafe.Pointer
		len  int
	}
	type sliceHeader struct {
		data unsafe.Pointer
		len  int
		cap  int
	}
	// The data pointer is kept as unsafe.Pointer the whole way through so the
	// garbage collector can see it; carrying it as a uintptr would hide the
	// reference from the collector.
	src := (*stringHeader)(unsafe.Pointer(&s))
	dst := sliceHeader{data: src.data, len: src.len, cap: src.len}
	return *(*[]byte)(unsafe.Pointer(&dst))
}
// BytesToStr returns a string that views the bytes of b without copying.
//
// The result shares memory with b: the slice header is reinterpreted as a
// string header, so later writes to b are visible through the returned string.
func BytesToStr(b []byte) string {
	hdr := unsafe.Pointer(&b)
	return *(*string)(hdr)
}
// IsUtf8 reports whether data has the byte structure of UTF-8 text.
//
// Each character must be one of the following forms, where every byte after
// the first is a continuation byte of the form 10XX_XXXX:
//
//	0XXX_XXXX
//	110X_XXXX 10XX_XXXX
//	1110_XXXX 10XX_XXXX 10XX_XXXX
//	1111_0XXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
//	1111_10XX 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
//	1111_110X 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
//
// Input that ends in the middle of a multi-byte sequence is rejected. Empty
// input is reported as valid.
//
// This is a structural check only. The legacy five- and six-byte forms are
// accepted, and overlong encodings and surrogate code points are not rejected.
func IsUtf8(data []byte) bool {
	for i := 0; i < len(data); {
		lead := data[i]

		// The number of leading 1 bits in the first byte is the total number
		// of bytes the character occupies.
		var size int
		switch {
		case lead&0x80 == 0x00:
			i++
			continue
		case lead&0xe0 == 0xc0:
			size = 2
		case lead&0xf0 == 0xe0:
			size = 3
		case lead&0xf8 == 0xf0:
			size = 4
		case lead&0xfc == 0xf8:
			size = 5
		case lead&0xfe == 0xfc:
			size = 6
		default:
			// A stray continuation byte (10XX_XXXX), or 0xfe/0xff, which
			// match none of the forms above.
			return false
		}

		// The whole sequence must be present before the continuation bytes
		// are inspected.
		if i+size > len(data) {
			return false
		}
		for _, c := range data[i+1 : i+size] {
			if c&0xc0 != 0x80 {
				return false
			}
		}
		i += size
	}
	return true
}
// GbkToUtf8 runs gbkData through the simplifiedchinese.GBK decoder and returns
// the decoder's output.
//
// The error returned by the decoder is deliberately discarded because the
// signature has no way to report it; the caller receives whatever slice the
// decoder produced alongside that error.
func GbkToUtf8(gbkData []byte) []byte {
	decoder := simplifiedchinese.GBK.NewDecoder()
	decoded, _ := decoder.Bytes(gbkData)
	return decoded
}