91 lines
1.8 KiB
Go
Raw Normal View History

2022-01-26 16:40:50 +08:00
package util
import (
"fmt"
"unsafe"
"golang.org/x/text/encoding/simplifiedchinese"
)
// IsGBK reports whether data is structurally consistent with GBK encoding.
//
// Bytes in 0x00-0x7F are accepted as single-byte, ASCII-compatible codes.
// Every other byte must be a lead byte in 0x81-0xFE that is immediately
// followed by a trail byte in 0x40-0xFE other than 0x7F. A lead byte with no
// trail byte after it makes the input invalid. Empty input is reported as
// valid.
//
// The check only looks at byte ranges, so a true result does not prove that
// the data was actually produced by a GBK encoder.
func IsGBK(data []byte) bool {
	for i := 0; i < len(data); {
		lead := data[i]
		if lead <= 0x7f {
			// Single-byte code, compatible with ASCII.
			i++
			continue
		}

		// Anything above 0x7F has to open a two-byte sequence.
		if lead < 0x81 || lead > 0xfe {
			return false
		}
		if i+1 >= len(data) {
			// Truncated sequence: the lead byte is the last byte of the input.
			return false
		}
		trail := data[i+1]
		if trail < 0x40 || trail > 0xfe || trail == 0x7f {
			return false
		}
		i += 2
	}
	return true
}
// preNUm returns the number of consecutive 1 bits at the most significant end
// of data. For example 0b1110_0000 yields 3, 0b1111_1111 yields 8 and
// 0b0111_1111 yields 0.
func preNUm(data byte) int {
	// %08b always renders all eight bits. Plain %b omits leading zeros, which
	// would make a byte such as 0b0000_0001 look like it starts with a 1 bit.
	bits := fmt.Sprintf("%08b", data)
	n := 0
	for n < len(bits) && bits[n] == '1' {
		n++
	}
	return n
}
// StrToBytes returns a []byte view of s without copying the underlying bytes.
// The result has length and capacity equal to len(s).
//
// The returned slice shares memory with s. Strings are immutable in Go, so
// the slice must be treated as read-only; writing through it is undefined
// behaviour.
func StrToBytes(s string) []byte {
	// A string header is (data pointer, length) and a slice header is
	// (data pointer, length, capacity), so a string followed by an int has the
	// same layout as a slice header. Embedding the string keeps the data
	// pointer typed as a pointer, so the garbage collector keeps tracking it;
	// it is never held as a bare uintptr.
	view := struct {
		string
		capacity int
	}{s, len(s)}
	return *(*[]byte)(unsafe.Pointer(&view))
}
// BytesToStr returns a string view of b without copying the underlying bytes.
//
// The slice header is reinterpreted as a string header, so the result shares
// memory with b: any later write to b is visible through the returned string.
func BytesToStr(b []byte) string {
	header := unsafe.Pointer(&b)
	return *(*string)(header)
}
// IsUtf8 reports whether data is structurally well-formed UTF-8: every byte is
// either a single-byte ASCII code or part of a multi-byte sequence whose lead
// byte is followed by the right number of 10XX_XXXX continuation bytes.
// Empty input is reported as valid.
//
// Sequences of two to six bytes are accepted, including the five- and
// six-byte forms of the original UTF-8 design. Only the byte layout is
// checked: overlong encodings and surrogate code points are not rejected.
// A sequence that is cut off by the end of the input is invalid.
func IsUtf8(data []byte) bool {
	for i := 0; i < len(data); {
		if data[i]&0x80 == 0x00 {
			// 0XXX_XXXX: single-byte ASCII.
			i++
			continue
		}

		// The number of 1 bits before the first 0 bit of the lead byte is the
		// total number of bytes in the sequence:
		//   110X_XXXX 10XX_XXXX
		//   1110_XXXX 10XX_XXXX 10XX_XXXX
		//   1111_0XXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
		//   1111_10XX 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
		//   1111_110X 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
		num := preNUm(data[i])
		if num < 2 || num > 6 {
			// One leading 1 bit is a stray continuation byte; seven or eight
			// (0xFE, 0xFF) never start a sequence.
			return false
		}
		if i+num > len(data) {
			// Truncated sequence: not enough bytes left for the continuations.
			return false
		}
		// The remaining num-1 bytes must all start with the bits 10.
		for _, c := range data[i+1 : i+num] {
			if c&0xc0 != 0x80 {
				return false
			}
		}
		i += num
	}
	return true
}
// GbkToUtf8 converts GBK-encoded bytes to UTF-8 using the
// simplifiedchinese.GBK decoder from golang.org/x/text.
//
// The decoder's error is discarded, so callers cannot distinguish a failed
// conversion from a successful one; in the failure case the result is
// whatever the decoder returned alongside the error.
func GbkToUtf8(gbkData []byte) []byte {
	decoder := simplifiedchinese.GBK.NewDecoder()
	// NOTE(review): the error is dropped because the signature has no way to
	// report it; presumably the result is nil or empty on failure — confirm
	// against the decoder's documentation before relying on it.
	converted, _ := decoder.Bytes(gbkData)
	return converted
}