字符串转GB2312字节数组
说明:在 ArkTS 中,单个字节用 number 表示,字节数组可以用 Uint8Array 处理。
如果想实现字符串转字节数组, Java 中可以通过
byte[] nameD = name.getBytes("GB2312");
但是OpenHarmony上,通过ArkTs要想将字符串转为GB2312字节数组,好像没有现成的方法实现。
所以,就手搓一份代码吧。UTF-8 与 GB2312 之间的转码逻辑参考了文章《js javascript UTF-8 GB2312编码转换》。
编写ArkTs代码如下:
下面这两个数组(_GB2312_1_87 和 _UCS2_Big_1_87)的完整定义请参考上面链接中的文章。由于掘金代码块有字符数限制,这里无法完整展示;只要把链接里的数组数据复制到下面代码中对应的省略位置即可:
_GB2312_1_87 = [ /* 数据省略 */ ];//GB2312字库结束
_UCS2_Big_1_87 = [ /* 数据省略 */ ];//GB2312字库结束
GB2312转码实现类
ini
/**
 * Character-code converter between Unicode (UTF-16 code units), GB2312 and UTF-8,
 * with helpers that render the converted codes as hexadecimal text.
 *
 * GB2312 conversion is table driven: `_GB2312_1_87[i]` and `_UCS2_Big_1_87[i]`
 * are treated as the same character expressed in the two encodings.
 */
class GB2312 {
  /**
   * GB2312 codes, parallel to `_UCS2_Big_1_87`.
   * NOTE: the table data is omitted from this listing and must be pasted in
   * (the original lookup loops assumed 7614 entries).
   */
  _GB2312_1_87: number[] = [
    /* table data omitted - paste the GB2312 codes here */
  ]; // end of GB2312 table

  /**
   * Unicode (UCS-2, big-endian numeric value) codes, parallel to `_GB2312_1_87`.
   * NOTE: the table data is omitted from this listing and must be pasted in.
   */
  _UCS2_Big_1_87: number[] = [
    /* table data omitted - paste the UCS-2 codes here */
  ]; // end of UCS-2 table

  /** Finds `key` in `from` and returns the entry at the same index of `to`, or -1. */
  private static lookup(from: number[], to: number[], key: number): number {
    const index = from.indexOf(key);
    if (index < 0 || index >= to.length) {
      return -1;
    }
    return to[index];
  }

  /** Renders `value` as upper-case hex using whole bytes and at least `minBytes` bytes. */
  private static toHexBytes(value: number, minBytes: number): string {
    let hex = value.toString(16).toUpperCase();
    if (hex.length % 2 !== 0) {
      hex = "0" + hex;
    }
    return hex.padStart(minBytes * 2, "0");
  }

  /**
   * Swaps the high and low bytes of a 16-bit value (endianness conversion).
   *
   * @param uincodeLE 16-bit value; bits above the low 16 are ignored.
   * @returns the value with its two bytes exchanged.
   */
  ConvetEndian(uincodeLE: number): number {
    const low = uincodeLE & 0x00ff;
    const high = (uincodeLE >> 8) & 0x00ff;
    return (low << 8) | high;
  }

  /**
   * Maps a GB2312 code to its Unicode value using the parallel tables.
   *
   * @param gcode GB2312 code to look up.
   * @returns the matching Unicode value, or -1 when the code is not in the table.
   */
  GB2312_TO_Unicode(gcode: number): number {
    return GB2312.lookup(this._GB2312_1_87, this._UCS2_Big_1_87, gcode);
  }

  /**
   * Maps a Unicode value to its GB2312 code using the parallel tables.
   *
   * @param ucode Unicode value to look up.
   * @returns the matching GB2312 code, or -1 when the value is not in the table.
   */
  Unicode_TO_GB2312(ucode: number): number {
    return GB2312.lookup(this._UCS2_Big_1_87, this._GB2312_1_87, ucode);
  }

  /**
   * Decodes one UTF-8 sequence (1 to 6 bytes, legacy layout) found at the start
   * of `indata` into its code value.
   *
   *   0000 0000 - 0000 007F | 0XXXXXXX
   *   0000 0080 - 0000 07FF | 110XXXXX 10XXXXXX
   *   0000 0800 - 0000 FFFF | 1110XXXX 10XXXXXX 10XXXXXX
   *   0001 0000 - 001F FFFF | 11110XXX 10XXXXXX 10XXXXXX 10XXXXXX
   *   0020 0000 - 03FF FFFF | 111110XX 10XXXXXX x4
   *   0400 0000 - 7FFF FFFF | 1111110X 10XXXXXX x5
   *
   * @param indata bytes of the sequence; only the first sequence is read.
   * @returns the decoded value, or -1 when the input is empty, starts with a
   *   continuation byte or 0xFE/0xFF, is truncated, or contains a byte that is
   *   not of the form 10XXXXXX where a continuation byte is required.
   *   Overlong encodings are not rejected.
   */
  utf8ToUnicode(indata: number[] | Uint8Array): number {
    if (indata == null || indata.length === 0) {
      return -1;
    }
    const lead = indata[0];
    if (lead < 0x80) {
      return lead; // single-byte sequence
    }
    if (lead < 0xc0 || lead > 0xfd) {
      return -1; // stray continuation byte, or a value that can never start a sequence
    }

    // The lead byte gives the sequence length and the top payload bits.
    let byteCount: number;
    let code: number;
    if (lead >= 0xfc) {
      byteCount = 6;
      code = lead & 0x01;
    } else if (lead >= 0xf8) {
      byteCount = 5;
      code = lead & 0x03;
    } else if (lead >= 0xf0) {
      byteCount = 4;
      code = lead & 0x07;
    } else if (lead >= 0xe0) {
      byteCount = 3;
      code = lead & 0x0f;
    } else {
      byteCount = 2;
      code = lead & 0x1f;
    }

    if (indata.length < byteCount) {
      return -1; // truncated sequence
    }
    for (let i = 1; i < byteCount; i++) {
      const next = indata[i];
      if ((next & 0xc0) !== 0x80) {
        return -1; // not a continuation byte
      }
      // Make room for six more payload bits and append them (at most 31 bits total).
      code = (code << 6) | (next & 0x3f);
    }
    return code;
  }

  /**
   * Encodes a code value as UTF-8 and packs the bytes, first byte most
   * significant, into one number (e.g. U+4E2D -> 0xE4B8AD).
   *
   * @param code value in the range 0 .. 0x7FFFFFFF.
   * @returns the packed UTF-8 bytes (the code itself for values up to 0x7F),
   *   or -1 when `code` is outside the supported range.
   */
  Unicode_TO_UTF8(code: number): number {
    if (code < 0 || code > 0x7fffffff) {
      return -1;
    }
    if (code <= 0x7f) {
      return code; // one byte: the encoding equals the code
    }

    let byteCount: number;
    if (code > 0x03ffffff) {
      byteCount = 6;
    } else if (code > 0x001fffff) {
      byteCount = 5;
    } else if (code > 0x0000ffff) {
      byteCount = 4;
    } else if (code > 0x000007ff) {
      byteCount = 3;
    } else {
      byteCount = 2;
    }

    const bytes: number[] = [];
    let lead = 0x80; // marker bits of the first byte
    let rest = code;
    for (let i = byteCount - 1; i >= 1; i--) {
      bytes[i] = 0x80 | (rest & 0x3f); // lowest six bits go into a continuation byte
      rest >>= 6;
      lead = (lead >> 1) | 0x80; // one more leading 1 per continuation byte
    }
    bytes[0] = lead | rest;

    // Pack with multiplication: a 32-bit shift turns negative from 4 bytes on.
    let packed = 0;
    for (let i = 0; i < byteCount; i++) {
      packed = packed * 256 + bytes[i];
    }
    return packed;
  }

  /**
   * Converts every UTF-16 code unit of `inputStr` to the requested encoding and
   * renders the result as text.
   *
   * Example: `GB2312.TextCodec("GB2312", "long", text)` yields a hex string.
   *
   * Code units up to 0x7F are emitted unchanged as a single byte. A character
   * that cannot be converted is replaced by '?' (0x3F). Characters outside the
   * BMP are processed as two separate surrogate code units.
   *
   * @param codeType "GB2312", "UTF-8", "UCS-2 Big Endian" or
   *   "UCS-2 Little Endian"; any other value leaves the code unit unchanged.
   * @param format "long": bare upper-case hex, whole bytes per character, no
   *   separators. "short": one `0x..,` item per byte followed by a
   *   `//char codeType` comment line. Anything else: one `0x..,` item per
   *   character followed by the same comment line.
   * @param inputStr text to convert.
   * @returns the rendered text.
   */
  TextCodec(codeType: string, format: string, inputStr: string): string {
    const REPLACEMENT = 0x3f; // '?'
    let output = "";

    for (let i = 0; i < inputStr.length; i++) {
      const inputCode = inputStr.charCodeAt(i);
      const inputChar = inputStr.charAt(i);
      let targetCode = inputCode;
      let minBytes = 1; // minimum width used by the "long" format

      if (inputCode > 0x7f) { // ASCII is 0..127
        switch (codeType) {
          case "GB2312":
            targetCode = this.Unicode_TO_GB2312(inputCode);
            minBytes = 2;
            break;
          case "UTF-8":
            targetCode = this.Unicode_TO_UTF8(inputCode);
            break;
          case "UCS-2 Big Endian":
            targetCode = inputCode;
            minBytes = 2;
            break;
          case "UCS-2 Little Endian":
            targetCode = this.ConvetEndian(inputCode);
            minBytes = 2; // keeps a leading zero byte, e.g. 0x004E
            break;
          default:
            break;
        }
        if (targetCode < 0) {
          // Not representable in the target encoding.
          targetCode = REPLACEMENT;
          minBytes = 1;
        }
      }

      switch (format) {
        case "short": {
          let byteCount = 1;
          if (targetCode & 0xff0000) {
            byteCount = 3;
          } else if (targetCode & 0xff00) {
            byteCount = 2;
          }
          for (let k = byteCount - 1; k >= 0; k--) {
            const byte = (targetCode >> (8 * k)) & 0xff;
            output += "0x" + byte.toString(16).toUpperCase() + ",";
          }
          output += "//" + inputChar + " " + codeType + "\n";
          break;
        }
        case "long":
          output += GB2312.toHexBytes(targetCode, minBytes);
          break;
        default:
          output += "0x" + targetCode.toString(16).toUpperCase() + "," + "//" + inputChar + " " + codeType + "\n";
          break;
      }
    }
    return output;
  }
}
export default new GB2312()
使用下面的调用方法,就可以得到十六进制(Hex)字符串;然后再把该十六进制字符串转换为字节数组,即可得到 ArkTS 中的字节数组 Uint8Array。
调用方法
ini
// Encode the message text as GB2312; the "long" format yields bare hex digits with no separators.
let gb2312HexString = GB2312.TextCodec("GB2312","long",fixedMsg.Message);
// Turn the hex string into a Uint8Array (BCDDecode is presumably the class that hosts hexStr2Bytes - confirm).
let GB2312Bytes:Uint8Array = BCDDecode.hexStr2Bytes(gb2312HexString);
Hex转Uint8array
ini
/**
 * Decodes a hexadecimal string into bytes, two hex digits per byte.
 * Whitespace anywhere in the input is ignored.
 *
 * @param src hex text such as "D6D0 CEC4" (upper or lower case).
 * @returns one byte per pair of hex digits; empty input gives an empty array.
 * @throws Error when the input has an odd number of hex digits or contains a
 *   character that is not a hex digit, instead of silently producing wrong bytes.
 */
hexStr2Bytes(src: string): Uint8Array {
  const hex = src.replace(/\s+/g, ""); // strip all whitespace
  if (hex.length % 2 !== 0) {
    throw new Error("hexStr2Bytes: odd number of hex digits (" + hex.length + ")");
  }
  if (!/^[0-9a-fA-F]*$/.test(hex)) {
    throw new Error("hexStr2Bytes: input contains non-hexadecimal characters");
  }
  const bytes = new Uint8Array(hex.length / 2);
  for (let i = 0; i < bytes.length; i++) {
    bytes[i] = parseInt(hex.substring(i * 2, i * 2 + 2), 16);
  }
  return bytes;
}
GB2312字节数组转字符串
下面代码直接调用
php
/**
 * Decodes a byte array into a string using a TextDecoder created for "gb2312".
 * @param buffer bytes to decode
 * @returns the decoded string
 */
gb2312BytesToString(buffer: Uint8Array): string {
  const decoder = util.TextDecoder.create("gb2312");
  const decoded = decoder.decodeWithStream(buffer);
  return decoded;
}