在JavaScript中将Uint8Array转换为字符串
我有一些UTF-8编码的数据,存放在JavaScript的一组Uint8Array元素中。有没有一种高效的方法把它们解码成普通的JavaScript字符串(我相信JavaScript使用16位Unicode)?我不想每次只追加一个字符,因为那样的字符串拼接会非常消耗CPU。
编码标准(Encoding Standard)中的TextEncoder和TextDecoder(可由stringencoding库提供polyfill)可以在字符串和ArrayBuffer之间进行转换:
// Round trip: encode the string "¢" into a Uint8Array with TextEncoder, then decode those bytes back into a string with TextDecoder (both constructed with the "utf-8" label).
var uint8array = new TextEncoder("utf-8").encode("¢"); var string = new TextDecoder("utf-8").decode(uint8array);
这样应该可行:
// http://www.onicos.com/staff/iz/amuse/javascript/expert/utf.txt

/* utf.js - UTF-8 <=> UTF-16 convertion
 *
 * Copyright (C) 1999 Masanao Izumo <iz@onicos.co.jp>
 * Version: 1.0
 * LastModified: Dec 25 1999
 * This library is free. You can redistribute it and/or modify it.
 */

/**
 * Decodes an array of UTF-8 bytes into a JavaScript (UTF-16) string.
 *
 * Handles 1-, 2-, 3- and 4-byte sequences. Code points above U+FFFF are
 * emitted as a UTF-16 surrogate pair. Bytes that cannot start a sequence
 * (0x80-0xBF, 0xF8-0xFF) and 4-byte sequences that decode outside
 * U+10000..U+10FFFF are skipped without producing output.
 *
 * @param {Uint8Array|number[]} array Indexable sequence of byte values.
 * @returns {string} The decoded string.
 */
function Utf8ArrayToStr(array) {
    var out = "";
    var len = array.length;
    var i = 0;
    var c, char2, char3, char4, codePoint;

    while (i < len) {
        c = array[i++];
        // The high nibble of the lead byte determines the sequence length.
        switch (c >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                // 0xxxxxxx
                out += String.fromCharCode(c);
                break;
            case 12: case 13:
                // 110x xxxx   10xx xxxx
                char2 = array[i++];
                out += String.fromCharCode(((c & 0x1F) << 6) | (char2 & 0x3F));
                break;
            case 14:
                // 1110 xxxx  10xx xxxx  10xx xxxx
                char2 = array[i++];
                char3 = array[i++];
                out += String.fromCharCode(((c & 0x0F) << 12) |
                                           ((char2 & 0x3F) << 6) |
                                           ((char3 & 0x3F) << 0));
                break;
            case 15:
                // 1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
                if (c & 0x08) {
                    // 0xF8-0xFF never start a UTF-8 sequence; skip like other invalid leads.
                    break;
                }
                char2 = array[i++];
                char3 = array[i++];
                char4 = array[i++];
                codePoint = ((c & 0x07) << 18) |
                            ((char2 & 0x3F) << 12) |
                            ((char3 & 0x3F) << 6) |
                            (char4 & 0x3F);
                if (codePoint >= 0x10000 && codePoint <= 0x10FFFF) {
                    // Split the supplementary-plane code point into a surrogate pair.
                    codePoint -= 0x10000;
                    out += String.fromCharCode(0xD800 + (codePoint >> 10),
                                               0xDC00 + (codePoint & 0x3FF));
                }
                break;
        }
    }

    return out;
}
与其他解决方案相比,它更简洁,因为它不使用任何取巧手段(hack),也不依赖浏览器特有的JS函数,因此在其他JS环境中同样可用。
查看JSFiddle演示 。
另请参阅相关问题:这里和这里
以下是我使用的:
// Build a string with one UTF-16 code unit per byte of uint8Arr. No UTF-8
// decoding happens here, so the result is only correct text for single-byte
// (ASCII / Latin-1) data.
// The bytes are passed to String.fromCharCode in chunks because spreading a
// very large array through Function.prototype.apply can exceed the engine's
// argument-count limit and throw.
var str = "";
for (var chunkStart = 0; chunkStart < uint8Arr.length; chunkStart += 0x8000) {
    str += String.fromCharCode.apply(null, uint8Arr.subarray(chunkStart, chunkStart + 0x8000));
}
这是在某个Chrome示例应用程序中找到的;不过它针对的是较大的数据块,并且前提是您可以接受异步转换。
/**
 * Asynchronously turns the contents of an array buffer into a string.
 *
 * The bytes are wrapped in a Blob and read back as text through a
 * FileReader; the decoded text is handed to `callback` once the read
 * completes. Nothing is returned synchronously.
 *
 * @private
 * @param {ArrayBuffer} buf The buffer to convert
 * @param {Function} callback Invoked with the resulting string when the read finishes
 */
function _arrayBufferToString(buf, callback) {
    var reader = new FileReader();

    reader.onload = function (event) {
        // event.target is the FileReader; its result holds the decoded text.
        callback(event.target.result);
    };

    reader.readAsText(new Blob([new Uint8Array(buf)]));
}
按@Sudhir所说的做,然后要从逗号分隔的数字列表中得到字符串,请使用:
// Append one character per byte of unitArr to myString, using each byte
// value directly as a UTF-16 code unit.
var i = 0;
while (i < unitArr.byteLength) {
    myString = myString + String.fromCharCode(unitArr[i]);
    i += 1;
}
这样就能得到你想要的字符串(如果这个问题对你仍然有意义的话)。
Albert给出的解决方案只有在函数不被频繁调用、且数组大小适中时才能很好地工作,否则效率会非常低。下面是一个增强的原生(vanilla)JavaScript解决方案,适用于Node和浏览器,具有以下优点:
•对任意大小的字节数组都能高效工作
•不会产生用完即弃的中间字符串
•支持现代JS引擎的4字节字符(否则用“?”代替)
/**
 * Decodes an array of UTF-8 bytes into a JavaScript string.
 *
 * Each decoded code point is converted to a string once and cached, and the
 * pieces are collected in a reusable array that is joined at the end, so no
 * intermediate concatenated strings are created.
 *
 * 4-byte sequences are decoded when String.fromCodePoint is available;
 * otherwise each one is replaced by "?". A 4-byte sequence that decodes to a
 * value above U+10FFFF (malformed or truncated input) becomes U+FFFD instead
 * of making String.fromCodePoint throw a RangeError.
 *
 * @param {Uint8Array|number[]} array Indexable sequence of byte values.
 * @returns {string} The decoded string.
 */
var utf8ArrayToStr = (function () {
    var MAX_CODE_POINT = 0x10FFFF;
    var REPLACEMENT_CODE_POINT = 0xFFFD;

    var charCache = new Array(128);  // Preallocate the cache for the common single byte chars
    var charFromCodePt = String.fromCodePoint || String.fromCharCode;
    var result = [];  // Reused between calls; cleared at the start of each call.

    return function (array) {
        var codePt, byte1;
        var buffLen = array.length;

        result.length = 0;

        for (var i = 0; i < buffLen;) {
            byte1 = array[i++];

            if (byte1 <= 0x7F) {
                codePt = byte1;
            } else if (byte1 <= 0xDF) {
                codePt = ((byte1 & 0x1F) << 6) | (array[i++] & 0x3F);
            } else if (byte1 <= 0xEF) {
                codePt = ((byte1 & 0x0F) << 12) | ((array[i++] & 0x3F) << 6) | (array[i++] & 0x3F);
            } else if (String.fromCodePoint) {
                codePt = ((byte1 & 0x07) << 18) | ((array[i++] & 0x3F) << 12) | ((array[i++] & 0x3F) << 6) | (array[i++] & 0x3F);
                if (codePt > MAX_CODE_POINT) {
                    // String.fromCodePoint throws RangeError above U+10FFFF.
                    codePt = REPLACEMENT_CODE_POINT;
                }
            } else {
                codePt = 63;    // Cannot convert four byte code points, so use "?" instead
                i += 3;
            }

            result.push(charCache[codePt] || (charCache[codePt] = charFromCodePt(codePt)));
        }

        return result.join('');
    };
})();
在Node中,“Buffer实例同时也是Uint8Array实例”,所以在这种情况下可以直接使用buf.toString()。
/**
 * Minimal UTF-8 codec converting between JavaScript strings and byte arrays.
 *
 * Use the static `UTF8.encode` / `UTF8.decode` entry points. Malformed input
 * never throws: every invalid or truncated byte sequence decodes to a single
 * U+FFFD replacement character.
 */
class UTF8 {
    /**
     * Encodes a string as UTF-8.
     * @param str The string to encode; unpaired surrogates become U+FFFD.
     * @returns The UTF-8 bytes.
     */
    static encode(str: string): Uint8Array {
        return new UTF8().encode(str);
    }

    /**
     * Decodes UTF-8 bytes into a string.
     * @param data The bytes to decode.
     * @returns The decoded string. U+0000 code points are omitted from the
     *          output (behaviour kept from the original implementation).
     */
    static decode(data: Uint8Array): string {
        return new UTF8().decode(data);
    }

    /** Logs a code point that cannot be encoded. */
    private encoderError(code_point: number): void {
        console.error("UTF8 encoderError", code_point);
    }

    /**
     * Reports a decoding error and yields the replacement code point.
     * @param fatal When true the offending byte is logged.
     * @param offending_byte The byte that triggered the error, for logging only.
     * @returns Always U+FFFD.
     */
    private decoderError(fatal: boolean, offending_byte?: number): number {
        if (fatal) {
            console.error("UTF8 decoderError", offending_byte);
        }
        return 0xFFFD;
    }

    /** True when min <= a <= max. */
    private inRange(a: number, min: number, max: number): boolean {
        return min <= a && a <= max;
    }

    /** Integer (floor) division. */
    private div(n: number, d: number): number {
        return Math.floor(n / d);
    }

    /**
     * Converts a UTF-16 string into an array of Unicode code points,
     * combining surrogate pairs and replacing unpaired surrogates by U+FFFD.
     * Based on http://www.w3.org/TR/WebIDL/#idl-DOMString
     */
    private stringToCodePoints(string: string): number[] {
        const cps: number[] = [];
        const n = string.length;
        let i = 0;
        while (i < n) {
            const c = string.charCodeAt(i);
            if (!this.inRange(c, 0xD800, 0xDFFF)) {
                cps.push(c);
            } else if (this.inRange(c, 0xDC00, 0xDFFF)) {
                // Low surrogate without a preceding high surrogate.
                cps.push(0xFFFD);
            } else if (i === n - 1) {
                // High surrogate at the very end of the string.
                cps.push(0xFFFD);
            } else {
                const d = string.charCodeAt(i + 1);
                if (this.inRange(d, 0xDC00, 0xDFFF)) {
                    const a = c & 0x3FF;
                    const b = d & 0x3FF;
                    i += 1;
                    cps.push(0x10000 + (a << 10) + b);
                } else {
                    cps.push(0xFFFD);
                }
            }
            i += 1;
        }
        return cps;
    }

    /** Instance implementation behind the static `encode`. */
    private encode(str: string): Uint8Array {
        const codePoints = this.stringToCodePoints(str);
        const outputBytes: number[] = [];

        for (let pos = 0; pos < codePoints.length; pos++) {
            const code_point = codePoints[pos];
            if (this.inRange(code_point, 0xD800, 0xDFFF)) {
                this.encoderError(code_point);
            } else if (this.inRange(code_point, 0x0000, 0x007F)) {
                outputBytes.push(code_point);
            } else {
                // count = number of continuation bytes, offset = lead-byte marker bits.
                let count = 0;
                let offset = 0;
                if (this.inRange(code_point, 0x0080, 0x07FF)) {
                    count = 1;
                    offset = 0xC0;
                } else if (this.inRange(code_point, 0x0800, 0xFFFF)) {
                    count = 2;
                    offset = 0xE0;
                } else if (this.inRange(code_point, 0x10000, 0x10FFFF)) {
                    count = 3;
                    offset = 0xF0;
                }
                outputBytes.push(this.div(code_point, Math.pow(64, count)) + offset);
                while (count > 0) {
                    const temp = this.div(code_point, Math.pow(64, count - 1));
                    outputBytes.push(0x80 + (temp % 64));
                    count -= 1;
                }
            }
        }
        return new Uint8Array(outputBytes);
    }

    /** Instance implementation behind the static `decode`. */
    private decode(data: Uint8Array): string {
        const fatal: boolean = false;
        let result = "";

        // State of the multi-byte sequence currently being assembled.
        let utf8_code_point = 0;
        let utf8_bytes_needed = 0;
        let utf8_bytes_seen = 0;
        let utf8_lower_boundary = 0;

        let pos = 0;
        while (pos < data.length) {
            const _byte = data[pos++];
            // null means "no complete code point yet".
            let code_point: number | null = null;

            if (utf8_bytes_needed === 0) {
                if (this.inRange(_byte, 0x00, 0x7F)) {
                    code_point = _byte;
                } else if (this.inRange(_byte, 0xC2, 0xDF)) {
                    utf8_bytes_needed = 1;
                    utf8_lower_boundary = 0x80;
                    utf8_code_point = (_byte - 0xC0) * Math.pow(64, 1);
                } else if (this.inRange(_byte, 0xE0, 0xEF)) {
                    utf8_bytes_needed = 2;
                    utf8_lower_boundary = 0x800;
                    utf8_code_point = (_byte - 0xE0) * Math.pow(64, 2);
                } else if (this.inRange(_byte, 0xF0, 0xF4)) {
                    utf8_bytes_needed = 3;
                    utf8_lower_boundary = 0x10000;
                    utf8_code_point = (_byte - 0xF0) * Math.pow(64, 3);
                } else {
                    // Byte cannot start a sequence: emit U+FFFD instead of dropping it.
                    code_point = this.decoderError(fatal, _byte);
                }
            } else if (!this.inRange(_byte, 0x80, 0xBF)) {
                // Expected a continuation byte. Abandon the partial sequence,
                // emit U+FFFD for it, and re-process this byte as a new lead.
                utf8_code_point = 0;
                utf8_bytes_needed = 0;
                utf8_bytes_seen = 0;
                utf8_lower_boundary = 0;
                pos--;
                code_point = this.decoderError(fatal, _byte);
            } else {
                utf8_bytes_seen += 1;
                utf8_code_point += (_byte - 0x80) * Math.pow(64, utf8_bytes_needed - utf8_bytes_seen);
                if (utf8_bytes_seen === utf8_bytes_needed) {
                    const cp = utf8_code_point;
                    const lower_boundary = utf8_lower_boundary;
                    utf8_code_point = 0;
                    utf8_bytes_needed = 0;
                    utf8_bytes_seen = 0;
                    utf8_lower_boundary = 0;
                    // Reject overlong encodings, values above U+10FFFF and surrogates.
                    if (this.inRange(cp, lower_boundary, 0x10FFFF) && !this.inRange(cp, 0xD800, 0xDFFF)) {
                        code_point = cp;
                    } else {
                        code_point = this.decoderError(fatal, _byte);
                    }
                }
            }

            if (code_point !== null) {
                if (code_point <= 0xFFFF) {
                    // U+0000 is intentionally not appended (original behaviour).
                    if (code_point > 0) {
                        result += String.fromCharCode(code_point);
                    }
                } else {
                    const astral = code_point - 0x10000;
                    result += String.fromCharCode(0xD800 + ((astral >> 10) & 0x3FF));
                    result += String.fromCharCode(0xDC00 + (astral & 0x3FF));
                }
            }
        }

        // Input ended in the middle of a multi-byte sequence.
        if (utf8_bytes_needed !== 0) {
            result += String.fromCharCode(this.decoderError(fatal));
        }
        return result;
    }
}
`
我正在使用这个TypeScript片段:
/**
 * Renders the bytes of a Uint8Array as a bracketed, comma-separated list of
 * decimal numbers, e.g. "[1, 2, 255]". An empty array yields "[]".
 *
 * @param uInt8Array The bytes to render.
 * @returns The textual list of byte values.
 */
function UInt8ArrayToString(uInt8Array: Uint8Array): string {
    const parts: string[] = [];
    for (let idx = 0; idx < uInt8Array.byteLength; idx++) {
        parts.push(String(uInt8Array[idx]));
    }
    return "[" + parts.join(", ") + "]";
}
如果您需要JavaScript版本,请删除类型注解。希望这能有所帮助!