UTF-8 ArrayBuffer和String之间的转换
我有一个 ArrayBuffer,其中包含使用 UTF-8 编码的字符串,但我找不到将此类 ArrayBuffer 转换为 JS String(我知道它内部使用 UTF-16 编码)的标准方法。
我在很多地方都看到过下面这段代码,但我看不出它如何处理长度超过 1 字节的 UTF-8 码点。
// Passes every byte as a separate argument to String.fromCharCode, so each
// byte becomes one UTF-16 code unit; multi-byte UTF-8 sequences are not combined.
return String.fromCharCode.apply(null, new Uint8Array(data));
同样,我找不到从String
转换为UTF-8编码的ArrayBuffer
的标准方式。
/**
 * Encodes a string as UTF-8, Base64-encodes those bytes, and returns the
 * Base64 text as a Uint8Array holding one ASCII character code per element.
 *
 * @param {string} string - Text to encode.
 * @returns {Uint8Array} Character codes of the Base64 representation.
 * @throws {URIError} If `string` contains a lone surrogate.
 */
function stringToUint(string) {
  // encodeURIComponent + unescape yields a "binary string": one char per UTF-8 byte.
  const base64 = btoa(unescape(encodeURIComponent(string)));
  const uintArray = new Uint8Array(base64.length);
  for (let i = 0; i < base64.length; i++) {
    uintArray[i] = base64.charCodeAt(i);
  }
  return uintArray;
}

/**
 * Reverses stringToUint: interprets the array as Base64 text, decodes it to
 * bytes, and decodes those bytes as UTF-8.
 *
 * @param {Uint8Array|number[]} uintArray - Character codes of Base64 text.
 * @returns {string} The decoded string.
 * @throws {URIError} If the decoded bytes are not valid UTF-8.
 */
function uintToString(uintArray) {
  // String.fromCharCode.apply with a very large array exceeds the engine's
  // argument limit and throws a RangeError, so convert in bounded chunks.
  const CHUNK_SIZE = 0x8000;
  let encodedString = "";
  for (let offset = 0; offset < uintArray.length; offset += CHUNK_SIZE) {
    const chunk = Array.prototype.slice.call(uintArray, offset, offset + CHUNK_SIZE);
    encodedString += String.fromCharCode.apply(null, chunk);
  }
  // escape + decodeURIComponent turns the one-char-per-byte string back into UTF-16 text.
  return decodeURIComponent(escape(atob(encodedString)));
}
借助互联网上的一些资料,我写了这几个小函数,它们应该能解决你的问题!这里有一个可运行的 JSFiddle 。
编辑 :
由于 Uint8Array 的来源是外部的,你不能使用 atob,
只需要把它去掉即可(可运行的 JSFiddle):
/**
 * Decodes an array of UTF-8 bytes into a JavaScript string.
 *
 * @param {Uint8Array|number[]} uintArray - UTF-8 encoded bytes.
 * @returns {string} The decoded string.
 * @throws {URIError} If the bytes are not valid UTF-8.
 */
function uintToString(uintArray) {
  // String.fromCharCode.apply with a very large array exceeds the engine's
  // argument limit and throws a RangeError, so convert in bounded chunks.
  const CHUNK_SIZE = 0x8000;
  let encodedString = "";
  for (let offset = 0; offset < uintArray.length; offset += CHUNK_SIZE) {
    const chunk = Array.prototype.slice.call(uintArray, offset, offset + CHUNK_SIZE);
    encodedString += String.fromCharCode.apply(null, chunk);
  }
  // Decoding happens after all chunks are joined, so a multi-byte sequence
  // that straddles a chunk boundary is still decoded correctly.
  return decodeURIComponent(escape(encodedString));
}
这应该工作:
// http://www.onicos.com/staff/iz/amuse/javascript/expert/utf.txt

/* utf.js - UTF-8 <=> UTF-16 convertion
 *
 * Copyright (C) 1999 Masanao Izumo <iz@onicos.co.jp>
 * Version: 1.0
 * LastModified: Dec 25 1999
 * This library is free.  You can redistribute it and/or modify it.
 */

/**
 * Decodes an array of UTF-8 bytes into a JavaScript (UTF-16) string.
 *
 * One- to four-byte sequences are decoded; four-byte sequences are emitted
 * as a surrogate pair. Stray continuation bytes and invalid lead bytes
 * (0xF8-0xFF) are skipped.
 *
 * @param {Uint8Array|number[]} array - UTF-8 encoded bytes.
 * @returns {string} The decoded string.
 */
function Utf8ArrayToStr(array) {
    var out, i, len, c;
    var char2, char3, char4, codePoint;

    out = "";
    len = array.length;
    i = 0;
    while (i < len) {
        c = array[i++];
        // The high nibble of the lead byte determines the sequence length.
        switch (c >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                // 0xxxxxxx
                out += String.fromCharCode(c);
                break;
            case 12: case 13:
                // 110x xxxx   10xx xxxx
                char2 = array[i++];
                out += String.fromCharCode(((c & 0x1F) << 6) | (char2 & 0x3F));
                break;
            case 14:
                // 1110 xxxx  10xx xxxx  10xx xxxx
                char2 = array[i++];
                char3 = array[i++];
                out += String.fromCharCode(((c & 0x0F) << 12) |
                                           ((char2 & 0x3F) << 6) |
                                           ((char3 & 0x3F) << 0));
                break;
            case 15:
                // 1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
                if (c > 0xF7) {
                    // 0xF8-0xFF never start a valid sequence; skip the byte.
                    break;
                }
                char2 = array[i++];
                char3 = array[i++];
                char4 = array[i++];
                codePoint = ((c & 0x07) << 18) |
                            ((char2 & 0x3F) << 12) |
                            ((char3 & 0x3F) << 6) |
                            (char4 & 0x3F);
                if (codePoint > 0x10FFFF) {
                    // Beyond the Unicode range: emit the replacement character.
                    out += "\uFFFD";
                } else if (codePoint > 0xFFFF) {
                    // Code points above the BMP need a UTF-16 surrogate pair.
                    codePoint -= 0x10000;
                    out += String.fromCharCode(0xD800 | (codePoint >> 10),
                                               0xDC00 | (codePoint & 0x3FF));
                } else {
                    out += String.fromCharCode(codePoint);
                }
                break;
        }
    }

    return out;
}
与其他解决方案相比,它更简洁,因为它不使用任何 hack,也不依赖浏览器特有的 JS 函数,因此也可以在其他 JS 环境中工作。
查看JSFiddle演示 。
也看到相关的问题: 这里 , 这里
GitHub 上有一个 Encoding API 的 polyfill:text-encoding(文本编码)。它在 Node 或浏览器中都很容易使用,README 建议的用法如下:
// TextEncoder and TextDecoder are constructors: native implementations throw
// a TypeError when they are called without `new`.
// NOTE(review): `encoding` and the input `string` are expected to be supplied
// by the surrounding code. The standard TextEncoder always produces UTF-8 and
// ignores its argument.
var uint8array = new TextEncoder(encoding).encode(string);
var string = new TextDecoder(encoding).decode(uint8array);
如果我没记错,你需要的 encoding 是 'utf-8',
当然,你还需要先把缓冲区包装一下:
// Build a Uint8Array from `utf8buffer`
// (presumably the ArrayBuffer holding the UTF-8 bytes — confirm against the caller).
var uint8array = new Uint8Array(utf8buffer);
希望它对你有好处,就像它对我一样。
使用TextEncoder和TextDecoder
// Round trip: encode the literal "Plain Text" to bytes with TextEncoder,
// decode the bytes back with TextDecoder, then log both values.
var uint8array = new TextEncoder("utf-8").encode("Plain Text"); var string = new TextDecoder().decode(uint8array); console.log(uint8array ,string )
如果你在浏览器中这样做,并没有内置的字符编码库,但你可以通过下面的方式实现:
// Prefixes `n` with "0" when its length is below 2; otherwise returns it unchanged.
function pad(n) {
  if (n.length < 2) {
    return "0" + n;
  }
  return n;
}

// Percent-encode every byte as "%hh", then let decodeURIComponent turn the
// escapes into a string.
var array = new Uint8Array(data);
var str = "";
var i = 0;
var len = array.length;
while (i < len) {
  str += "%" + pad(array[i].toString(16));
  ++i;
}
str = decodeURIComponent(str);
下面是一个演示,解码一个 3 字节的 UTF-8 序列: http://jsfiddle.net/Z9pQE/
我遇到了同样的问题,但还需要能够逐步解析/写入 UTF-8 编码的数据。这是我为解决这个问题而写的一个库: https://github.com/nfroidure/UTF8.js 。
编辑:看来 Mozilla 正在为我们准备一些东西:StringView( https://developer.mozilla.org/en-US/docs/Code_snippets/StringView?redirectlocale=en-US&redirectslug=Web%2FJavaScript%2FTyped_arrays%2FStringView#encoding_values )
程序员在把字节数组转换为字符串时遇到的主要问题,是 Unicode 字符的 UTF-8(变长)编码。下面的代码可以帮到你:
/**
 * Decodes an array of UTF-8 bytes into a JavaScript string.
 *
 * Values that match no UTF-8 lead-byte pattern (for example a stray
 * continuation byte) are passed through unchanged as a code point. A
 * sequence cut off by the end of the input is replaced with U+FFFD.
 *
 * @param {Uint8Array|number[]} strBytes - UTF-8 encoded bytes.
 * @returns {string} The decoded string.
 * @throws {RangeError} If a decoded value is not a valid Unicode code point.
 */
var getString = function (strBytes) {
  // Upper bound on buffered code units before flushing, which keeps the
  // String.fromCharCode.apply argument list small.
  const MAX_SIZE = 0x4000;
  const REPLACEMENT_CHARACTER = 0xFFFD;
  const length = strBytes.length;
  const codeUnits = [];
  let result = '';
  let index = 0;

  while (index < length) {
    const lead = Number(strBytes[index++]);
    let codePoint = lead;
    let continuationCount = 0;

    // The lead-byte masks must be exclusive: testing only the top two bits
    // would also match the 3- and 4-byte lead bytes.
    if (lead === (lead & 0x7F)) {
      // 0xxxxxxx: ASCII, nothing more to read.
    } else if ((lead & 0xE0) === 0xC0) {
      // 110xxxxx
      codePoint = lead & 0x1F;
      continuationCount = 1;
    } else if ((lead & 0xF0) === 0xE0) {
      // 1110xxxx
      codePoint = lead & 0x0F;
      continuationCount = 2;
    } else if ((lead & 0xF8) === 0xF0) {
      // 11110xxx
      codePoint = lead & 0x07;
      continuationCount = 3;
    }

    if (index + continuationCount > length) {
      // The sequence is cut off by the end of the input.
      codePoint = REPLACEMENT_CHARACTER;
      index = length;
    } else {
      for (let k = 0; k < continuationCount; k++) {
        codePoint = (codePoint << 6) | (strBytes[index++] & 0x3F);
      }
    }

    if (
      !Number.isFinite(codePoint) ||
      codePoint < 0 ||
      codePoint > 0x10FFFF ||
      Math.floor(codePoint) !== codePoint
    ) {
      throw new RangeError('Invalid code point: ' + codePoint);
    }

    if (codePoint <= 0xFFFF) {
      codeUnits.push(codePoint);
    } else {
      // Code points above the BMP are stored as a UTF-16 surrogate pair.
      codePoint -= 0x10000;
      const highSurrogate = (codePoint >> 10) | 0xD800;
      const lowSurrogate = (codePoint % 0x400) | 0xDC00;
      codeUnits.push(highSurrogate, lowSurrogate);
    }

    if (codeUnits.length > MAX_SIZE) {
      result += String.fromCharCode.apply(null, codeUnits);
      codeUnits.length = 0;
    }
  }

  // Flush whatever is still buffered once the input is exhausted.
  if (codeUnits.length > 0) {
    result += String.fromCharCode.apply(null, codeUnits);
  }
  return result;
};
祝一切顺利 !
FileReader 对象的 readAsArrayBuffer 和 readAsText 方法可以异步地将 Blob 对象转换为 ArrayBuffer 或 DOMString。
例如,可以从原始文本或字节数组创建 Blob 对象。
// Wrap `text` in a Blob and read it back asynchronously with a FileReader.
let blob = new Blob([text], { type: "text/plain" });
let reader = new FileReader();

reader.onload = function (event) {
  // Runs once the read has finished; the read result is on event.target.
  let buffer = event.target.result;
};

reader.readAsArrayBuffer(blob);
我认为把它包装成类似 Promise 的形式会更好:
/**
 * Runs one FileReader read and reports its outcome as a Promise.
 *
 * @param {function(FileReader): void} startRead - Starts the read on the given reader.
 * @returns {Promise<*>} Resolves with `reader.result`; rejects with `reader.error`.
 */
function readWithFileReader(startRead) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      resolve(reader.result);
    };
    // Without an error handler a failed read would leave the caller waiting forever.
    reader.onerror = () => {
      reject(reader.error);
    };
    startRead(reader);
  });
}

/**
 * Adds the legacy `.done(callback)` method to a promise so existing callers
 * keep working while new callers can use `then`/`await`.
 *
 * @param {Promise<*>} promise - Promise to decorate.
 * @returns {Promise<*>} The same promise, with a `done` method.
 */
function withDone(promise) {
  promise.done = (callback) => {
    promise.then(callback);
  };
  return promise;
}

/**
 * Converts text to its byte representation via Blob + FileReader.
 *
 * @param {string} text - Text to convert.
 * @returns {Promise<Uint8Array>} Promise of the bytes; also exposes `.done(callback)`.
 */
function textToByteArray(text) {
  const blob = new Blob([text], { type: "text/plain" });
  const bytes = readWithFileReader((reader) => {
    reader.readAsArrayBuffer(blob);
  }).then((buffer) => new Uint8Array(buffer));
  return withDone(bytes);
}

/**
 * Decodes bytes to text via Blob + FileReader.
 *
 * @param {Uint8Array|ArrayBuffer} bytes - Bytes to decode.
 * @param {string} [encoding] - Encoding label passed to readAsText; when
 *   omitted, readAsText is called without one.
 * @returns {Promise<string>} Promise of the text; also exposes `.done(callback)`.
 */
function byteArrayToText(bytes, encoding) {
  const blob = new Blob([bytes], { type: "application/octet-stream" });
  const decoded = readWithFileReader((reader) => {
    if (encoding) {
      reader.readAsText(blob, encoding);
    } else {
      reader.readAsText(blob);
    }
  });
  return withDone(decoded);
}

let text = "\uD83D\uDCA9 = \u2661";
textToByteArray(text).done(bytes => {
  console.log(bytes);
  byteArrayToText(bytes, 'UTF-8').done(text => {
    console.log(text); // 💩 = ♡
  });
});