如何将字节数组转换为hexstring,反之亦然?
如何将字节数组转换为hexstring,反之亦然?
或者:
public static string ByteArrayToString(byte[] ba) { StringBuilder hex = new StringBuilder(ba.Length * 2); foreach (byte b in ba) hex.AppendFormat("{0:x2}", b); return hex.ToString(); }
要么:
public static string ByteArrayToString(byte[] ba) { string hex = BitConverter.ToString(ba); return hex.Replace("-",""); }
这里有更多的变种,比如这里 。
反向转换将如下所示:
public static byte[] StringToByteArray(String hex) { int NumberChars = hex.Length; byte[] bytes = new byte[NumberChars / 2]; for (int i = 0; i < NumberChars; i += 2) bytes[i / 2] = Convert.ToByte(hex.Substring(i, 2), 16); return bytes; }
使用Substring
是与Convert.ToByte
结合的最佳select。 请参阅此答案以获取更多信息。 如果您需要更好的性能,则必须先避免Convert.ToByte
然后才能删除SubString
。
性能分析
注:2015-08-20新领导。
我通过一些粗略的Stopwatch
性能testing,一个随机句子(n = 61,1000次迭代)和一个带有Gutenburg项目(n = 1,238,957,150次迭代)的运行,运行了各种转换方法。 这里是结果,大致从最快到最慢。 所有的测量都是以蜱( 10,000 ticks = 1 ms )为单位的,所有的相关音符都与[最慢的] StringBuilder
实现进行比较。 对于使用的代码,请参阅下面或testing框架回购 ,我现在维护运行此代码。
放弃
警告:不要依赖这些统计数据来做任何具体的事情。 他们只是样本数据的一个样本运行。 如果您真的需要一stream的性能,请在代表您的生产需求的环境中使用代表您将使用的数据来testing这些方法。
结果
- 按字节查找
unsafe
(通过CodesInChaos) (添加到airbreathertesting回购)- 文本:4,727.85(105.2X)
- 句子:0.28(99.7X)
- 按字节查找(通过CodesInChaos)
- 文本:10,853.96(快45.8倍)
- 句子:0.65(快42.7倍)
- 字节操作2(通过CodesInChaos)
- 文本:12,967.69(快38.4倍)
- 句子:0.73(快37.9倍)
- 字节操作(通过Waleed Eissa)
- 文本:16,856.64(快29.5倍)
- 一句话:0.70(快39.5倍)
- Lookup / Shift(通过Nathan Moinvaziri)
- 文字:23,201.23(快21.4倍)
- 句子:1.24(快22.3倍)
- 通过轻咬查找(通过Brian Lambert)
- 文本:23,879.41(快20.8倍)
- 句子:1.15(快23.9倍)
-
BitConverter
(通过Tomalak)- 文本:113,269.34(快4.4倍)
- 一句话:9.98(快2.8倍)
-
{SoapHexBinary}.ToString
(通过Mykroft)- 文字:178,601.39(快2.8倍)
- 一句话:10.68(快2.6倍)
-
{byte}.ToString("X2")
(使用foreach
)(源自Will Dean的答案)- 文字:308,805.38(快2.4倍)
- 一句话:16.89(快2.4倍)
-
{byte}.ToString("X2")
(使用{IEnumerable}.Aggregate
,需要System.Linq)(通过Mark)- 文本:352,828.20(快2.1倍)
- 一句话:16.87(快2.4倍)
-
Array.ConvertAll
(使用string.Join
)(通过Will Dean)- 文字:675,451.57(快1.1倍)
- 一句话:17.95(快2.2倍)
-
Array.ConvertAll
(使用string.Concat
,需要.NET 4.0)(通过Will Dean)- 文字:752,078.70(快1.0倍)
- 一句话:18.28(快2.2倍)
-
{StringBuilder}.AppendFormat
(使用foreach
)(通过Tomalak)- 文字:672,115.77(快1.1倍)
- 一句话:36.82(快1.1倍)
-
{StringBuilder}.AppendFormat
(使用{IEnumerable}.Aggregate
,需要System.Linq)(来自Tomalak的答案)- 文字:718,380.63(快了1.0倍)
- 一句话:39.71(快1.0倍)
查找表已经领先于字节操作。 基本上,有一些forms的预先计算任何给定的半字节或字节将在hex。 然后,在翻阅数据时,只需查看下一部分即可查看hexstring。 然后该值以某种方式添加到结果string输出中。 对于很长时间的字节操作,一些开发人员可能难以阅读,是最高性能的方法。
你最好的select仍然是find一些有代表性的数据,并在类似生产的环境中试用。 如果你有不同的内存限制,你可能更喜欢一个分配较less的方法,而这个方法会更快,但会消耗更多的内存。
testing代码
随意玩我使用的testing代码。 这里包含一个版本,但是可以随意克隆回购并添加自己的方法。 如果您发现任何有趣的事情或想要帮助改进它使用的testing框架,请提交一个请求。
- 将新的静态方法(
Func<byte[], string>
)添加到/Tests/ConvertByteArrayToHexString/Test.cs。 - 将该方法的名称添加到同一个类中的
TestCandidates
返回值。 - 通过在同一个类中切换
GenerateTestInput
中的注释,确保您正在运行所需的input版本,句子或文本。 - 点击F5并等待输出(HTML转储也在/ bin文件夹中生成)。
static string ByteArrayToHexStringViaStringJoinArrayConvertAll(byte[] bytes) { return string.Join(string.Empty, Array.ConvertAll(bytes, b => b.ToString("X2"))); } static string ByteArrayToHexStringViaStringConcatArrayConvertAll(byte[] bytes) { return string.Concat(Array.ConvertAll(bytes, b => b.ToString("X2"))); } static string ByteArrayToHexStringViaBitConverter(byte[] bytes) { string hex = BitConverter.ToString(bytes); return hex.Replace("-", ""); } static string ByteArrayToHexStringViaStringBuilderAggregateByteToString(byte[] bytes) { return bytes.Aggregate(new StringBuilder(bytes.Length * 2), (sb, b) => sb.Append(b.ToString("X2"))).ToString(); } static string ByteArrayToHexStringViaStringBuilderForEachByteToString(byte[] bytes) { StringBuilder hex = new StringBuilder(bytes.Length * 2); foreach (byte b in bytes) hex.Append(b.ToString("X2")); return hex.ToString(); } static string ByteArrayToHexStringViaStringBuilderAggregateAppendFormat(byte[] bytes) { return bytes.Aggregate(new StringBuilder(bytes.Length * 2), (sb, b) => sb.AppendFormat("{0:X2}", b)).ToString(); } static string ByteArrayToHexStringViaStringBuilderForEachAppendFormat(byte[] bytes) { StringBuilder hex = new StringBuilder(bytes.Length * 2); foreach (byte b in bytes) hex.AppendFormat("{0:X2}", b); return hex.ToString(); } static string ByteArrayToHexViaByteManipulation(byte[] bytes) { char[] c = new char[bytes.Length * 2]; byte b; for (int i = 0; i < bytes.Length; i++) { b = ((byte)(bytes[i] >> 4)); c[i * 2] = (char)(b > 9 ? b + 0x37 : b + 0x30); b = ((byte)(bytes[i] & 0xF)); c[i * 2 + 1] = (char)(b > 9 ? b + 0x37 : b + 0x30); } return new string(c); } static string ByteArrayToHexViaByteManipulation2(byte[] bytes) { char[] c = new char[bytes.Length * 2]; int b; for (int i = 0; i < bytes.Length; i++) { b = bytes[i] >> 4; c[i * 2] = (char)(55 + b + (((b - 10) >> 31) & -7)); b = bytes[i] & 0xF; c[i * 2 + 1] = (char)(55 + b + (((b - 10) >> 31) & -7)); } return new string(c); } static string ByteArrayToHexViaSoapHexBinary(byte[] bytes) { SoapHexBinary soapHexBinary = new SoapHexBinary(bytes); return soapHexBinary.ToString(); } static string ByteArrayToHexViaLookupAndShift(byte[] bytes) { StringBuilder result = new StringBuilder(bytes.Length * 2); string hexAlphabet = "0123456789ABCDEF"; foreach (byte b in bytes) { result.Append(hexAlphabet[(int)(b >> 4)]); result.Append(hexAlphabet[(int)(b & 0xF)]); } return result.ToString(); } static readonly uint* _lookup32UnsafeP = (uint*)GCHandle.Alloc(_Lookup32, GCHandleType.Pinned).AddrOfPinnedObject(); static string ByteArrayToHexViaLookup32UnsafeDirect(byte[] bytes) { var lookupP = _lookup32UnsafeP; var result = new string((char)0, bytes.Length * 2); fixed (byte* bytesP = bytes) fixed (char* resultP = result) { uint* resultP2 = (uint*)resultP; for (int i = 0; i < bytes.Length; i++) { resultP2[i] = lookupP[bytesP[i]]; } } return result; } static uint[] _Lookup32 = Enumerable.Range(0, 255).Select(i => { string s = i.ToString("X2"); return ((uint)s[0]) + ((uint)s[1] << 16); }).ToArray(); static string ByteArrayToHexViaLookupPerByte(byte[] bytes) { var result = new char[bytes.Length * 2]; for (int i = 0; i < bytes.Length; i++) { var val = _Lookup32[bytes[i]]; result[2*i] = (char)val; result[2*i + 1] = (char) (val >> 16); } return new string(result); } static string ByteArrayToHexViaLookup(byte[] bytes) { string[] hexStringTable = new string[] { "00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "0A", "0B", "0C", "0D", "0E", "0F", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "1A", "1B", "1C", "1D", "1E", "1F", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "2A", "2B", "2C", "2D", "2E", "2F", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "3A", "3B", "3C", "3D", "3E", "3F", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "4A", "4B", "4C", "4D", "4E", "4F", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "5A", "5B", "5C", "5D", "5E", "5F", "60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "6A", "6B", "6C", "6D", "6E", "6F", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "7A", "7B", "7C", "7D", "7E", "7F", "80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "8A", "8B", "8C", "8D", "8E", "8F", "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "9A", "9B", "9C", "9D", "9E", "9F", "A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "AA", "AB", "AC", "AD", "AE", "AF", "B0", "B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B9", "BA", "BB", "BC", "BD", "BE", "BF", "C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9", "CA", "CB", "CC", "CD", "CE", "CF", "D0", "D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "DA", "DB", "DC", "DD", "DE", "DF", "E0", "E1", "E2", "E3", "E4", "E5", "E6", "E7", "E8", "E9", "EA", "EB", "EC", "ED", "EE", "EF", "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "FA", "FB", "FC", "FD", "FE", "FF", }; StringBuilder result = new StringBuilder(bytes.Length * 2); foreach (byte b in bytes) { result.Append(hexStringTable[b]); } return result.ToString(); }
更新(2010-01-13)
增加了Waleed的分析答案。 蛮快。
更新(2011-10-05)
添加了string.Concat
Array.ConvertAll
变体的完整性(需要.NET 4.0)。 与string.Join
版本相同。
更新(2012-02-05)
testing回购包括更多的变种,如StringBuilder.Append(b.ToString("X2"))
。 没有打乱任何结果。 foreach
比{IEnumerable}.Aggregate
。例如, {IEnumerable}.Aggregate
,但BitConverter
仍然胜利。
更新(2012-04-03)
添加了Mykroft的SoapHexBinary
分析解决scheme,获得了第三名。
更新(2013-01-15)
添加了CodesInChaos的字节操作答案,占据了第一名(在大块文本上大幅增加)。
更新(2013-05-23)
添加了Nathan Moinvaziri的查找答案和Brian Lambert博客的变体。 两者相当快,但在我使用的testing机器(AMD Phenom 9750)上没有领先。
更新(2014-07-31)
增加了@ CodesInChaos的新的基于字节的查找答案。 它似乎在句子testing和全文testing中都处于领先地位。
更新(2015-08-20)
增加了airbreather的优化和unsafe
变种到这个答案的回购 。 如果你想在不安全的游戏中玩,你可以在短串和大文本上获得比以前任何优胜者都大的性能提升。
有一个名为SoapHexBinary的类,正是你想要的。
using System.Runtime.Remoting.Metadata.W3cXsd2001; public static byte[] GetStringToBytes(string value) { SoapHexBinary shb = SoapHexBinary.Parse(value); return shb.Value; } public static string GetBytesToString(byte[] value) { SoapHexBinary shb = new SoapHexBinary(value); return shb.ToString(); }
在编写encryption代码时,通常要避免数据相关的分支和表查找,以确保运行时不依赖于数据,因为依赖于数据的时序可能导致旁路通道攻击。
它也很快。
static string ByteToHexBitFiddle(byte[] bytes) { char[] c = new char[bytes.Length * 2]; int b; for (int i = 0; i < bytes.Length; i++) { b = bytes[i] >> 4; c[i * 2] = (char)(55 + b + (((b-10)>>31)&-7)); b = bytes[i] & 0xF; c[i * 2 + 1] = (char)(55 + b + (((b-10)>>31)&-7)); } return new string(c); }
Ph'nglui mglw'nafh Cthulhu R'lyeh wgah'nagl fhtagn
放弃所有希望,谁进入这里
奇怪的位的解释:
-
bytes[i] >> 4
提取一个字节的高半字节
bytes[i] & 0xF
提取一个字节的低半字节 -
b - 10
对于值b < 10
< 0
,它将变成十进制数字
对于值b > 10
,>= 0
,这将成为从A
到F
一个字母。 - 在有符号的32位整数上使用
i >> 31
提取符号,这要感谢符号扩展。 对于i < 0
,它将是-1
,对于i >= 0
将是i >= 0
。 - 结合2)和3),显示
(b-10)>>31
对于字母将是0
,对于数字是-1
。 - 我们把它映射到
A
(65)到F
(70),这意味着添加55('A'-10
) 。 - 看一下数字的情况,我们想调整最后的加数,所以它将
b
从0到9的范围映射到0
(48)到9
(57)的范围。 这意味着它需要变成-7('0' - 55
)。
现在我们可以乘以7.但是由于-1代表所有位为1,我们可以使用& -7
自(0 & -7) == 0
和(-1 & -7) == -7
。
进一步的考虑:
- 我没有使用第二个循环variables索引到
c
,因为测量表明,从i
计算它更便宜。 - 正好使用
i < bytes.Length
作为循环的上界允许JITter消除对bytes[i]
边界检查,所以我select了这个变体。 - 使
b
成为一个整数允许不必要的转换和字节。
如果你想要比BitConverter
更灵活,但不想要那些笨重的20世纪90年代风格的显式循环,那么你可以这样做:
String.Join(String.Empty, Array.ConvertAll(bytes, x => x.ToString("X2")));
或者,如果您使用.NET 4.0:
String.Concat(Array.ConvertAll(bytes, x => x.ToString("X2")));
(后者来自对原帖的评论。)
您可以使用BitConverter.ToString方法:
byte[] bytes = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256} Console.WriteLine( BitConverter.ToString(bytes));
输出:
00-01-02-04-08-10-20-40-80-FF
更多信息: BitConverter.ToString方法(字节[])
我今天遇到了同样的问题,而且我遇到了这样的代码:
private static string ByteArrayToHex(byte[] barray) { char[] c = new char[barray.Length * 2]; byte b; for (int i = 0; i < barray.Length; ++i) { b = ((byte)(barray[i] >> 4)); c[i * 2] = (char)(b > 9 ? b + 0x37 : b + 0x30); b = ((byte)(barray[i] & 0xF)); c[i * 2 + 1] = (char)(b > 9 ? b + 0x37 : b + 0x30); } return new string(c); }
来源:论坛postbyte []数组到hexstring (见PZahra的post)。 我修改了一些代码来删除0x前缀。
我对代码做了一些性能testing,比使用BitConverter.ToString()快了近八倍(根据patridge的post,速度最快)。
另一种基于查找表的方法。 这个对每个字节只使用一个查找表,而不是每个半字节的查找表。
private static readonly uint[] _lookup32 = CreateLookup32(); private static uint[] CreateLookup32() { var result = new uint[256]; for (int i = 0; i < 256; i++) { string s=i.ToString("X2"); result[i] = ((uint)s[0]) + ((uint)s[1] << 16); } return result; } private static string ByteArrayToHexViaLookup32(byte[] bytes) { var lookup32 = _lookup32; var result = new char[bytes.Length * 2]; for (int i = 0; i < bytes.Length; i++) { var val = lookup32[bytes[i]]; result[2*i] = (char)val; result[2*i + 1] = (char) (val >> 16); } return new string(result); }
我还在查找表中使用ushort
, struct{char X1, X2}
, struct{byte X1, X2}
testing了这个变体。
根据编译目标(x86,X64)的不同,它们的性能大致相同,或稍微低于这个变体。
而为了更高的performance,其unsafe
兄弟姐妹:
private static readonly uint[] _lookup32Unsafe = CreateLookup32Unsafe(); private static readonly uint* _lookup32UnsafeP = (uint*)GCHandle.Alloc(_lookup32Unsafe,GCHandleType.Pinned).AddrOfPinnedObject(); private static uint[] CreateLookup32Unsafe() { var result = new uint[256]; for (int i = 0; i < 256; i++) { string s=i.ToString("X2"); if(BitConverter.IsLittleEndian) result[i] = ((uint)s[0]) + ((uint)s[1] << 16); else result[i] = ((uint)s[1]) + ((uint)s[0] << 16); } return result; } public static string ByteArrayToHexViaLookup32Unsafe(byte[] bytes) { var lookupP = _lookup32UnsafeP; var result = new char[bytes.Length * 2]; fixed(byte* bytesP = bytes) fixed (char* resultP = result) { uint* resultP2 = (uint*)resultP; for (int i = 0; i < bytes.Length; i++) { resultP2[i] = lookupP[bytesP[i]]; } } return new string(result); }
或者,如果您认为可以直接写入string:
public static string ByteArrayToHexViaLookup32UnsafeDirect(byte[] bytes) { var lookupP = _lookup32UnsafeP; var result = new string((char)0, bytes.Length * 2); fixed (byte* bytesP = bytes) fixed (char* resultP = result) { uint* resultP2 = (uint*)resultP; for (int i = 0; i < bytes.Length; i++) { resultP2[i] = lookupP[bytesP[i]]; } } return result; }
这个问题也可以使用查找表来解决。 编码器和解码器都需要less量的静态存储器。 这个方法会很快:
- 编码器表512字节或1024字节(如果需要大写和小写两种大小)
- 解码器表256字节或64 KiB(单字符查找或双字符查找)
我的解决scheme使用1024字节的编码表和256字节的解码。
解码
private static readonly byte[] LookupTable = new byte[] { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; private static byte Lookup(char c) { var b = LookupTable[c]; if (b == 255) throw new IOException("Expected a hex character, got " + c); return b; } public static byte ToByte(char[] chars, int offset) { return (byte)(Lookup(chars[offset]) << 4 | Lookup(chars[offset + 1])); }
编码
private static readonly char[][] LookupTableUpper; private static readonly char[][] LookupTableLower; static Hex() { LookupTableLower = new char[256][]; LookupTableUpper = new char[256][]; for (var i = 0; i < 256; i++) { LookupTableLower[i] = i.ToString("x2").ToCharArray(); LookupTableUpper[i] = i.ToString("X2").ToCharArray(); } } public static char[] ToCharLower(byte[] b, int bOffset) { return LookupTableLower[b[bOffset]]; } public static char[] ToCharUpper(byte[] b, int bOffset) { return LookupTableUpper[b[bOffset]]; }
对照
StringBuilderToStringFromBytes: 106148 BitConverterToStringFromBytes: 15783 ArrayConvertAllToStringFromBytes: 54290 ByteManipulationToCharArray: 8444 TableBasedToCharArray: 5651 *
*这个解决scheme
注意
在解码期间IOException和IndexOutOfRangeException可能发生(如果一个字符的值> 256)。 解码/编码stream或数组的方法应该被实现,这只是一个概念的certificate。
这是对Tomalak非常受欢迎的答案 (和随后的编辑)第4版 的回答 。
我会说这个编辑是错误的,并解释为什么它可以被还原。 一路走来,你可能会对一些内部事物有所了解,还可以看到另一个过早优化的例子,以及它是如何咬你的。
tl; dr:只要使用Convert.ToByte
和String.Substring
如果你很着急)(下面的“原始代码”),如果你不想重新实现Convert.ToByte
,这是最好的组合。 使用更高级的东西(请参阅其他答案)如果需要性能,则不使用Convert.ToByte
。 除了String.Substring
和Convert.ToByte
,不要使用其他任何东西,除非有人在这个答案的评论中有一些有趣的话来说这个。
警告: 如果在框架中实现Convert.ToByte(char[], Int32)
重载,则此答案可能会过时。 这不太可能发生。
一般来说,我不太想说“不要过早优化”,因为没人知道什么时候是“早熟”。 决定是否优化时唯一要考虑的是:“我是否有时间和资源来正确调查优化方法?”。 如果你不这样做,那就太早了,等到你的项目更成熟,或者直到你需要performance(如果真有需要的话,那么你会花时间的)。 同时,做最简单的事情,可能反而工作。
原始码:
public static byte[] HexadecimalStringToByteArray_Original(string input) { var outputLength = input.Length / 2; var output = new byte[outputLength]; for (var i = 0; i < outputLength; i++) output[i] = Convert.ToByte(input.Substring(i * 2, 2), 16); return output; }
修订4:
public static byte[] HexadecimalStringToByteArray_Rev4(string input) { var outputLength = input.Length / 2; var output = new byte[outputLength]; using (var sr = new StringReader(input)) { for (var i = 0; i < outputLength; i++) output[i] = Convert.ToByte(new string(new char[2] { (char)sr.Read(), (char)sr.Read() }), 16); } return output; }
修订避免了String.Substring
,而是使用StringReader
。 原因是:
编辑:你可以通过使用一个单一的parsing器来提高长string的性能,就像这样:
那么看看String.Substring
的引用代码 ,显然已经是“单通” 为什么不呢? 它在字节级运行,而不是在代理对上运行。
它确实分配了一个新的string,但是你需要分配一个传递给Convert.ToByte
。 此外,修订版中提供的解决scheme在每次迭代时都分配另一个对象(双字符数组)。 你可以安全地把这个分配放在循环之外,并重新使用这个数组来避免这个。
public static byte[] HexadecimalStringToByteArray(string input) { var outputLength = input.Length / 2; var output = new byte[outputLength]; var numeral = new char[2]; using (var sr = new StringReader(input)) { for (var i = 0; i < outputLength; i++) { numeral[0] = (char)sr.Read(); numeral[1] = (char)sr.Read(); output[i] = Convert.ToByte(new string(numeral), 16); } } return output; }
每个hexnumeral
表示使用两个数字(符号)的单个八位字节。
但是,那么为什么要调用StringReader.Read
两次呢? Just call its second overload and ask it to read two characters in the two-char array at once; and reduce the amount of calls by two.
public static byte[] HexadecimalStringToByteArray(string input) { var outputLength = input.Length / 2; var output = new byte[outputLength]; var numeral = new char[2]; using (var sr = new StringReader(input)) { for (var i = 0; i < outputLength; i++) { var read = sr.Read(numeral, 0, 2); Debug.Assert(read == 2); output[i] = Convert.ToByte(new string(numeral), 16); } } return output; }
What you're left with is a string reader whose only added "value" is a parallel index (internal _pos
) which you could have declared yourself (as j
for example), a redundant length variable (internal _length
), and a redundant reference to the input string (internal _s
). In other words, it's useless.
If you wonder how Read
"reads", just look at the code , all it does is call String.CopyTo
on the input string. The rest is just book-keeping overhead to maintain values we don't need.
So, remove the string reader already, and call CopyTo
yourself; it's simpler, clearer, and more efficient.
public static byte[] HexadecimalStringToByteArray(string input) { var outputLength = input.Length / 2; var output = new byte[outputLength]; var numeral = new char[2]; for (int i = 0, j = 0; i < outputLength; i++, j += 2) { input.CopyTo(j, numeral, 0, 2); output[i] = Convert.ToByte(new string(numeral), 16); } return output; }
Do you really need a j
index that increments in steps of two parallel to i
? Of course not, just multiply i
by two (which the compiler should be able to optimize to an addition).
public static byte[] HexadecimalStringToByteArray_BestEffort(string input) { var outputLength = input.Length / 2; var output = new byte[outputLength]; var numeral = new char[2]; for (int i = 0; i < outputLength; i++) { input.CopyTo(i * 2, numeral, 0, 2); output[i] = Convert.ToByte(new string(numeral), 16); } return output; }
What does the solution look like now? Exactly like it was at the beginning, only instead of using String.Substring
to allocate the string and copy the data to it, you're using an intermediary array to which you copy the hexadecimal numerals to, then allocate the string yourself and copy the data again from the array and into the string (when you pass it in the string constructor). The second copy might be optimized-out if the string is already in the intern pool, but then String.Substring
will also be able to avoid it in these cases.
In fact, if you look at String.Substring
again, you see that it uses some low-level internal knowledge of how strings are constructed to allocate the string faster than you could normally do it, and it inlines the same code used by CopyTo
directly in there to avoid the call overhead.
String.Substring
- Worst-case: One fast allocation, one fast copy.
- Best-case: No allocation, no copy.
Manual method
- Worst-case: Two normal allocations, one normal copy, one fast copy.
- Best-case: One normal allocation, one normal copy.
Conclusion? If you want to use Convert.ToByte(String, Int32)
(because you don't want to re-implement that functionality yourself), there doesn't seem to be a way to beat String.Substring
; all you do is run in circles, re-inventing the wheel (only with sub-optimal materials).
Note that using Convert.ToByte
and String.Substring
is a perfectly valid choice if you don't need extreme performance. Remember: only opt for an alternative if you have the time and resources to investigate how it works properly.
If there was a Convert.ToByte(char[], Int32)
, things would be different of course (it would be possible to do what I described above and completely avoid String
).
I suspect that people who report better performance by "avoiding String.Substring
" also avoid Convert.ToByte(String, Int32)
, which you should really be doing if you need the performance anyway. Look at the countless other answers to discover all the different approaches to do that.
Disclaimer: I haven't decompiled the latest version of the framework to verify that the reference source is up-to-date, I assume it is.
Now, it all sounds good and logical, hopefully even obvious if you've managed to get so far. But is it true?
Intel(R) Core(TM) i7-3720QM CPU @ 2.60GHz Cores: 8 Current Clock Speed: 2600 Max Clock Speed: 2600 -------------------- Parsing hexadecimal string into an array of bytes -------------------- HexadecimalStringToByteArray_Original: 7,777.09 average ticks (over 10000 runs), 1.2X HexadecimalStringToByteArray_BestEffort: 8,550.82 average ticks (over 10000 runs), 1.1X HexadecimalStringToByteArray_Rev4: 9,218.03 average ticks (over 10000 runs), 1.0X
是!
Props to Partridge for the bench framework, it's easy to hack. The input used is the following SHA-1 hash repeated 5000 times to make a 100,000 bytes long string.
209113288F93A9AB8E474EA78D899AFDBB874355
玩的开心! (But optimize with moderation.)
This is a great post. I like Waleed's solution. I haven't run it through patridge's test but it seems to be quite fast. I also needed the reverse process, converting a hex string to a byte array, so I wrote it as a reversal of Waleed's solution. Not sure if it's any faster than Tomalak's original solution. Again, I did not run the reverse process through patridge's test either.
private byte[] HexStringToByteArray(string hexString) { int hexStringLength = hexString.Length; byte[] b = new byte[hexStringLength / 2]; for (int i = 0; i < hexStringLength; i += 2) { int topChar = (hexString[i] > 0x40 ? hexString[i] - 0x37 : hexString[i] - 0x30) << 4; int bottomChar = hexString[i + 1] > 0x40 ? hexString[i + 1] - 0x37 : hexString[i + 1] - 0x30; b[i / 2] = Convert.ToByte(topChar + bottomChar); } return b; }
Complement to answer by @CodesInChaos (reversed method)
public static byte[] HexToByteUsingByteManipulation(string s) { byte[] bytes = new byte[s.Length / 2]; for (int i = 0; i < bytes.Length; i++) { int hi = s[i*2] - 65; hi = hi + 10 + ((hi >> 31) & 7); int lo = s[i*2 + 1] - 65; lo = lo + 10 + ((lo >> 31) & 7) & 0x0f; bytes[i] = (byte) (lo | hi << 4); } return bytes; }
说明:
& 0x0f
is to support also lower case letters
hi = hi + 10 + ((hi >> 31) & 7);
是相同的:
hi = ch-65 + 10 + (((ch-65) >> 31) & 7);
For '0'..'9' it is the same as hi = ch - 65 + 10 + 7;
which is hi = ch - 48
(this is because of 0xffffffff & 7
).
For 'A'..'F' it is hi = ch - 65 + 10;
(this is because of 0x00000000 & 7
).
For 'a'..'f' we have to big numbers so we must subtract 32 from default version by making some bits 0
by using & 0x0f
.
65 is code for 'A'
48 is code for '0'
7 is the number of letters between '9'
and 'A'
in the ASCII table ( ...456789:;<=>?@ABCD...
).
Not to pile on to the many answers here, but I found a fairly optimal (~4.5x better than accepted), straightforward implementation of the hex string parser. First, output from my tests (the first batch is my implementation):
Give me that string: 04c63f7842740c77e545bb0b2ade90b384f119f6ab57b680b7aa575a2f40939f Time to parse 100,000 times: 50.4192 ms Result as base64: BMY/eEJ0DHflRbsLKt6Qs4TxGfarV7aAt6pXWi9Ak58= BitConverter'd: 04-C6-3F-78-42-74-0C-77-E5-45-BB-0B-2A-DE-90-B3-84-F1-19-F6-AB-5 7-B6-80-B7-AA-57-5A-2F-40-93-9F Accepted answer: (StringToByteArray) Time to parse 100000 times: 233.1264ms Result as base64: BMY/eEJ0DHflRbsLKt6Qs4TxGfarV7aAt6pXWi9Ak58= BitConverter'd: 04-C6-3F-78-42-74-0C-77-E5-45-BB-0B-2A-DE-90-B3-84-F1-19-F6-AB-5 7-B6-80-B7-AA-57-5A-2F-40-93-9F With Mono's implementation: Time to parse 100000 times: 777.2544ms Result as base64: BMY/eEJ0DHflRbsLKt6Qs4TxGfarV7aAt6pXWi9Ak58= BitConverter'd: 04-C6-3F-78-42-74-0C-77-E5-45-BB-0B-2A-DE-90-B3-84-F1-19-F6-AB-5 7-B6-80-B7-AA-57-5A-2F-40-93-9F With SoapHexBinary: Time to parse 100000 times: 845.1456ms Result as base64: BMY/eEJ0DHflRbsLKt6Qs4TxGfarV7aAt6pXWi9Ak58= BitConverter'd: 04-C6-3F-78-42-74-0C-77-E5-45-BB-0B-2A-DE-90-B3-84-F1-19-F6-AB-5 7-B6-80-B7-AA-57-5A-2F-40-93-9F
The base64 and 'BitConverter'd' lines are there to test for correctness. Note that they are equal.
The implementation:
public static byte[] ToByteArrayFromHex(string hexString) { if (hexString.Length % 2 != 0) throw new ArgumentException("String must have an even length"); var array = new byte[hexString.Length / 2]; for (int i = 0; i < hexString.Length; i += 2) { array[i/2] = ByteFromTwoChars(hexString[i], hexString[i + 1]); } return array; } private static byte ByteFromTwoChars(char p, char p_2) { byte ret; if (p <= '9' && p >= '0') { ret = (byte) ((p - '0') << 4); } else if (p <= 'f' && p >= 'a') { ret = (byte) ((p - 'a' + 10) << 4); } else if (p <= 'F' && p >= 'A') { ret = (byte) ((p - 'A' + 10) << 4); } else throw new ArgumentException("Char is not a hex digit: " + p,"p"); if (p_2 <= '9' && p_2 >= '0') { ret |= (byte) ((p_2 - '0')); } else if (p_2 <= 'f' && p_2 >= 'a') { ret |= (byte) ((p_2 - 'a' + 10)); } else if (p_2 <= 'F' && p_2 >= 'A') { ret |= (byte) ((p_2 - 'A' + 10)); } else throw new ArgumentException("Char is not a hex digit: " + p_2, "p_2"); return ret; }
I tried some stuff with unsafe
and moving the (clearly redundant) character-to-nibble if
sequence to another method, but this was the fastest it got.
(I concede that this answers half the question. I felt that the string->byte[] conversion was underrepresented, while the byte[]->string angle seems to be well covered. Thus, this answer.)
Safe versions:
public static class HexHelper { [System.Diagnostics.Contracts.Pure] public static string ToHex(this byte[] value) { if (value == null) throw new ArgumentNullException("value"); const string hexAlphabet = @"0123456789ABCDEF"; var chars = new char[checked(value.Length * 2)]; unchecked { for (int i = 0; i < value.Length; i++) { chars[i * 2] = hexAlphabet[value[i] >> 4]; chars[i * 2 + 1] = hexAlphabet[value[i] & 0xF]; } } return new string(chars); } [System.Diagnostics.Contracts.Pure] public static byte[] FromHex(this string value) { if (value == null) throw new ArgumentNullException("value"); if (value.Length % 2 != 0) throw new ArgumentException("Hexadecimal value length must be even.", "value"); unchecked { byte[] result = new byte[value.Length / 2]; for (int i = 0; i < result.Length; i++) { // 0(48) - 9(57) -> 0 - 9 // A(65) - F(70) -> 10 - 15 int b = value[i * 2]; // High 4 bits. int val = ((b - '0') + ((('9' - b) >> 31) & -7)) << 4; b = value[i * 2 + 1]; // Low 4 bits. val += (b - '0') + ((('9' - b) >> 31) & -7); result[i] = checked((byte)val); } return result; } } }
Unsafe versions For those who prefer performance and do not afraid of unsafeness. About 35% faster ToHex and 10% faster FromHex.
public static class HexUnsafeHelper { [System.Diagnostics.Contracts.Pure] public static unsafe string ToHex(this byte[] value) { if (value == null) throw new ArgumentNullException("value"); const string alphabet = @"0123456789ABCDEF"; string result = new string(' ', checked(value.Length * 2)); fixed (char* alphabetPtr = alphabet) fixed (char* resultPtr = result) { char* ptr = resultPtr; unchecked { for (int i = 0; i < value.Length; i++) { *ptr++ = *(alphabetPtr + (value[i] >> 4)); *ptr++ = *(alphabetPtr + (value[i] & 0xF)); } } } return result; } [System.Diagnostics.Contracts.Pure] public static unsafe byte[] FromHex(this string value) { if (value == null) throw new ArgumentNullException("value"); if (value.Length % 2 != 0) throw new ArgumentException("Hexadecimal value length must be even.", "value"); unchecked { byte[] result = new byte[value.Length / 2]; fixed (char* valuePtr = value) { char* valPtr = valuePtr; for (int i = 0; i < result.Length; i++) { // 0(48) - 9(57) -> 0 - 9 // A(65) - F(70) -> 10 - 15 int b = *valPtr++; // High 4 bits. int val = ((b - '0') + ((('9' - b) >> 31) & -7)) << 4; b = *valPtr++; // Low 4 bits. val += (b - '0') + ((('9' - b) >> 31) & -7); result[i] = checked((byte)val); } } return result; } } }
BTW For benchmark testing initializing alphabet every time convert function called is wrong, alphabet must be const (for string) or static readonly (for char[]). Then alphabet-based conversion of byte[] to string becomes as fast as byte manipulation versions.
And of course test must be compiled in Release (with optimization) and with debug option "Suppress JIT optimization" turned off (same for "Enable Just My Code" if code must be debuggable).
Why make it complex? This is simple in Visual Studio 2008:
C#:
string hex = BitConverter.ToString(YourByteArray).Replace("-", "");
VB:
Dim hex As String = BitConverter.ToString(YourByteArray).Replace("-", "")
Inverse function for Waleed Eissa code (Hex String To Byte Array):
public static byte[] HexToBytes(this string hexString) { byte[] b = new byte[hexString.Length / 2]; char c; for (int i = 0; i < hexString.Length / 2; i++) { c = hexString[i * 2]; b[i] = (byte)((c < 0x40 ? c - 0x30 : (c < 0x47 ? c - 0x37 : c - 0x57)) << 4); c = hexString[i * 2 + 1]; b[i] += (byte)(c < 0x40 ? c - 0x30 : (c < 0x47 ? c - 0x37 : c - 0x57)); } return b; }
Waleed Eissa function with lower case support:
public static string BytesToHex(this byte[] barray, bool toLowerCase = true) { byte addByte = 0x37; if (toLowerCase) addByte = 0x57; char[] c = new char[barray.Length * 2]; byte b; for (int i = 0; i < barray.Length; ++i) { b = ((byte)(barray[i] >> 4)); c[i * 2] = (char)(b > 9 ? b + addByte : b + 0x30); b = ((byte)(barray[i] & 0xF)); c[i * 2 + 1] = (char)(b > 9 ? b + addByte : b + 0x30); } return new string(c); }
Extension methods (disclaimer: completely untested code, BTW…):
public static class ByteExtensions { public static string ToHexString(this byte[] ba) { StringBuilder hex = new StringBuilder(ba.Length * 2); foreach (byte b in ba) { hex.AppendFormat("{0:x2}", b); } return hex.ToString(); } }
etc.. Use either of Tomalak's three solutions (with the last one being an extension method on a string).
From Microsoft's developers, a nice, simple conversion:
public static string ByteArrayToString(byte[] ba) { // Concatenate the bytes into one long string return ba.Aggregate(new StringBuilder(32), (sb, b) => sb.Append(b.ToString("X2")) ).ToString(); }
While the above is clean an compact, performance junkies will scream about it using enumerators. You can get peak performance with an improved version of Tomolak's original answer:
public static string ByteArrayToString(byte[] ba) { StringBuilder hex = new StringBuilder(ba.Length * 2); for(int i=0; i < ga.Length; i++) // <-- Use for loop is faster than foreach hex.Append(ba[i].ToString("X2")); // <-- ToString is faster than AppendFormat return hex.ToString(); }
This is the fastest of all the routines I've seen posted here so far. Don't just take my word for it… performance test each routine and inspect its CIL code for yourself.
In terms of speed, this seems to be better than anything here:
public static string ToHexString(byte[] data) { byte b; int i, j, k; int l = data.Length; char[] r = new char[l * 2]; for (i = 0, j = 0; i < l; ++i) { b = data[i]; k = b >> 4; r[j++] = (char)(k > 9 ? k + 0x37 : k + 0x30); k = b & 15; r[j++] = (char)(k > 9 ? k + 0x37 : k + 0x30); } return new string(r); }
I did not get the code you suggested to work, Olipro. hex[i] + hex[i+1]
apparently returned an int
.
I did, however have some success by taking some hints from Waleeds code and hammering this together. It's ugly as hell but it seems to work and performs at 1/3 of the time compared to the others according to my tests (using patridges testing mechanism). Depending on input size. Switching around the ?:s to separate out 0-9 first would probably yield a slightly faster result since there are more numbers than letters.
public static byte[] StringToByteArray2(string hex) { byte[] bytes = new byte[hex.Length/2]; int bl = bytes.Length; for (int i = 0; i < bl; ++i) { bytes[i] = (byte)((hex[2 * i] > 'F' ? hex[2 * i] - 0x57 : hex[2 * i] > '9' ? hex[2 * i] - 0x37 : hex[2 * i] - 0x30) << 4); bytes[i] |= (byte)(hex[2 * i + 1] > 'F' ? hex[2 * i + 1] - 0x57 : hex[2 * i + 1] > '9' ? hex[2 * i + 1] - 0x37 : hex[2 * i + 1] - 0x30); } return bytes; }
This version of ByteArrayToHexViaByteManipulation could be faster.
From my reports:
- ByteArrayToHexViaByteManipulation3: 1,68 average ticks (over 1000 runs), 17,5X
- ByteArrayToHexViaByteManipulation2: 1,73 average ticks (over 1000 runs), 16,9X
- ByteArrayToHexViaByteManipulation: 2,90 average ticks (over 1000 runs), 10,1X
- ByteArrayToHexViaLookupAndShift: 3,22 average ticks (over 1000 runs), 9,1X
-
…
static private readonly char[] hexAlphabet = new char[] {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; static string ByteArrayToHexViaByteManipulation3(byte[] bytes) { char[] c = new char[bytes.Length * 2]; byte b; for (int i = 0; i < bytes.Length; i++) { b = ((byte)(bytes[i] >> 4)); c[i * 2] = hexAlphabet[b]; b = ((byte)(bytes[i] & 0xF)); c[i * 2 + 1] = hexAlphabet[b]; } return new string(c); }
And I think this one is an optimization:
static private readonly char[] hexAlphabet = new char[] {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; static string ByteArrayToHexViaByteManipulation4(byte[] bytes) { char[] c = new char[bytes.Length * 2]; for (int i = 0, ptr = 0; i < bytes.Length; i++, ptr += 2) { byte b = bytes[i]; c[ptr] = hexAlphabet[b >> 4]; c[ptr + 1] = hexAlphabet[b & 0xF]; } return new string(c); }
I'll enter this bit fiddling competition as I have an answer that also uses bit-fiddling to decode hexadecimals. Note that using character arrays may be even faster as calling StringBuilder
methods will take time as well.
public static String ToHex (byte[] data) { int dataLength = data.Length; // pre-create the stringbuilder using the length of the data * 2, precisely enough StringBuilder sb = new StringBuilder (dataLength * 2); for (int i = 0; i < dataLength; i++) { int b = data [i]; // check using calculation over bits to see if first tuple is a letter // isLetter is zero if it is a digit, 1 if it is a letter int isLetter = (b >> 7) & ((b >> 6) | (b >> 5)) & 1; // calculate the code using a multiplication to make up the difference between // a digit character and an alphanumerical character int code = '0' + ((b >> 4) & 0xF) + isLetter * ('A' - '9' - 1); // now append the result, after casting the code point to a character sb.Append ((Char)code); // do the same with the lower (less significant) tuple isLetter = (b >> 3) & ((b >> 2) | (b >> 1)) & 1; code = '0' + (b & 0xF) + isLetter * ('A' - '9' - 1); sb.Append ((Char)code); } return sb.ToString (); } public static byte[] FromHex (String hex) { // pre-create the array int resultLength = hex.Length / 2; byte[] result = new byte[resultLength]; // set validity = 0 (0 = valid, anything else is not valid) int validity = 0; int c, isLetter, value, validDigitStruct, validDigit, validLetterStruct, validLetter; for (int i = 0, hexOffset = 0; i < resultLength; i++, hexOffset += 2) { c = hex [hexOffset]; // check using calculation over bits to see if first char is a letter // isLetter is zero if it is a digit, 1 if it is a letter (upper & lowercase) isLetter = (c >> 6) & 1; // calculate the tuple value using a multiplication to make up the difference between // a digit character and an alphanumerical character // minus 1 for the fact that the letters are not zero based value = ((c & 0xF) + isLetter * (-1 + 10)) << 4; // check validity of all the other bits validity |= c >> 7; // changed to >>, maybe not OK, use UInt? validDigitStruct = (c & 0x30) ^ 0x30; validDigit = ((c & 0x8) >> 3) * (c & 0x6); validity |= (isLetter ^ 1) * (validDigitStruct | validDigit); validLetterStruct = c & 0x18; validLetter = (((c - 1) & 0x4) >> 2) * ((c - 1) & 0x2); validity |= isLetter * (validLetterStruct | validLetter); // do the same with the lower (less significant) tuple c = hex [hexOffset + 1]; isLetter = (c >> 6) & 1; value ^= (c & 0xF) + isLetter * (-1 + 10); result [i] = (byte)value; // check validity of all the other bits validity |= c >> 7; // changed to >>, maybe not OK, use UInt? validDigitStruct = (c & 0x30) ^ 0x30; validDigit = ((c & 0x8) >> 3) * (c & 0x6); validity |= (isLetter ^ 1) * (validDigitStruct | validDigit); validLetterStruct = c & 0x18; validLetter = (((c - 1) & 0x4) >> 2) * ((c - 1) & 0x2); validity |= isLetter * (validLetterStruct | validLetter); } if (validity != 0) { throw new ArgumentException ("Hexadecimal encoding incorrect for input " + hex); } return result; }
Converted from Java code.
And for inserting into an SQL string (if you're not using command parameters):
public static String ByteArrayToSQLHexString(byte[] Source) { return = "0x" + BitConverter.ToString(Source).Replace("-", ""); }
Yet another variation for diversity:
public static byte[] FromHexString(string src) { if (String.IsNullOrEmpty(src)) return null; int index = src.Length; int sz = index / 2; if (sz <= 0) return null; byte[] rc = new byte[sz]; while (--sz >= 0) { char lo = src[--index]; char hi = src[--index]; rc[sz] = (byte)( ( (hi >= '0' && hi <= '9') ? hi - '0' : (hi >= 'a' && hi <= 'f') ? hi - 'a' + 10 : (hi >= 'A' && hi <= 'F') ? hi - 'A' + 10 : 0 ) << 4 | ( (lo >= '0' && lo <= '9') ? lo - '0' : (lo >= 'a' && lo <= 'f') ? lo - 'a' + 10 : (lo >= 'A' && lo <= 'F') ? lo - 'A' + 10 : 0 ) ); } return rc; }
Not optimized for speed, but more LINQy than most answers (.NET 4.0):
<Extension()> Public Function FromHexToByteArray(hex As String) As Byte() hex = If(hex, String.Empty) If hex.Length Mod 2 = 1 Then hex = "0" & hex Return Enumerable.Range(0, hex.Length \ 2).Select(Function(i) Convert.ToByte(hex.Substring(i * 2, 2), 16)).ToArray End Function <Extension()> Public Function ToHexString(bytes As IEnumerable(Of Byte)) As String Return String.Concat(bytes.Select(Function(b) b.ToString("X2"))) End Function
Two mashups which folds the two nibble operations into one.
Probably pretty efficient version:
public static string ByteArrayToString2(byte[] ba) { char[] c = new char[ba.Length * 2]; for( int i = 0; i < ba.Length * 2; ++i) { byte b = (byte)((ba[i>>1] >> 4*((i&1)^1)) & 0xF); c[i] = (char)(55 + b + (((b-10)>>31)&-7)); } return new string( c ); }
Decadent linq-with-bit-hacking version:
public static string ByteArrayToString(byte[] ba) { return string.Concat( ba.SelectMany( b => new int[] { b >> 4, b & 0xF }).Select( b => (char)(55 + b + (((b-10)>>31)&-7))) ); }
And reverse:
public static byte[] HexStringToByteArray( string s ) { byte[] ab = new byte[s.Length>>1]; for( int i = 0; i < s.Length; i++ ) { int b = s[i]; b = (b - '0') + ((('9' - b)>>31)&-7); ab[i>>1] |= (byte)(b << 4*((i&1)^1)); } return ab; }
Another way is by using stackalloc
to reduce GC memory pressure:
static string ByteToHexBitFiddle(byte[] bytes) { var c = stackalloc char[bytes.Length * 2 + 1]; int b; for (int i = 0; i < bytes.Length; ++i) { b = bytes[i] >> 4; c[i * 2] = (char)(55 + b + (((b - 10) >> 31) & -7)); b = bytes[i] & 0xF; c[i * 2 + 1] = (char)(55 + b + (((b - 10) >> 31) & -7)); } c[bytes.Length * 2 ] = '\0'; return new string(c); }
Here's my shot at it. I've created a pair of extension classes to extend string and byte. On the large file test, the performance is comparable to Byte Manipulation 2.
The code below for ToHexString is an optimized implementation of the lookup and shift algorithm. It is almost identical to the one by Behrooz, but it turns out using a foreach
to iterate and a counter is faster than an explicitly indexing for
.
It comes in 2nd place behind Byte Manipulation 2 on my machine and is very readable code. The following test results are also of interest:
ToHexStringCharArrayWithCharArrayLookup: 41,589.69 average ticks (over 1000 runs), 1.5X ToHexStringCharArrayWithStringLookup: 50,764.06 average ticks (over 1000 runs), 1.2X ToHexStringStringBuilderWithCharArrayLookup: 62,812.87 average ticks (over 1000 runs), 1.0X
Based on the above results it seems safe to conclude that:
- The penalties for indexing into a string to perform the lookup vs. a char array are significant in the large file test.
- The penalties for using a StringBuilder of known capacity vs. a char array of known size to create the string are even more significant.
代码如下:
using System; namespace ConversionExtensions { public static class ByteArrayExtensions { private readonly static char[] digits = new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; public static string ToHexString(this byte[] bytes) { char[] hex = new char[bytes.Length * 2]; int index = 0; foreach (byte b in bytes) { hex[index++] = digits[b >> 4]; hex[index++] = digits[b & 0x0F]; } return new string(hex); } } } using System; using System.IO; namespace ConversionExtensions { public static class StringExtensions { public static byte[] ToBytes(this string hexString) { if (!string.IsNullOrEmpty(hexString) && hexString.Length % 2 != 0) { throw new FormatException("Hexadecimal string must not be empty and must contain an even number of digits to be valid."); } hexString = hexString.ToUpperInvariant(); byte[] data = new byte[hexString.Length / 2]; for (int index = 0; index < hexString.Length; index += 2) { int highDigitValue = hexString[index] <= '9' ? hexString[index] - '0' : hexString[index] - 'A' + 10; int lowDigitValue = hexString[index + 1] <= '9' ? hexString[index + 1] - '0' : hexString[index + 1] - 'A' + 10; if (highDigitValue < 0 || lowDigitValue < 0 || highDigitValue > 15 || lowDigitValue > 15) { throw new FormatException("An invalid digit was encountered. Valid hexadecimal digits are 0-9 and AF."); } else { byte value = (byte)((highDigitValue << 4) | (lowDigitValue & 0x0F)); data[index / 2] = value; } } return data; } } }
Below are the test results that I got when I put my code in @patridge's testing project on my machine. I also added a test for converting to a byte array from hexadecimal. The test runs that exercised my code are ByteArrayToHexViaOptimizedLookupAndShift and HexToByteArrayViaByteManipulation. The HexToByteArrayViaConvertToByte was taken from XXXX. The HexToByteArrayViaSoapHexBinary is the one from @Mykroft's answer.
Intel Pentium III Xeon processor
Cores: 4 <br/> Current Clock Speed: 1576 <br/> Max Clock Speed: 3092 <br/>
Converting array of bytes into hexadecimal string representation
ByteArrayToHexViaByteManipulation2: 39,366.64 average ticks (over 1000 runs), 22.4X
ByteArrayToHexViaOptimizedLookupAndShift: 41,588.64 average ticks (over 1000 runs), 21.2X
ByteArrayToHexViaLookup: 55,509.56 average ticks (over 1000 runs), 15.9X
ByteArrayToHexViaByteManipulation: 65,349.12 average ticks (over 1000 runs), 13.5X
ByteArrayToHexViaLookupAndShift: 86,926.87 average ticks (over 1000 runs), 10.2X
ByteArrayToHexStringViaBitConverter: 139,353.73 average ticks (over 1000 runs),6.3X
ByteArrayToHexViaSoapHexBinary: 314,598.77 average ticks (over 1000 runs), 2.8X
ByteArrayToHexStringViaStringBuilderForEachByteToString: 344,264.63 average ticks (over 1000 runs), 2.6X
ByteArrayToHexStringViaStringBuilderAggregateByteToString: 382,623.44 average ticks (over 1000 runs), 2.3X
ByteArrayToHexStringViaStringBuilderForEachAppendFormat: 818,111.95 average ticks (over 1000 runs), 1.1X
ByteArrayToHexStringViaStringConcatArrayConvertAll: 839,244.84 average ticks (over 1000 runs), 1.1X
ByteArrayToHexStringViaStringBuilderAggregateAppendFormat: 867,303.98 average ticks (over 1000 runs), 1.0X
ByteArrayToHexStringViaStringJoinArrayConvertAll: 882,710.28 average ticks (over 1000 runs), 1.0X
For performance I would go with drphrozens solution. A tiny optimization for the decoder could be to use a table for either char to get rid of the "<< 4".
Clearly the two method calls are costly. If some kind of check is made either on input or output data (could be CRC, checksum or whatever) the if (b == 255)...
could be skipped and thereby also the method calls altogether.
Using offset++
and offset
instead of offset
and offset + 1
might give some theoretical benefit but I suspect the compiler handles this better than me.
private static readonly byte[] LookupTableLow = new byte[] { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; private static readonly byte[] LookupTableHigh = new byte[] { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xA0, 0xB0, 0xC0, 0xD0, 0xE0, 0xF0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xA0, 0xB0, 0xC0, 0xD0, 0xE0, 0xF0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; private static byte LookupLow(char c) { var b = LookupTableLow[c]; if (b == 255) throw new IOException("Expected a hex character, got " + c); return b; } private static byte LookupHigh(char c) { var b = LookupTableHigh[c]; if (b == 255) throw new IOException("Expected a hex character, got " + c); return b; } public static byte ToByte(char[] chars, int offset) { return (byte)(LookupHigh(chars[offset++]) | LookupLow(chars[offset])); }
This is just off the top of my head and has not been tested or benchmarked.
If performance matters, here's an optimized solution:
static readonly char[] _hexDigits = "0123456789abcdef".ToCharArray(); public static string ToHexString(this byte[] bytes) { char[] digits = new char[bytes.Length * 2]; for (int i = 0; i < bytes.Length; i++) { int d1, d2; d1 = Math.DivRem(bytes[i], 16, out d2); digits[2 * i] = _hexDigits[d1]; digits[2 * i + 1] = _hexDigits[d2]; } return new string(digits); }
It's about 2.5 times faster that BitConverter.ToString
, and about 7 times faster that BitConverter.ToString
+ removal of the '-' chars.