UTF-8 byte[] 与 string 类型相互转换

/// <summary>
/// Converts between <see cref="string"/> values and a UTF-8 style byte encoding
/// that is limited to one-, two- and three-byte sequences.
/// </summary>
/// <remarks>
/// Every UTF-16 code unit is encoded on its own: U+0001..U+007F take one byte,
/// U+0000 and U+0080..U+07FF take two bytes (so NUL becomes 0xC0 0x80), and
/// everything above U+07FF, surrogate halves included, takes three bytes.
/// Four-byte UTF-8 sequences are neither produced nor accepted by this class.
/// NOTE(review): this looks like the "modified UTF-8" layout used by Java's
/// DataInput/DataOutput; confirm before exchanging data with a standard UTF-8 peer.
/// </remarks>
class MyEncoding
{
    /// <summary>
    /// Decodes a byte array produced by <see cref="EncodeStringToUtf8Byte"/> back into a string.
    /// </summary>
    /// <param name="buffer">The encoded bytes; may be null.</param>
    /// <returns>The decoded string, or null when <paramref name="buffer"/> is null.</returns>
    /// <exception cref="IOException">
    /// A multi-byte sequence is truncated, a continuation byte is malformed, or a lead byte
    /// does not start a one-, two- or three-byte sequence.
    /// </exception>
    public static string EncodeUtf8ByteToString(byte[] buffer)
    {
        if (buffer == null)
        {
            return null;
        }

        int utfLength = buffer.Length;
        // Every sequence consumes at least one byte and yields exactly one char,
        // so the output can never be longer than the input.
        char[] result = new char[utfLength];
        int count = 0;
        int index = 0;

        while (count < utfLength)
        {
            byte a = buffer[count++];

            if (a < 0x80)
            {
                // 0xxxxxxx
                result[index++] = (char)a;
            }
            else if ((a & 0xE0) == 0xC0)
            {
                // 110xxxxx 10xxxxxx
                if (count >= utfLength)
                {
                    throw new IOException("Invalid UTF-8 encoding found, start of two byte char found at end.");
                }

                byte b = buffer[count++];
                if ((b & 0xC0) != 0x80)
                {
                    throw new IOException(
                        "Invalid UTF-8 encoding found, byte two does not start with 0x80.");
                }

                result[index++] = (char)(((a & 0x1F) << 6) | (b & 0x3F));
            }
            else if ((a & 0xF0) == 0xE0)
            {
                // 1110xxxx 10xxxxxx 10xxxxxx
                if (count + 1 >= utfLength)
                {
                    throw new IOException(
                        "Invalid UTF-8 encoding found, start of three byte char found at end.");
                }

                byte b = buffer[count++];
                byte c = buffer[count++];
                if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80))
                {
                    throw new IOException(
                        "Invalid UTF-8 encoding found, byte two does not start with 0x80.");
                }

                result[index++] = (char)(((a & 0x0F) << 12) |
                                         ((b & 0x3F) << 6) | (c & 0x3F));
            }
            else
            {
                // 10xxxxxx (stray continuation byte) or 1111xxxx (four-byte lead)
                throw new IOException("Invalid UTF-8 encoding found, aborting.");
            }
        }

        return new String(result, 0, index);
    }

    /// <summary>
    /// Decodes a byte array produced by <see cref="EncodeStringToUtf8Byte"/> back into a string,
    /// accumulating the characters in a <see cref="StringBuilder"/>.
    /// </summary>
    /// <param name="bytearr">The encoded bytes; may be null.</param>
    /// <returns>The decoded string, or null when <paramref name="bytearr"/> is null.</returns>
    /// <exception cref="IOException">
    /// A multi-byte sequence is truncated, a continuation byte is malformed, or a lead byte
    /// does not start a one-, two- or three-byte sequence.
    /// </exception>
    public static String ReadString(byte[] bytearr)
    {
        // The original guard compared Length with -1, which can never fail, so a null
        // array crashed before the intended "return null" path was reachable.
        if (bytearr == null)
        {
            return null;
        }

        int utflen = bytearr.Length;
        StringBuilder str = new StringBuilder(utflen);

        int c, char2, char3;
        int count = 0;

        while (count < utflen)
        {
            c = bytearr[count] & 0xff;

            // The high nibble of the lead byte selects the sequence length.
            switch (c >> 4)
            {
                case 0:
                case 1:
                case 2:
                case 3:
                case 4:
                case 5:
                case 6:
                case 7:
                    /* 0xxxxxxx */
                    count++;
                    str.Append((char)c);
                    break;
                case 12:
                case 13:
                    /* 110x xxxx 10xx xxxx */
                    count += 2;
                    if (count > utflen)
                    {
                        throw new IOException("Invalid UTF-8 encoding found, aborting.");
                    }
                    char2 = bytearr[count - 1];
                    if ((char2 & 0xC0) != 0x80)
                    {
                        throw new IOException("Invalid UTF-8 encoding found, aborting.");
                    }
                    str.Append((char)(((c & 0x1F) << 6) | (char2 & 0x3F)));
                    break;
                case 14:
                    /* 1110 xxxx 10xx xxxx 10xx xxxx */
                    count += 3;
                    if (count > utflen)
                    {
                        throw new IOException("Invalid UTF-8 encoding found, aborting.");
                    }
                    char2 = bytearr[count - 2];
                    char3 = bytearr[count - 1];
                    if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
                    {
                        throw new IOException("Invalid UTF-8 encoding found, aborting.");
                    }
                    str.Append((char)(((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | (char3 & 0x3F)));
                    break;
                default:
                    /* 10xx xxxx, 1111 xxxx */
                    throw new IOException("Invalid UTF-8 encoding found, aborting.");
            }
        }

        // The number of chars produced may be less than utflen.
        return str.ToString();
    }

    /// <summary>
    /// Encodes a string into the byte layout described on this class.
    /// </summary>
    /// <param name="text">The text to encode; may be null.</param>
    /// <returns>The encoded bytes; an empty array when <paramref name="text"/> is null.</returns>
    /// <exception cref="IOException">
    /// The encoded form would need more than <see cref="int.MaxValue"/> bytes.
    /// </exception>
    public static byte[] EncodeStringToUtf8Byte(string text)
    {
        if (text == null)
        {
            return new byte[0];
        }

        char[] charr = text.ToCharArray();
        uint utfLength = CountUtf8Bytes(charr);

        if (utfLength > int.MaxValue)
        {
            // The two literals used to be joined without a space ("suppliedstring").
            throw new IOException(
                String.Format(
                    "Cannot marshall an encoded string longer than: {0} bytes, supplied " +
                    "string requires: {1} characters to encode", int.MaxValue, utfLength));
        }

        byte[] bytearr = new byte[utfLength];
        EncodeUTF8toBuffer(charr, bytearr);
        return bytearr;
    }

    /// <summary>
    /// Counts the bytes <see cref="EncodeUTF8toBuffer"/> will write for the given code units.
    /// </summary>
    private static uint CountUtf8Bytes(char[] chars)
    {
        uint utfLength = 0;

        for (int i = 0; i < chars.Length; i++)
        {
            int c = chars[i];
            if ((c >= 0x0001) && (c <= 0x007F))
            {
                utfLength++;
            }
            else if (c > 0x07FF)
            {
                utfLength += 3;
            }
            else
            {
                // U+0000 and U+0080..U+07FF
                utfLength += 2;
            }
        }

        return utfLength;
    }

    /// <summary>
    /// Writes the encoded form of every code unit into <paramref name="buffer"/>, which must
    /// already have the length reported by <see cref="CountUtf8Bytes"/>.
    /// </summary>
    private static void EncodeUTF8toBuffer(char[] chars, byte[] buffer)
    {
        int count = 0;

        for (int i = 0; i < chars.Length; i++)
        {
            int c = chars[i];
            if ((c >= 0x0001) && (c <= 0x007F))
            {
                buffer[count++] = (byte)c;
            }
            else if (c > 0x07FF)
            {
                buffer[count++] = (byte)(0xE0 | ((c >> 12) & 0x0F));
                buffer[count++] = (byte)(0x80 | ((c >> 6) & 0x3F));
                buffer[count++] = (byte)(0x80 | (c & 0x3F));
            }
            else
            {
                // U+0000 lands here as well, so NUL is written as 0xC0 0x80.
                buffer[count++] = (byte)(0xC0 | ((c >> 6) & 0x1F));
                buffer[count++] = (byte)(0x80 | (c & 0x3F));
            }
        }
    }
}


Endian helper:

 /// <summary>
 /// Byte-order reversal helpers for the primitive numeric types and for raw byte arrays.
 /// Each overload returns the value whose bytes are those of the argument in reverse order.
 /// </summary>
 public class EndianSupport
 {
     /// <summary>Swaps the two bytes of a UTF-16 code unit.</summary>
     public static char SwitchEndian(char x)
     {
         return (char)SwitchEndian((ushort)x);
     }

     /// <summary>Swaps the two bytes of a signed 16-bit integer.</summary>
     public static short SwitchEndian(short x)
     {
         // unchecked: the casts only reinterpret the bit pattern and must not throw
         // when the assembly is compiled with overflow checking enabled.
         return unchecked((short)SwitchEndian((ushort)x));
     }

     /// <summary>Reverses the four bytes of a signed 32-bit integer.</summary>
     public static int SwitchEndian(int x)
     {
         return unchecked((int)SwitchEndian((uint)x));
     }

     /// <summary>Reverses the eight bytes of a signed 64-bit integer.</summary>
     public static long SwitchEndian(long x)
     {
         return unchecked((long)SwitchEndian((ulong)x));
     }

     /// <summary>Swaps the two bytes of an unsigned 16-bit integer.</summary>
     public static ushort SwitchEndian(ushort x)
     {
         // x is promoted to int for the shifts; the narrowing cast keeps the low 16 bits.
         return unchecked((ushort)((x << 8) | (x >> 8)));
     }

     /// <summary>Reverses the four bytes of an unsigned 32-bit integer.</summary>
     public static uint SwitchEndian(uint x)
     {
         return (x << 24) |
                ((x & 0x0000FF00u) << 8) |
                ((x >> 8) & 0x0000FF00u) |
                (x >> 24);
     }

     /// <summary>Reverses the eight bytes of an unsigned 64-bit integer.</summary>
     public static ulong SwitchEndian(ulong x)
     {
         return (x << 56) |
                ((x & 0x000000000000FF00UL) << 40) |
                ((x & 0x0000000000FF0000UL) << 24) |
                ((x & 0x00000000FF000000UL) << 8) |
                ((x >> 8) & 0x00000000FF000000UL) |
                ((x >> 24) & 0x0000000000FF0000UL) |
                ((x >> 40) & 0x000000000000FF00UL) |
                (x >> 56);
     }

     /// <summary>
     /// Reverses the eight bytes of the IEEE 754 representation of a double.
     /// </summary>
     public static double SwitchEndian(double x)
     {
         // Reinterpret the bits as a long, swap, and reinterpret back. This replaces the
         // earlier stream round trip, which allocated several disposables per call and
         // never disposed them.
         long bits = System.BitConverter.DoubleToInt64Bits(x);
         return System.BitConverter.Int64BitsToDouble(SwitchEndian(bits));
     }

     /// <summary>
     /// Reverses the four bytes of the IEEE 754 representation of a float.
     /// </summary>
     public static float SwitchEndian(float x)
     {
         // GetBytes/ToSingle use the same (machine) byte order, so reversing the array in
         // between yields the byte-swapped value on any platform.
         byte[] raw = System.BitConverter.GetBytes(x);
         System.Array.Reverse(raw);
         return System.BitConverter.ToSingle(raw, 0);
     }

     /// <summary>
     /// Returns a new array holding the bytes of <paramref name="x"/> in reverse order.
     /// The input array is not modified.
     /// </summary>
     /// <exception cref="System.ArgumentNullException"><paramref name="x"/> is null.</exception>
     public static byte[] SwitchEndian(byte[] x)
     {
         if (x == null)
         {
             throw new System.ArgumentNullException("x");
         }

         byte[] rc = (byte[])x.Clone();
         System.Array.Reverse(rc);
         return rc;
     }
 }
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值