vb html 乱码,vb 中文乱码怎么解决?

235125708a7b7ab0455050962d92628d.png

需要对下载下来的网页进行编码转换,将UTF-8转成Unicode

新建一模块名为Module_UTF8:

Option Explicit

Public m_bIsNt     As Boolean

Public Declare Function WideCharToMultiByte Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpDefaultChar As String, ByVal lpUsedDefaultChar As Long) As Long

Public Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long

Public Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (lpvDest As Any, lpvSource As Any, ByVal cbCopy As Long)

Public Const CP_UTF8 = 65001

'Purpose:Convert   Utf8   to   Unicode

Public Function UTF8_Decode(ByVal sUTF8 As String) As String

Dim lngUtf8Size               As Long

Dim strBuffer                   As String

Dim lngBufferSize           As Long

Dim lngResult                   As Long

Dim bytUtf8()                   As Byte

Dim n                                   As Long

If LenB(sUTF8) = 0 Then Exit Function

If m_bIsNt Then

On Error GoTo EndFunction

bytUtf8 = StrConv(sUTF8, vbFromUnicode)

lngUtf8Size = UBound(bytUtf8) + 1

On Error GoTo 0

'Set   buffer   for   longest   possible   string   i.e.   each   byte   is

'ANSI,   thus   1   unicode(2   bytes)for   every   utf-8   character.

lngBufferSize = lngUtf8Size * 2

strBuffer = String$(lngBufferSize, vbNullChar)

'Translate   using   code   page   65001(UTF-8)

lngResult = MultiByteToWideChar(CP_UTF8, 0, bytUtf8(0), _

lngUtf8Size, StrPtr(strBuffer), lngBufferSize)

'Trim   result   to   actual   length

If lngResult Then

UTF8_Decode = Left$(strBuffer, lngResult)

End If

Else

Dim i                                   As Long

Dim TopIndex                     As Long

Dim TwoBytes(1)               As Byte

Dim ThreeBytes(2)           As Byte

Dim AByte                           As Byte

Dim TStr                             As String

Dim BArray()                     As Byte

'Resume   on   error   in   case   someone   inputs   text   with   accents

'that   should   have   been   encoded   as   UTF-8

On Error Resume Next

TopIndex = Len(sUTF8)         '   Number   of   bytes   equal   TopIndex+1

If TopIndex = 0 Then Exit Function               '   get   out   if   there's   nothing   to   convert

BArray = StrConv(sUTF8, vbFromUnicode)

i = 0       '   Initialise   pointer

TopIndex = TopIndex - 1

'   Iterate   through   the   Byte   Array

Do While i <= TopIndex

AByte = BArray(i)

If AByte < &H80 Then

'   Normal   ANSI   character   -   use   it   as   is

TStr = TStr & Chr$(AByte):           i = i + 1           '   Increment   byte   array   index

ElseIf AByte >= &HE0 Then                           'was   =   &HE1   Then

'   Start   of   3   byte   UTF-8   group   for   a   character

'   Copy   3   byte   to   ThreeBytes

ThreeBytes(0) = BArray(i):       i = i + 1

ThreeBytes(1) = BArray(i):       i = i + 1

ThreeBytes(2) = BArray(i):       i = i + 1

'   Convert   Byte   array   to   UTF-16   then   Unicode

TStr = TStr & ChrW$((ThreeBytes(0) And &HF) * &H1000 + (ThreeBytes(1) And &H3F) * &H40 + (ThreeBytes(2) And &H3F))

ElseIf (AByte >= &HC2) And (AByte <= &HDB) Then

'   Start   of   2   byte   UTF-8   group   for   a   character

TwoBytes(0) = BArray(i):       i = i + 1

TwoBytes(1) = BArray(i):       i = i + 1

'   Convert   Byte   array   to   UTF-16   then   Unicode

TStr = TStr & ChrW$((TwoBytes(0) And &H1F) * &H40 + (TwoBytes(1) And &H3F))

Else

'   Normal   ANSI   character   -   use   it   as   is

TStr = TStr & Chr$(AByte):           i = i + 1           '   Increment   byte   array   index

End If

Loop

UTF8_Decode = TStr             '   Return   the   resultant   string

Erase BArray

End If

EndFunction:

End Function

'Purpose:Convert   Unicode   string   to   UTF-8.

Public Function UTF8_Encode(ByVal strUnicode As String, Optional ByVal bHTML As Boolean = True) As String

Dim i                                   As Long

Dim TLen                             As Long

Dim lPtr                             As Long

Dim UTF16                           As Long

Dim UTF8_EncodeLong       As String

TLen = Len(strUnicode)

If TLen = 0 Then Exit Function

If m_bIsNt Then

Dim lngBufferSize           As Long

Dim lngResult                   As Long

Dim bytUtf8()                   As Byte

'Set   buffer   for   longest   possible   string.

lngBufferSize = TLen * 3 + 1

ReDim bytUtf8(lngBufferSize - 1)

'Translate   using   code   page   65001(UTF-8).

lngResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(strUnicode), _

TLen, bytUtf8(0), lngBufferSize, vbNullString, 0)

'Trim   result   to   actual   length.

If lngResult Then

lngResult = lngResult - 1

ReDim Preserve bytUtf8(lngResult)

'CopyMemory   StrPtr(UTF8_Encode),   bytUtf8(0&),   lngResult

UTF8_Encode = StrConv(bytUtf8, vbUnicode)

'   For   i   =   0   To   lngResult

'         UTF8_Encode   =   UTF8_Encode   &   Chr$(bytUtf8(i))

'   Next

End If

Else

For i = 1 To TLen

'   Get   UTF-16   value   of   Unicode   character

lPtr = StrPtr(strUnicode) + ((i - 1) * 2)

CopyMemory UTF16, ByVal lPtr, 2

'Convert   to   UTF-8

If UTF16 < &H80 Then                                                                                     '   1   UTF-8   byte

UTF8_EncodeLong = Chr$(UTF16)

ElseIf UTF16 < &H800 Then                                                                           '   2   UTF-8   bytes

UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F))                                         '   Least   Significant   6   bits

UTF16 = UTF16 \ &H40                                                                               '   Shift   right   6   bits

UTF8_EncodeLong = Chr$(&HC0 + (UTF16 And &H1F)) & UTF8_EncodeLong                     '   Use   5   remaining   bits

Else                                                                                                             '   3   UTF-8   bytes

UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F))                                         '   Least   Significant   6   bits

UTF16 = UTF16 \ &H40                                                                               '   Shift   right   6   bits

UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F)) & UTF8_EncodeLong                     '   Use   next   6   bits

UTF16 = UTF16 \ &H40                                                                               '   Shift   right   6   bits

UTF8_EncodeLong = Chr$(&HE0 + (UTF16 And &HF)) & UTF8_EncodeLong                       '   Use   4   remaining   bits

End If

UTF8_Encode = UTF8_Encode & UTF8_EncodeLong

Next

End If

'Substitute   vbCrLf   with   HTML   line   breaks   if   requested.

If bHTML Then

UTF8_Encode = Replace$(UTF8_Encode, vbCrLf, "
")

End If

End Function

◆◆

评论读取中....

请登录后再发表评论!

◆◆

修改失败,请稍后尝试

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
VB.Net中,当抓取网页时出现乱码的问题可以通过以下方法解决。首先,网络上一些常见的解决乱码的方法如使用请求头、Html中的编码并不能完美解决乱码问题。在调试VB.Net调用Microsoft.XMLHttp组件抓取网页时,发现当网页的meta标签charset为utf-8时不会乱码,而charset为Gb2312时会出现乱码。因此,解决乱码问题的一个完整的方法是手动指定编码。在VB.Net中,可以使用Encoding类中的GetEncoding方法指定编码进行解码,例如使用Encoding.GetEncoding("utf-8")解码utf-8编码的网页内容。这样可以确保正确地读取并显示网页内容,解决乱码问题。<span class="em">1</span><span class="em">2</span><span class="em">3</span> #### 引用[.reference_title] - *1* *3* [C#、VB.NET、ASP.NET 通用解决获取网页源码乱码问题原因,中文文本乱码完美方案。](https://blog.csdn.net/wq1282/article/details/82953254)[target="_blank" data-report-click={"spm":"1018.2226.3001.9630","extra":{"utm_source":"vip_chatgpt_common_search_pc_result","utm_medium":"distribute.pc_search_result.none-task-cask-2~all~insert_cask~default-1-null.142^v93^chatsearchT3_2"}}] [.reference_item style="max-width: 50%"] - *2* [VB.Net抓取网页乱码解决方法](https://blog.csdn.net/weixin_29938187/article/details/118062320)[target="_blank" data-report-click={"spm":"1018.2226.3001.9630","extra":{"utm_source":"vip_chatgpt_common_search_pc_result","utm_medium":"distribute.pc_search_result.none-task-cask-2~all~insert_cask~default-1-null.142^v93^chatsearchT3_2"}}] [.reference_item style="max-width: 50%"] [ .reference_list ]
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值