VB6.0 判断一个文件的编码格式

很早以前网上找的代码,怕以后找不到了,先留下记录

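具体逻辑:先检查文件开头的 BOM(FF FE 为 Unicode 小端,FE FF 为 Unicode 大端,EF BB BF 为带 BOM 的 UTF-8);如果没有 BOM,再用正则表达式检查字节序列是否符合 UTF-8 的多字节编码规则,符合则判定为无 BOM 的 UTF-8,否则判定为 ANSI。实测好用。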

' Detects the text encoding of a file.
'
' The leading bytes are compared against the known byte-order marks first;
' when no BOM is present the whole content is handed to is_valid_utf8.
'
' Parameters:
'   myFileName - path of the file to inspect.
'
' Returns one of:
'   "Unicode"            - file starts with FF FE
'   "Unicode Big Endian" - file starts with FE FF
'   "UTF-8"              - file starts with EF BB BF
'   "UTF8_NOBOM"         - no BOM, and is_valid_utf8 accepts the content
'                          (this includes pure ASCII and empty files)
'   "ANSI"               - anything else
'
' Errors raised while opening or reading the file are propagated to the
' caller; the file handle is always released first.
Public Function GetCode(ByVal myFileName As String)
    Dim fileNo As Integer
    Dim lastIdx As Long
    Dim i As Long
    Dim Tmp() As Byte
    Dim rawText As String
    Dim errNum As Long
    Dim errSrc As String
    Dim errDesc As String

    ' Ask the runtime for an unused file number instead of assuming #1 is free.
    fileNo = FreeFile
    Open myFileName For Binary Access Read As #fileNo

    On Error GoTo ReadFailed
    lastIdx = LOF(fileNo) - 1
    ' An empty file has nothing to read; ReDim with a -1 upper bound would fail.
    If lastIdx >= 0 Then
        ReDim Tmp(lastIdx)
        Get #fileNo, , Tmp
    End If
    Close #fileNo
    On Error GoTo 0

    ' BOM checks compare bytes directly. The length guards are nested because
    ' VB's And does not short-circuit, so Tmp(1)/Tmp(2) must not be touched
    ' on files that are too short to contain them.
    If lastIdx >= 1 Then
        If Tmp(0) = &HFF And Tmp(1) = &HFE Then
            GetCode = "Unicode"
            Exit Function
        ElseIf Tmp(0) = &HFE And Tmp(1) = &HFF Then
            GetCode = "Unicode Big Endian"
            Exit Function
        End If
    End If
    If lastIdx >= 2 Then
        If Tmp(0) = &HEF And Tmp(1) = &HBB And Tmp(2) = &HBF Then
            GetCode = "UTF-8"
            Exit Function
        End If
    End If

    ' Build a string holding one character per byte (code point = byte value)
    ' so the regular expression in is_valid_utf8 can inspect raw byte values.
    ' The string is preallocated and filled in place to avoid repeated
    ' concatenation.
    rawText = Space$(lastIdx + 1)
    For i = 0 To lastIdx
        Mid$(rawText, i + 1, 1) = ChrW$(Tmp(i))
    Next

    If is_valid_utf8(rawText) Then
        GetCode = "UTF8_NOBOM"
    Else
        GetCode = "ANSI"
    End If
    Exit Function

ReadFailed:
    ' Save the error details before Close, then release the handle and
    ' re-raise so the caller still sees the original failure.
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description
    Close #fileNo
    Err.Raise errNum, errSrc, errDesc
End Function

' Reports whether a string looks like well-formed UTF-8.
'
' The input is expected to carry one character per original byte (code
' points 0-255), as produced by GetCode. A regular expression lists the
' malformed shapes; the string is considered valid when none of them occurs.
'
' Parameters:
'   str - the text to check (passed ByRef, as in the original, to avoid a copy).
'
' Returns True when no malformed sequence is found, False otherwise.
Public Function is_valid_utf8(ByRef str)
    Dim badShapes As Variant
    Dim matcher As Object

    badShapes = Array( _
        "[\xC0-\xDF]([^\x80-\xBF]|$)", _
        "[\xE0-\xEF].{0,1}([^\x80-\xBF]|$)", _
        "[\xF0-\xF7].{0,2}([^\x80-\xBF]|$)", _
        "[\xF8-\xFB].{0,3}([^\x80-\xBF]|$)", _
        "[\xFC-\xFD].{0,4}([^\x80-\xBF]|$)", _
        "[\xFE-\xFE].{0,5}([^\x80-\xBF]|$)", _
        "[\x00-\x7F][\x80-\xBF]", _
        "[\xC0-\xDF].[\x80-\xBF]", _
        "[\xE0-\xEF]..[\x80-\xBF]", _
        "[\xF0-\xF7]...[\x80-\xBF]", _
        "[\xF8-\xFB]....[\x80-\xBF]", _
        "[\xFC-\xFD].....[\x80-\xBF]", _
        "[\xFE-\xFE]......[\x80-\xBF]", _
        "^[\x80-\xBF]")
    ' Entries 1-6: a lead byte whose sequence ends (or hits a non-continuation
    '              byte) before the expected length is reached.
    ' Entries 7-13: a continuation byte (80-BF) appearing right after a
    '               sequence that should already be complete.
    ' Entry 14: the text begins with a continuation byte.

    Set matcher = CreateObject("VbScript.regexp")
    matcher.Pattern = Join(badShapes, "|")

    If matcher.test(str) Then
        is_valid_utf8 = False
    Else
        is_valid_utf8 = True
    End If
End Function

下面是具体使用方法

' Example: print the encoding name GetCode reports for the file at path FileName.
Debug.Print GetCode(FileName)

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值