将简体中文GBK(ANSI-936)编码格式的文本转换成UTF-8编码格式
:: Convert a text file from ANSI code page 936 (Simplified Chinese GBK) to UTF-8
::
:: Usage: call :ANSI936_to_UTF8 fileOut fileIn
::
:: Reads the ANSI-encoded text file fileIn and writes the UTF-8 encoded text file fileOut
::
:ANSI936_to_UTF8
:: Convert a code page 936 (GBK) text file to UTF-8.
::   %1 - path of the UTF-8 file to write
::   %2 - path of the ANSI-936 file to read
::
:: Approach: build a UTF-16LE copy of the input (with BOM) in "%TEMP%\U.bom",
:: either through PowerShell or through CMD /U + certutil, then TYPE that copy
:: into %1 while the console code page is 65001.
:: Side effects: sets POWERSHELL_EXE and POWERSHELL_VER in the caller's
:: environment (POWERSHELL_EXE doubles as a cache for later calls), clears
:: CERTUTIL_EXE on the CMD path, and uses the fixed temp file "%TEMP%\U.bom".
::
:: Remember the current code page so it can be restored before returning.
:: NOTE(review): relies on :GET_SYS_DEFAULT_ACTIVE_CODE_PAGE (defined elsewhere)
:: storing a code page number in bf_CODE_PAGE - confirm.
CALL :GET_SYS_DEFAULT_ACTIVE_CODE_PAGE bf_CODE_PAGE
IF NOT "%POWERSHELL_EXE%"=="" (GOTO :A2U_TO_NEXT_1)
FOR /F "usebackq delims=" %%i IN (`WHERE PowerShell.exe 2^>NUL`) DO (CALL,SET POWERSHELL_EXE=%%i)
IF "%POWERSHELL_EXE%"=="" (GOTO :A2U_USE_CMD)
:A2U_TO_NEXT_1
:: Drop any value left over from an earlier call so a failed probe is detectable.
SET "POWERSHELL_VER="
:: Extract "major.minor" from the PSVersion row of $PSVersionTable.
FOR /F "usebackq tokens=2,3,* delims=. " %%i IN (`^(^^^"%POWERSHELL_EXE%^^^" $PSVersionTable 2^>NUL^)^|^(FINDSTR /C:^^^"PSVersion ^^^"^)`) DO (CALL,SET POWERSHELL_VER=%%i.%%j)
:: If the version could not be determined, use the CMD fallback.
IF "%POWERSHELL_VER%"=="" (GOTO :A2U_USE_CMD)
:: Starting with PowerShell 6.2 the -Encoding parameter also accepts the numeric
:: ID of a registered code page (e.g. -Encoding 1251) or its string name
:: (e.g. -Encoding "windows-1251"). Older versions cannot take "-Encoding 936",
:: so anything below 6.2 goes to the CMD fallback. Major and minor are compared
:: as integers; comparing "5.1" with "6.2" directly would be a string compare.
FOR /F "tokens=1,2 delims=." %%i IN ("%POWERSHELL_VER%.0") DO (
    IF %%i LSS 6 (GOTO :A2U_USE_CMD)
    IF %%i EQU 6 IF %%j LSS 2 (GOTO :A2U_USE_CMD)
)
("%POWERSHELL_EXE%" Get-Content -Encoding 936 "%~2" ^| Out-File -Encoding Unicode "%TEMP%\U.bom") 1>NUL 2>NUL
GOTO :A2U_OUTPUT_UTF8
:: GOTO :EOF
:A2U_USE_CMD
IF "%CERTUTIL_EXE%"=="" ( FOR /F "usebackq delims=" %%i IN (`WHERE certutil.exe 2^>NUL`) DO (CALL,SET CERTUTIL_EXE=%%i) )
IF "%CERTUTIL_EXE%"=="" (
    ECHO Error: The system can NOT find the specified file: certutil.exe.
    REM 2 = "The system cannot find the file specified."
    CALL :PAUSE_AND_EXIT /B 2
    REM NOTE^(review^): :PAUSE_AND_EXIT is defined elsewhere; if it returns here,
    REM stop now instead of continuing without certutil.
    SET "bf_CODE_PAGE="
    EXIT /B 2
)
:: 1252: ANSI - Latin I
:: CHCP 1252 1>NUL 2>&1
:: Write the hex text "FFFE" and let certutil decode it in place into the two
:: bytes FF FE (the UTF-16LE byte order mark).
ECHO FFFE>"%TEMP%\U.bom"
("%CERTUTIL_EXE%" -f -Unicode -decodehex "%TEMP%\U.bom" "%TEMP%\U.bom" 2) 1>NUL 2>&1
SET "CERTUTIL_EXE="
:: 936: ANSI/OEM - Simplified Chinese GBK
CHCP 936 1>NUL 2>&1
:: CMD /U makes TYPE emit Unicode; the /C switch must come last.
CMD /U/D/C TYPE "%~2">>"%TEMP%\U.bom"
:A2U_OUTPUT_UTF8
:: 65001: UTF-8
CHCP 65001 1>NUL 2>&1
TYPE "%TEMP%\U.bom">"%~1"
:: Restore the code page saved at entry and clean up.
CHCP %bf_CODE_PAGE% 1>NUL 2>&1
SET "bf_CODE_PAGE="
DEL /F /Q "%TEMP%\U.bom" 1>NUL 2>&1
GOTO :EOF
:: END OF ANSI936_to_UTF8