csv文件导入后台乱码_js-xlsx导入CSV文件中文乱码处理参考

最新推荐文章于 2024-06-18 10:49:09 发布

weixin_39785524

最新推荐文章于 2024-06-18 10:49:09 发布

阅读量1.7k

点赞数

文章标签： csv文件导入后台乱码

本文链接：https://blog.csdn.net/weixin_39785524/article/details/115063058

版权

本文介绍了当使用js-xlsx库导入CSV文件出现中文乱码时的解决方法，主要涉及文件编码的识别和转换。通过检查文件编码，将ANSI编码的CSV文件转换为UTF-8，以及利用isUTF8函数验证文件编码，确保正确读取CSV内容，避免乱码。同时，提供了处理二进制数据的辅助函数fixdata。

摘要由CSDN通过智能技术生成

刚刚有小伙伴问我使用js-xlsx导入CSV文件时会出现中文乱码,其实这种问题基本上都是因为文件的编码不对造成的。看过我简书文章的朋友应该知道,CSV其实就是用逗号分隔字段、用换行分隔每行数据的文本文件,所以我们只要从文件编码入手处理问题就行了

首先我新建一个CSV文件,右键用记事本打开,然后查看默认编码是ANSI(一般也就是GBK)

439fbc4a11a8

我们发现导入后显示乱码了,那我们修改下文件格式为UTF-8再次尝试看下

439fbc4a11a8

FileReader共有4种读取方法：

1.readAsArrayBuffer(file)：将文件读取为ArrayBuffer。

2.readAsBinaryString(file)：将文件读取为二进制字符串

3.readAsDataURL(file)：将文件读取为Data URL

4.readAsText(file, [encoding])：将文件读取为文本，encoding缺省值为'UTF-8'

var wb;// workbook produced by XLSX.read for the most recently loaded file

var rABS = false; // true: read the file as an ArrayBuffer; false: read it as a binary string

var isCSV; // set by importf: whether the selected file has a "csv" extension

/**
 * Imports the first file selected in a file input and shows the first
 * worksheet as JSON in the element with id "demo".
 *
 * CSV files are decoded as code page 936 (GBK) before being handed to
 * XLSX.read, so this version only handles ANSI/GBK-encoded CSV correctly;
 * a UTF-8 encoded CSV is decoded with the wrong code page.
 *
 * Relies on the script-level variables `wb`, `rABS` and `isCSV`, and on the
 * `XLSX` and `cptable` libraries being loaded.
 *
 * @param {HTMLInputElement} obj - the file input whose selection is imported;
 *   its value is cleared afterwards so the same file can be chosen again.
 */
function importf(obj) {
  // Nothing to do when the dialog was cancelled or the list is empty;
  // without this guard `obj.files[0]` is undefined and `.name` throws.
  if (!obj.files || obj.files.length === 0) return;

  const file = obj.files[0];
  const reader = new FileReader();

  reader.onload = function (e) {
    let data = e.target.result;
    wb = null;

    if (isCSV) {
      data = rABS ? new Uint8Array(data) : data;
      // Decode the raw bytes as GBK, then parse the resulting text as CSV.
      const str = cptable.utils.decode(936, data);
      wb = XLSX.read(str, { type: "string" });
    }

    if (!wb) {
      wb = rABS
        ? XLSX.read(btoa(fixdata(data)), { type: "base64" })
        : XLSX.read(data, { type: "binary" });
    }

    // wb.SheetNames[0] is the name of the first sheet;
    // wb.Sheets[name] is that sheet's data.
    const rows = XLSX.utils.sheet_to_json(wb.Sheets[wb.SheetNames[0]]);
    // The rows come from the user's file, so write them as text, not HTML.
    document.getElementById("demo").textContent = JSON.stringify(rows);
  };

  // Match the extension case-insensitively so "DATA.CSV" is also treated as CSV.
  isCSV = /\.csv$/i.test(file.name);

  if (rABS) {
    reader.readAsArrayBuffer(file);
  } else {
    reader.readAsBinaryString(file);
  }

  obj.value = "";
}

/**
 * Converts raw file bytes into a binary string (one character per byte).
 *
 * The bytes are converted in fixed-size chunks so that a single
 * String.fromCharCode.apply call never receives more than CHUNK arguments.
 *
 * @param {ArrayBuffer|Uint8Array} data - the bytes to convert.
 * @returns {string} the binary string.
 */
function fixdata(data) {
  const CHUNK = 10240;
  const pieces = [];
  let index = 0;

  while (index < data.byteLength / CHUNK) {
    const start = index * CHUNK;
    const chunk = new Uint8Array(data.slice(start, start + CHUNK));
    pieces.push(String.fromCharCode.apply(null, chunk));
    index += 1;
  }

  // Whatever lies beyond the last chunk boundary reached by the loop.
  const rest = new Uint8Array(data.slice(index * CHUNK));
  pieces.push(String.fromCharCode.apply(null, rest));

  return pieces.join("");
}

我们使用ANSI编码的文件进行导入测试看下

439fbc4a11a8

但是这时候又有问题了:上面的代码固定按GBK(代码页936)解码,如果导入的文件本身就是UTF-8编码,代码就会报错。所以在解码之前需要先判断文件内容是不是UTF-8编码,下面的isUTF8函数就是用来做这个判断的

439fbc4a11a8

isUTF8.js

/**
 * Reports whether a byte sequence consists solely of well-formed UTF-8.
 *
 * Single bytes are accepted only when they are tab, LF, CR or printable
 * ASCII (0x20-0x7E). Multi-byte sequences must not be overlong, must not
 * encode surrogates, and must not exceed plane 16. A sequence cut off at the
 * end of the input is rejected. An empty input is considered UTF-8.
 *
 * @param {Uint8Array|number[]} bytes - the byte values to inspect.
 * @returns {boolean} true when every byte belongs to an accepted sequence.
 */
function isUTF8(bytes) {
  const between = (value, low, high) => low <= value && value <= high;

  // Each row: [lead low, lead high, second-byte low, second-byte high, length].
  // Bytes after the second one are always plain continuation bytes 0x80-0xBF.
  const multiByteForms = [
    [0xC2, 0xDF, 0x80, 0xBF, 2], // non-overlong 2-byte
    [0xE0, 0xE0, 0xA0, 0xBF, 3], // 3-byte, excluding overlongs
    [0xE1, 0xEC, 0x80, 0xBF, 3], // straight 3-byte
    [0xEE, 0xEF, 0x80, 0xBF, 3], // straight 3-byte
    [0xED, 0xED, 0x80, 0x9F, 3], // 3-byte, excluding surrogates
    [0xF0, 0xF0, 0x90, 0xBF, 4], // planes 1-3
    [0xF1, 0xF3, 0x80, 0xBF, 4], // planes 4-15
    [0xF4, 0xF4, 0x80, 0x8F, 4], // plane 16
  ];

  let pos = 0;
  while (pos < bytes.length) {
    const lead = bytes[pos];

    // Accepted single-byte values: printable ASCII, tab/LF, CR.
    if (between(lead, 0x20, 0x7E) || between(lead, 0x09, 0x0A) || between(lead, 0x0D, 0x0D)) {
      pos += 1;
      continue;
    }

    // Lead-byte ranges are disjoint, so at most one row can match.
    const form = multiByteForms.find((row) => between(lead, row[0], row[1]));
    if (form === undefined) return false;

    const [, , secondLow, secondHigh, length] = form;
    // A missing byte reads as undefined, which fails every range check.
    if (!between(bytes[pos + 1], secondLow, secondHigh)) return false;
    for (let offset = 2; offset < length; offset += 1) {
      if (!between(bytes[pos + offset], 0x80, 0xBF)) return false;
    }

    pos += length;
  }

  return true;
}

demo.html

FileReader共有4种读取方法：

1.readAsArrayBuffer(file)：将文件读取为ArrayBuffer。

2.readAsBinaryString(file)：将文件读取为二进制字符串

3.readAsDataURL(file)：将文件读取为Data URL

4.readAsText(file, [encoding])：将文件读取为文本，encoding缺省值为'UTF-8'

var wb;// workbook produced by XLSX.read for the most recently loaded file

var rABS = false; // true: read the file as an ArrayBuffer; false: read it as a binary string

var isCSV; // set by importf: whether the selected file has a "csv" extension

/**
 * Imports the first file selected in a file input and shows the first
 * worksheet as JSON in the element with id "demo". The detected file kind
 * and CSV encoding are reported in the element with id "ff".
 *
 * CSV files are always read as an ArrayBuffer, checked with isUTF8, decoded
 * to text (UTF-8 or code page 936/GBK) and parsed with `type: "string"`, so
 * the result does not depend on XLSX guessing the encoding. Other files are
 * passed to XLSX.read as base64 or as a binary string depending on `rABS`.
 *
 * Relies on the script-level variables `wb`, `rABS` and `isCSV`, on the
 * sibling functions `isUTF8` and `fixdata`, and on the `XLSX` and `cptable`
 * libraries being loaded.
 *
 * @param {HTMLInputElement} obj - the file input whose selection is imported;
 *   its value is cleared afterwards so the same file can be chosen again.
 */
function importf(obj) {
  // Nothing to do when the dialog was cancelled or the list is empty;
  // without this guard `obj.files[0]` is undefined and `.name` throws.
  if (!obj.files || obj.files.length === 0) return;

  const file = obj.files[0];
  const reader = new FileReader();

  reader.onload = function (e) {
    const data = e.target.result;
    const status = document.getElementById("ff");
    wb = null;

    if (isCSV) {
      const bytes = new Uint8Array(data);
      const utf8 = isUTF8(bytes);
      status.textContent = "是CSV文件,编码" + (utf8 ? "是" : "不是") + "UTF-8";

      // Decode explicitly in both cases. Passing UTF-8 bytes to XLSX.read as
      // base64 only yields correct text when the file starts with a BOM.
      // TextDecoder also strips a leading BOM.
      const text = utf8
        ? new TextDecoder("utf-8").decode(bytes)
        : cptable.utils.decode(936, bytes);
      wb = XLSX.read(text, { type: "string" });
    } else {
      status.textContent = "不是CSV文件";
      wb = rABS
        ? XLSX.read(btoa(fixdata(data)), { type: "base64" })
        : XLSX.read(data, { type: "binary" });
    }

    // wb.SheetNames[0] is the name of the first sheet;
    // wb.Sheets[name] is that sheet's data.
    const rows = XLSX.utils.sheet_to_json(wb.Sheets[wb.SheetNames[0]]);
    // The rows come from the user's file, so write them as text, not HTML.
    document.getElementById("demo").textContent = JSON.stringify(rows);
  };

  // Match the extension case-insensitively so "DATA.CSV" is also treated as CSV.
  isCSV = /\.csv$/i.test(file.name);

  // CSV needs the raw bytes for encoding detection, whatever rABS says.
  if (rABS || isCSV) {
    reader.readAsArrayBuffer(file);
  } else {
    reader.readAsBinaryString(file);
  }

  obj.value = "";
}

/**
 * Converts raw file bytes into a binary string (one character per byte).
 *
 * The bytes are converted in fixed-size chunks so that a single
 * String.fromCharCode.apply call never receives more than CHUNK arguments.
 *
 * @param {ArrayBuffer|Uint8Array} data - the bytes to convert.
 * @returns {string} the binary string.
 */
function fixdata(data) {
  const CHUNK = 10240;
  const pieces = [];
  let index = 0;

  while (index < data.byteLength / CHUNK) {
    const start = index * CHUNK;
    const chunk = new Uint8Array(data.slice(start, start + CHUNK));
    pieces.push(String.fromCharCode.apply(null, chunk));
    index += 1;
  }

  // Whatever lies beyond the last chunk boundary reached by the loop.
  const rest = new Uint8Array(data.slice(index * CHUNK));
  pieces.push(String.fromCharCode.apply(null, rest));

  return pieces.join("");
}

439fbc4a11a8

关于js-xlsx导入CSV文件中文乱码的说明大概就讲这么多了,一般情况下就这么处理就行了,如果因为其他文件编码造成乱码或者报错,可以参考这篇文章的解决思路进行处理吧 (￣▽￣)／

weixin_39785524

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫