文件编码方式批量转换

最新推荐文章于 2024-04-10 15:03:20 发布

xxgui1992

最新推荐文章于 2024-04-10 15:03:20 发布

阅读量540

点赞数 1

分类专栏： shell脚本与工具

本文链接：https://blog.csdn.net/u010243305/article/details/85841635

版权

shell脚本与工具专栏收录该内容

3 篇文章 0 订阅

订阅专栏

脚本formatCH.sh

#!/bin/bash
#
# formatCH.sh - convert one file to Unix line endings and strip trailing
# spaces/tabs from every line.
#
# Usage: formatCH.sh FILE
#
# Exits non-zero on a usage error or when any conversion step fails; in the
# failure case FILE is left as the last successful step produced it.

if [[ $# -ne 1 ]]; then
	echo "must with target file" >&2
	exit 1
fi

target=$1

if [[ ! -f "$target" ]]; then
	echo "not a regular file: $target" >&2
	exit 1
fi

dos2unix -- "$target" || exit 1

# Work on a uniquely named sibling file so an existing "<FILE>_f1" is never
# clobbered, and make sure it is removed on every exit path.
tmp=$(mktemp "${target}.XXXXXX") || exit 1
trap 'rm -f -- "$tmp"' EXIT

# Strip trailing spaces and tabs. [[:blank:]] is used instead of [ \t]
# because "\t" inside a bracket expression is only a tab in GNU sed.
sed 's/[[:blank:]]*$//' <"$target" >"$tmp" || exit 1

# Copy the result back (rather than mv) so FILE keeps its permissions, and
# only after sed has succeeded.
cat "$tmp" >"$target" || exit 1

上述脚本虽然能删除行尾的空格与制表符，但处理后的文件末尾可能会多出一行 ^@（即 NUL 字符）。
为此自己写了一个小工具：检测文件最后一行是否由 NUL 字符组成，如果是，就把这一行截掉。

#include <stdio.h>
#include <unistd.h>

/*
 * Read fp forward from its current position to end-of-file, leaving the
 * last line that was read in buf.
 *
 * fp  - stream opened for reading.
 * buf - caller-supplied buffer of at least 1024 bytes. A line longer than
 *       1023 bytes is read in pieces, so buf then holds only its final
 *       piece.
 *
 * Returns 0 when at least one line was read without error, -1 otherwise.
 * When the stream is already at end-of-file, buf is left unmodified.
 */
int getlastline(FILE* fp, char buf[1024])
{
	/*
	 * An array parameter decays to a pointer, so sizeof(buf) would be the
	 * pointer size rather than 1024; spell the capacity out instead.
	 */
	enum { LINE_CAPACITY = 1024 };
	int lines_read = 0;

	/* fgets leaves buf untouched once it hits EOF with nothing read, so
	 * after the loop buf still holds the last successfully read line. */
	while (fgets(buf, LINE_CAPACITY, fp) != NULL)
		lines_read = 1;

	if (lines_read == 0 || ferror(fp))
		return -1;

	return 0;
}


/*
 * Scan fp backwards from end-of-file and locate the start of its last line.
 *
 * The file is expected to end with a newline: the first '\n' met while
 * scanning backwards is the terminator of the last line, and the second one
 * terminates the line before it.
 *
 * Returns the offset of the byte just after the second newline from the
 * end, 0 when the whole file is a single newline-terminated line, and -1
 * when the file contains no newline at all or an I/O call fails.
 *
 * NOTE: the result is narrowed to int, so offsets beyond INT_MAX cannot be
 * represented.
 */
int findenterkey(FILE *fp)
{
	long pos;
	int newlines = 0;
	int ch;

	if (fseek(fp, 0, SEEK_END) != 0)
		return -1;

	pos = ftell(fp);
	if (pos < 0)
		return -1;

	/* Stop at offset 0 instead of seeking to negative offsets forever. */
	while (pos > 0) {
		pos--;
		if (fseek(fp, pos, SEEK_SET) != 0)
			return -1;

		ch = fgetc(fp);
		if (ch == EOF)
			return -1;

		if (ch == '\n') {
			newlines++;
			if (newlines == 2)
				return (int)(pos + 1);
		}
	}

	/* Reached the start of the file before seeing a second newline. */
	return (newlines == 1) ? 0 : -1;
}

/*
 * Usage: <program> FILE
 *
 * Reads the last line of FILE; when that line starts with two NUL bytes the
 * file is treated as damaged and is truncated at the offset reported by
 * findenterkey(), which removes that last line.
 *
 * Returns 0 on success (including when nothing had to be changed) and -1 on
 * a usage error or when an I/O step fails.
 */
int main(int argc, char *argv[])
{
	FILE *fp;
	char buf[1024] = {0};
	long size;
	long offset;
	int status = 0;

	if(argc < 2) {
		printf("please input target file \n");
		return -1;
	}

	/* "r+" instead of "a+": never create a missing file, and start reading
	 * at offset 0 on every platform while still allowing ftruncate(). */
	fp = fopen(argv[1], "r+");
	if (fp == NULL) {
		perror(argv[1]);
		return -1;
	}

	/* An empty file has no last line to inspect; stop before buf is used. */
	if (fseek(fp, 0, SEEK_END) != 0 || (size = ftell(fp)) < 0) {
		perror(argv[1]);
		fclose(fp);
		return -1;
	}
	if (size == 0) {
		fclose(fp);
		return 0;
	}
	rewind(fp);

	getlastline(fp, buf);
	printf("%s\n",buf);
	if (buf[0] == 0 && buf[1]==0 ) {
		/* The original format string had %s but no matching argument. */
		printf("%s is unformat file,will delete last line \n", argv[1]);
		offset = findenterkey(fp);
		if (offset < 0) {
			fprintf(stderr, "%s: cannot locate start of last line\n", argv[1]);
			status = -1;
		} else {
			printf("will inset EOF on seek %ld \n", offset);
			if (ftruncate(fileno(fp), offset) != 0) {
				perror("ftruncate");
				status = -1;
			}
		}
	}

	if (fclose(fp) != 0)
		status = -1;

	return status;
}

在 Linux 下运行脚本 encode_convert.sh（参数为源码目录），即可批量转换该目录下所有 .c/.h 文件的编码：

#!/bin/bash
#
# encode_convert.sh - normalize every *.c / *.h file under a directory:
#   1. files that file(1) describes as ISO-8859 text are converted from GBK
#      to UTF-8,
#   2. line endings are converted to Unix style,
#   3. trailing spaces and tabs are stripped from every line.
#
# Usage: encode_convert.sh [DIR]      (DIR defaults to the current directory)

set -u

root=${1:-.}

#######################################
# Filter a file through a command and replace the file's contents with the
# command's output, but only when the command succeeds.
# Arguments: $1 - file to rewrite; remaining args - command reading stdin
#            and writing stdout
# Returns:   0 on success; 1 on failure, with the file left untouched when
#            the command itself failed
#######################################
rewrite_in_place() {
	local target=$1
	shift
	local tmp
	tmp=$(mktemp "${target}.XXXXXX") || return 1
	# Copy back with cat (not mv) so the target keeps its permissions.
	if "$@" <"$target" >"$tmp" && cat "$tmp" >"$target"; then
		rm -f -- "$tmp"
		return 0
	fi
	rm -f -- "$tmp"
	return 1
}

# NUL-delimited find output keeps paths with spaces or newlines intact.
while IFS= read -r -d '' f; do
	# -b drops the "name:" prefix, so the match below does not depend on
	# how many words the file name or the description contain.
	encode=$(file -b -- "$f")
	echo "encode = $encode"
	if [[ "$encode" == *ISO-8859* ]]; then
		# The source is replaced only if iconv succeeded; the original
		# removed it unconditionally and could lose the file.
		if ! rewrite_in_place "$f" iconv -f GBK -t UTF-8; then
			echo "iconv failed, skipping: $f" >&2
			continue
		fi
	fi

	if ! dos2unix -- "$f"; then
		echo "dos2unix failed, skipping: $f" >&2
		continue
	fi

	# Strip trailing spaces and tabs. [[:blank:]] is used instead of [ \t]
	# because "\t" inside a bracket expression is only a tab in GNU sed.
	rewrite_in_place "$f" sed 's/[[:blank:]]*$//' \
		|| echo "sed failed: $f" >&2
done < <(find "$root" -type f -name '*.[ch]' -print0)

完美解决中文乱码问题

xxgui1992

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
文件编码方式批量转换

在 Linux 下运行如下脚本：#!/bin/bash for f in `find $1 -name "*.[ch]"` do encode=`file $f | awk '{print $4}'` if [ $encode = "ISO-8859" ]; then iconv -f GBK -t UTF-8 $f -o ${f}_utf8 rm $f mv ${f}_utf...
复制链接

扫一扫