utf8与ansi之间的转换

// file.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <iostream>
#include <string>
#include <vector>
#include <fstream>
#include <windows.h>
#include <boost/filesystem.hpp>

namespace fs = boost::filesystem;

// Base element count for the per-line conversion buffers in utf8_ansi/ansi_utf8.
const int COUNT = 1024;
// Status strings returned by the converters and printed by code_convert.
const std::string ret_success = "success";
const std::string ret_src_open = "error:源文件打开错误"; // source file could not be opened
const std::string ret_dst_open = "error:目标文件打开错误"; // destination file could not be opened
const std::string ret_over_line = "error:文件中某行字符数过大"; // a line in the file has too many characters
const std::string ret_type_convert = "error:转换类型不正确"; // the requested conversion type is invalid

typedef enum { UTF8_ANSI, ANSI_UTF8} TYPE_CONVERT; // conversion direction
std::string utf8_ansi(std::string utf8_path, std::string ansi_path); // convert a UTF-8 file to ANSI
std::string ansi_utf8(std::string ansi_path, std::string utf8_path); // convert an ANSI file to UTF-8
int get_filenames(const std::string& dir, std::vector<std::string>& filenames); // collect every file name under a directory
void code_convert(const std::string& src_dir,
				  const std::string& dst_dir,
				  TYPE_CONVERT type_convert,
				  std::string expanded_names = ".h.cpp.txt"); // convert every file under a directory that has one of the listed extensions; write paths with "//"

// Console entry point. As written it performs no conversion: it only runs the
// shell command "pause" and then returns 0. argc/argv are not used.
// NOTE(review): code_convert() is never called from here — presumably a call
// with the desired directories is meant to be inserted before building; confirm.
int _tmain(int argc, _TCHAR* argv[])
{
	
	system("pause");
	return 0;
}

// Converts the UTF-8 text file at utf8_path into a text file at ansi_path encoded
// in the system ANSI code page (CP_ACP).
//
// The file is processed line by line. A UTF-8 byte-order mark (EF BB BF) at the
// very start of the file is dropped. Each line is converted UTF-8 -> UTF-16 ->
// ANSI with buffers sized by the Win32 API itself, so there is no fixed line
// length limit. The output ends with a newline only if the input did.
//
// Parameters:
//   utf8_path - path of the existing UTF-8 source file.
//   ansi_path - path of the destination file; created or truncated.
// Returns:
//   ret_success on success, ret_src_open / ret_dst_open when a file cannot be
//   opened, ret_over_line when a line is too long to hand to the Win32 API, or
//   "error:编码转换失败" when a Win32 conversion call fails. On a mid-file error
//   the destination keeps the lines converted so far.
std::string utf8_ansi(std::string utf8_path, std::string ansi_path)
{
	// Reported when MultiByteToWideChar/WideCharToMultiByte fails for a line.
	const std::string ret_convert_fail = "error:编码转换失败";
	// The Win32 conversion functions take their lengths as int.
	const std::string::size_type max_line_bytes = 0x7FFFFFFF;

	std::fstream fsrc(utf8_path.c_str(), std::fstream::in);
	if (!fsrc.is_open())
	{
		return ret_src_open;
	}

	// Open (and thereby truncate) the destination only after the source is known
	// to be readable, so a missing source does not wipe an existing destination.
	std::fstream fdst(ansi_path.c_str(), std::fstream::out | std::fstream::trunc);
	if (!fdst.is_open())
	{
		return ret_dst_open;
	}

	std::string line;
	std::vector<wchar_t> wide;
	std::vector<char> ansi;
	wide.reserve(COUNT);
	ansi.reserve(COUNT << 1);
	bool is_first_line = true;

	// Looping on getline's result (instead of eof()) avoids emitting a phantom
	// empty line after the last real one.
	while (std::getline(fsrc, line))
	{
		// eof() is set here only when the line ended at end-of-file without '\n'.
		const bool had_newline = !fsrc.eof();

		const char* bytes = line.data();
		std::string::size_type byte_count = line.size();

		if (is_first_line)
		{
			is_first_line = false;
			// Strip the BOM by looking at the raw bytes, so a real leading '?'
			// in the text is never mistaken for a converted BOM.
			if (byte_count >= 3 &&
				0xEF == static_cast<unsigned char>(bytes[0]) &&
				0xBB == static_cast<unsigned char>(bytes[1]) &&
				0xBF == static_cast<unsigned char>(bytes[2]))
			{
				bytes += 3;
				byte_count -= 3;
			}
		}

		if (byte_count > max_line_bytes)
		{
			return ret_over_line;
		}

		// The Win32 functions reject a zero-length input, so empty lines skip
		// conversion and only contribute their newline.
		if (byte_count > 0)
		{
			const int src_len = static_cast<int>(byte_count);

			// First call asks for the required size, second call converts.
			const int wide_len = ::MultiByteToWideChar(CP_UTF8, 0, bytes, src_len, NULL, 0);
			if (wide_len <= 0)
			{
				return ret_convert_fail;
			}
			wide.resize(wide_len);
			if (::MultiByteToWideChar(CP_UTF8, 0, bytes, src_len, &wide[0], wide_len) != wide_len)
			{
				return ret_convert_fail;
			}

			const int ansi_len = ::WideCharToMultiByte(CP_ACP, 0, &wide[0], wide_len, NULL, 0, NULL, NULL);
			if (ansi_len <= 0)
			{
				return ret_convert_fail;
			}
			ansi.resize(ansi_len);
			if (::WideCharToMultiByte(CP_ACP, 0, &wide[0], wide_len, &ansi[0], ansi_len, NULL, NULL) != ansi_len)
			{
				return ret_convert_fail;
			}

			fdst.write(&ansi[0], ansi_len);
		}

		if (had_newline)
		{
			fdst.put('\n');
		}
	}

	// Both streams are closed by their destructors.
	return ret_success;
}

// Converts the text file at ansi_path, encoded in the system ANSI code page
// (CP_ACP), into a UTF-8 text file at utf8_path.
//
// The output always starts with the UTF-8 byte-order mark (EF BB BF). The input
// is processed line by line; each line is converted ANSI -> UTF-16 -> UTF-8 with
// buffers sized by the Win32 API itself, so there is no fixed line length limit.
// The output ends with a newline only if the input did.
//
// Parameters:
//   ansi_path - path of the existing ANSI source file.
//   utf8_path - path of the destination file; created or truncated.
// Returns:
//   ret_success on success, ret_src_open / ret_dst_open when a file cannot be
//   opened, ret_over_line when a line is too long to hand to the Win32 API, or
//   "error:编码转换失败" when a Win32 conversion call fails. On a mid-file error
//   the destination keeps the BOM and the lines converted so far.
std::string ansi_utf8(std::string ansi_path, std::string utf8_path)
{
	// Reported when MultiByteToWideChar/WideCharToMultiByte fails for a line.
	const std::string ret_convert_fail = "error:编码转换失败";
	// The Win32 conversion functions take their lengths as int.
	const std::string::size_type max_line_bytes = 0x7FFFFFFF;

	std::fstream fsrc(ansi_path.c_str(), std::fstream::in);
	if (!fsrc.is_open())
	{
		return ret_src_open;
	}

	// Open (and thereby truncate) the destination only after the source is known
	// to be readable, so a missing source does not wipe an existing destination.
	std::fstream fdst(utf8_path.c_str(), std::fstream::out | std::fstream::trunc);
	if (!fdst.is_open())
	{
		return ret_dst_open;
	}

	// UTF-8 file header (byte-order mark).
	const unsigned char head[3] = {0xEF, 0xBB, 0xBF};
	fdst.write(reinterpret_cast<const char*>(head), 3);

	std::string line;
	std::vector<wchar_t> wide;
	std::vector<char> utf8;
	wide.reserve(COUNT);
	utf8.reserve(COUNT << 1);

	// Looping on getline's result (instead of eof()) avoids emitting a phantom
	// empty line after the last real one.
	while (std::getline(fsrc, line))
	{
		// eof() is set here only when the line ended at end-of-file without '\n'.
		const bool had_newline = !fsrc.eof();

		if (line.size() > max_line_bytes)
		{
			return ret_over_line;
		}

		// The Win32 functions reject a zero-length input, so empty lines skip
		// conversion and only contribute their newline.
		if (!line.empty())
		{
			const int src_len = static_cast<int>(line.size());

			// First call asks for the required size, second call converts.
			const int wide_len = ::MultiByteToWideChar(CP_ACP, 0, line.data(), src_len, NULL, 0);
			if (wide_len <= 0)
			{
				return ret_convert_fail;
			}
			wide.resize(wide_len);
			if (::MultiByteToWideChar(CP_ACP, 0, line.data(), src_len, &wide[0], wide_len) != wide_len)
			{
				return ret_convert_fail;
			}

			// One UTF-16 unit can need up to three UTF-8 bytes, so the size is
			// queried rather than assumed.
			const int utf8_len = ::WideCharToMultiByte(CP_UTF8, 0, &wide[0], wide_len, NULL, 0, NULL, NULL);
			if (utf8_len <= 0)
			{
				return ret_convert_fail;
			}
			utf8.resize(utf8_len);
			if (::WideCharToMultiByte(CP_UTF8, 0, &wide[0], wide_len, &utf8[0], utf8_len, NULL, NULL) != utf8_len)
			{
				return ret_convert_fail;
			}

			fdst.write(&utf8[0], utf8_len);
		}

		if (had_newline)
		{
			fdst.put('\n');
		}
	}

	// Both streams are closed by their destructors.
	return ret_success;
}

int get_filenames(const std::string& dir, std::vector<std::string>& filenames)
{
	fs::path path(dir);
	if (!fs::exists(path))
	{
		return -1;
	}

	fs::directory_iterator end_iter;
	for (fs::directory_iterator iter(path); iter!=end_iter; ++iter)
	{
		if (fs::is_regular_file(iter->status()))
		{
			filenames.push_back(iter->path().string());
		}

		if (fs::is_directory(iter->status()))
		{
			get_filenames(iter->path().string(), filenames);
		}
	}

	return filenames.size();
}

void code_convert(const std::string& src_dir,
				  const std::string& dst_dir,
				  TYPE_CONVERT type_convert,
				  std::string expanded_names)
{
	int pos, first, second;
	std::string src_path;
	std::string src_right;
	std::string dst_path;
	std::vector<std::string> vs_expnames;
	std::vector<std::string> src_filenames;
	std::vector<std::string>::iterator iter1, iter2;

	if (UTF8_ANSI!=type_convert && ANSI_UTF8!=type_convert)
	{
		std::cout << ret_type_convert << std::endl;
		return;
	}

	if (get_filenames(src_dir, src_filenames) > 0)
	{
		// 获取所有扩展名
		while ((first = expanded_names.find('.')) != expanded_names.npos)
		{
			second = expanded_names.find('.', first+1);
			if (second != expanded_names.npos)
			{
				vs_expnames.push_back(expanded_names.substr(first, second-first));
				expanded_names = expanded_names.substr(second);
			}
			else
			{
				vs_expnames.push_back(expanded_names.substr(first));
				break;
			}
		}
		
		int num = 0;
		for (iter1=src_filenames.begin(); iter1!=src_filenames.end(); ++iter1)
		{
			// 获取目标文件路径
			src_path = (*iter1);
			pos = src_path.find(src_dir);
			src_right = src_path.substr(pos+src_dir.size(), src_path.size()-pos-src_dir.size());
			dst_path = dst_dir + src_right;

			// 转换指定扩展名文件
			for (iter2=vs_expnames.begin(); iter2!=vs_expnames.end(); ++iter2)
			{
				if (dst_path.substr(dst_path.size()-(*iter2).size()) == (*iter2))
				{
					std::cout << ++num << ": " << src_path << " => " << dst_path << std::endl;

					std::cout << ((UTF8_ANSI==type_convert) ?
						utf8_ansi(src_path, dst_path) :
						ansi_utf8(src_path, dst_path)) << std::endl;
					break;
				}
			}
		}
	}
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值