#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = ''
import logging, os, argparse, textwrap
import time
import chardet
# Default configuration will take effect when corresponding input args are missing.
# Feel free to change this for your convenience.
DEFAULT_CONF = dict(
    # Only files whose extension appears in this list are scanned or converted.
    exts=['cfg'],
    overwrite=False,
    add_BOM=False,
    convert_UTF=False,
    confi_thres=0.8,
)
# We have to set a minimum threshold ('confi_thres'). Only those target_encoding results returned by chardet that are above that threshold level will be accepted.
# Configure the root logger: records are printed as "LEVEL:message" and
# everything at INFO level or above is shown.
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
# Module-level logger.
log = logging.getLogger(__name__)
class Convert2Utf8:
def __init__(self, args):
self.args = args
def walk_dir(self, dirname):
    """Convert every file under ``dirname`` whose extension is selected.

    The tree rooted at ``dirname`` is traversed with ``os.walk``.  A file is
    passed to ``self.convert_file`` when its extension (the text after the
    last dot, stripped of surrounding whitespace and lower-cased) is found
    in ``self.args.exts``.

    An ``IOError`` raised for one file is logged and the walk continues with
    the next file.  A ``KeyboardInterrupt`` is logged and then terminates
    the program by raising ``SystemExit``.
    """
    for root, _dirs, files in os.walk(dirname):
        for name in files:
            # splitext() keeps the leading dot, so drop it.  strip() removes
            # a stray trailing newline/whitespace and lower() makes the
            # comparison independent of the file name's case.
            extension = os.path.splitext(name)[1][1:].strip().lower()
            if extension not in self.args.exts:
                continue

            fullname = os.path.join(root, name)
            try:
                self.convert_file(fullname)
            except IOError:
                log.error("Unable to read or write the file: %s. Please check the file's permission.", fullname)
            except KeyboardInterrupt:
                log.warning("Interrupted by keyboard (e.g. Ctrl+C)")
                # Raise SystemExit directly instead of calling exit(): that
                # helper is injected by the ``site`` module for interactive
                # use and may be missing (e.g. ``python -S``, frozen apps).
                raise SystemExit
# else: