#!/usr/bin/env python
# -*- coding: utf-8 -*-
#find_non_utf8.py
import os,sys
import time
# Wall-clock timestamp taken at startup; the script subtracts it from the
# current time at the end to report the total execution time.
start_time = time.time()
def isUTF8(text):
    """Return True if *text* is valid UTF-8, False otherwise.

    *text* is normally a byte string (e.g. one raw line read from a file);
    it is decoded strictly, so any malformed or truncated byte sequence
    yields False.  Already-decoded text is accepted as well and is reported
    as valid when it can be encoded to UTF-8.
    """
    if isinstance(text, (bytes, bytearray)):
        # ``.decode`` exists on byte strings in both Python 2 and Python 3,
        # unlike the Python 2-only ``unicode`` builtin.
        try:
            text.decode('utf-8', 'strict')
        except UnicodeDecodeError:
            return False
        return True
    # Text that is already decoded: valid as long as it round-trips to UTF-8.
    try:
        text.encode('utf-8', 'strict')
    except UnicodeEncodeError:
        return False
    return True
# Both files live in the current working directory.
pwd = os.getcwd()
inFileName = pwd + r'/mi_201505_31192_01_01_001.dat'  # input file
outFileName = pwd + r'/mi_201505_31192_01_01_001.dat.non-utf8'  # output file

# Single-argument print(...) calls emit the same text whether print is the
# Python 2 statement or the Python 3 function.
print('input file :  %s' % inFileName)
print('output file:  %s \n' % outFileName)

# Binary mode keeps every line as raw bytes: no newline translation and no
# implicit decoding, so invalid sequences reach isUTF8() untouched and the
# offending lines are written out exactly as they were read.  The ``with``
# statement closes both files even if reading or writing fails.
with open(inFileName, 'rb') as inFile, open(outFileName, 'wb') as outFile:
    # Stream one line at a time so large inputs are never held in memory.
    for line_str in inFile:
        if not isUTF8(line_str):
            outFile.write(line_str)

print('\nexecution time: %s second(s)\n' % (time.time() - start_time))