!!!!!!!!!!!!!!!!!!
###############
#coding=gb18030
import sys
import os
import json
%%%%%%%%%%%%%%%%%%%%
# Python 2 only: reload(sys) is called so that sys.setdefaultencoding() can be
# used below (NOTE(review): the attribute is normally removed at interpreter
# start-up and reloading the module brings it back -- confirm for the target
# interpreter).
reload(sys)
#sys.setdefaultencoding("utf-8")
# Make gb18030 the codec for implicit str <-> unicode conversions.
# diff_check() decodes every record with gb18030 and then prints the resulting
# unicode objects to ordinary files; presumably that output step relies on
# this default -- verify before changing or removing it.
sys.setdefaultencoding("gb18030")
################################################################################
def diff_check(before_file, after_file, out_request_file, out_query_file, out_diff_file):
clear_file_content(out_request_file)
clear_file_content(out_query_file)
clear_file_content(out_diff_file)
out_request_f = open(out_request_file, "a")
out_query_f = open(out_query_file, "a")
out_diff_f = open(out_diff_file, "a")
#sort before_file to temp file a and open file a
os.system('sort %s > a'%before_file)
before_sort_f = open("./a", "r")
#sort after_file to temp file b and open file b
os.system('sort %s > b'%after_file)
after_sort_f = open("./b", "r")
before_lines = before_sort_f.readlines()
after_lines = after_sort_f.readlines()
i = 0
continue_num = 0
j = 0
for before_line in before_lines:
after_line = after_lines[i]
i += 1
if before_line.find('query') == -1 and after_line.find('query') == -1:
#both have no query
continue_num +=1
continue
before_line = before_line.strip('\n')
after_line = after_line.strip('\n')
before_req, before_rsp = before_line.split('\t', 1)
after_req, after_rsp = after_line.split('\t', 1)
j += 1
try:
before_req = before_req.decode('gb18030','ignore')
before_rsp = before_rsp.decode('gb18030','ignore')
before_req_json = json.loads(before_req)
#before_rsp_json = json.loads(before_rsp)
# before_req_json = eval(before_req)
before_rsp_json = eval(before_rsp)
after_req = after_req.decode('gb18030','ignore')
after_rsp = after_rsp.decode('gb18030','ignore')
after_req_json = json.loads(after_req)
# after_rsp_json = json.loads(after_rsp)
# after_req_json = eval(after_req)
after_rsp_json = eval(after_rsp)
#get count_donw and source_type
before_count_down = ""
before_soure_type = []
after_count_down = ""
after_source_type = []
before_count_down = get_count_down(before_rsp_json)
after_count_down = get_count_down(after_rsp_json)
before_source_type = get_source_type(before_rsp_json)
after_source_type = get_source_type(after_rsp_json)
before_content = get_content(before_rsp_json)
after_content = get_content(after_rsp_json)
if before_line.find('query') != -1 and after_line.find('query') != -1:
#both have query
before_query_name = before_req_json['query'].strip()
after_query_name = after_req_json['query'].strip()
if before_query_name == after_query_name:
#equal, then compare count_down
if before_count_down == after_count_down:
#equal count_down, then compare source_type
if before_source_type != after_source_type:
#source_type not equal
#write request to file
print >> out_request_f, before_req
#write query to file
print >> out_query_f, before_query_name
#write diff to file
print >> out_diff_f, before_query_name,":",after_query_name,"\t",before_count_down,":",after_count_down,"\t",before_source_type,":",after_source_type, "\t", before_content,":",after_content
else:
if before_content != after_content:
#source_type not equal
#write request to file
print >> out_request_f, before_req
#write query to file
print >> out_query_f, before_query_name
#write diff to file
print >> out_diff_f, before_query_name,":",after_query_name,"\t",before_count_down,":",after_count_down,"\t",before_source_type,":",after_source_type, "\t", before_content,":",after_content
else:
#count_down not equal, then write to outfile
#write request to file
print >> out_request_f, before_req
#write query to file
if len(before_query_name) > 0:
print >> out_query_f, before_query_name
#write diff to file
print >> out_diff_f, before_query_name,":",after_query_name,"\t",before_count_down,":",after_count_down,"\t",before_source_type,":",after_source_type, "\t", before_content,":",after_content
else:
#query not equal,then write to outfile
#write request to file
print >> out_request_f, before_req
#write query to file
if len(before_query_name) > 0:
print >> out_query_f, before_query_name
else:
print >> out_query_f, after_query_name
#write diff to file
print >> out_diff_f, before_query_name,":",after_query_name,"\t",before_count_down,":",after_count_down,"\t",before_source_type,":",after_source_type, "\t", before_content,":",after_content
else:
#have one query or both query is empty
#write request to file
print >> out_request_f, before_req
#write query to file
if len(before_query_name) > 0:
print >> out_query_f, before_query_name
elif len(after_query_name) > 0:
print >> out_query_f, after_query_name
#write diff to file
print >> out_diff_f, before_query_name,":",after_query_name,"\t",before_count_down,":",after_count_down,"\t",before_source_type,":",after_source_type, "\t", before_content,":",after_content
except Exception as e:
print "%d [ERROR] diff_data error: error INFO is : %s"%(j, e)
out_request_f.close()
out_query_f.close()
out_diff_f.close()
before_sort_f.close()
after_sort_f.close()
def clear_file_content(filename):
    """Truncate ``filename`` to zero length, creating it if it does not exist.

    This is best effort: when the file cannot be opened for writing, the
    message ``file operation error.`` is printed and the function returns
    normally.

    Args:
        filename: Path of the file to empty.
    """
    try:
        # Opening in "w" mode already truncates (or creates) the file, so no
        # explicit truncate() is needed; the with-block guarantees the handle
        # is closed again.
        with open(filename, "w"):
            pass
    except (IOError, OSError):
        # Only I/O failures are swallowed; KeyboardInterrupt and programming
        # errors are no longer hidden as they were by the bare "except:".
        print("file operation error.")
def get_count_down(count_down_json):
    """Return the ``count_down`` value of the first top-level result.

    Reads ``count_down_json['data']['result_list'][0]['count_down']``.

    Args:
        count_down_json: Parsed response; expected to be a dict.

    Returns:
        The stored ``count_down`` value, or ``""`` when the ``data``,
        ``result_list`` or ``count_down`` key is missing or ``result_list``
        is empty.
    """
    # dict.get() replaces dict.has_key(), which no longer exists in Python 3.
    result_list = count_down_json.get('data', {}).get('result_list', [])
    if len(result_list) > 0:
        return result_list[0].get('count_down', "")
    return ""
def get_source_type(source_type_json):
    """Return the first nested ``source_type`` wrapped in a list.

    Reads
    ``source_type_json['data']['result_list'][0]['result_list'][0]['source_type']``.

    Args:
        source_type_json: Parsed response; expected to be a dict.

    Returns:
        A one-element list holding the stripped ``source_type`` string, or an
        empty list when any ``data`` / ``result_list`` level is missing or
        empty.

    Raises:
        KeyError: The first nested entry exists but has no ``source_type``.
    """
    # dict.get() replaces dict.has_key(), which no longer exists in Python 3.
    results = source_type_json.get('data', {}).get('result_list', [])
    if len(results) == 0:
        return []
    entries = results[0].get('result_list', [])
    if len(entries) == 0:
        return []
    return [entries[0]['source_type'].strip()]
def get_content(content_json):
    """Return the stripped ``content`` of the first nested result.

    Reads
    ``content_json['data']['result_list'][0]['result_list'][0]['content']``.

    Args:
        content_json: Parsed response; expected to be a dict.

    Returns:
        The stripped ``content`` string, or ``""`` when any ``data`` /
        ``result_list`` level is missing or empty.

    Raises:
        KeyError: The first nested entry exists but has no ``content``.
    """
    # dict.get() replaces dict.has_key(), which no longer exists in Python 3.
    # Every level has an explicit default, so no local is read before it is
    # assigned when part of the path is absent.
    results = content_json.get('data', {}).get('result_list', [])
    if len(results) == 0:
        return ""
    entries = results[0].get('result_list', [])
    if len(entries) == 0:
        return ""
    return entries[0]['content'].strip()
if __name__ == '__main__':
    # Command line:
    #   BEFORE_FILE AFTER_FILE OUT_REQUEST_FILE OUT_QUERY_FILE OUT_DIFF_FILE
    # Fail with a usage message instead of an IndexError traceback when an
    # argument is missing; arguments after the fifth are ignored as before.
    if len(sys.argv) < 6:
        sys.exit("usage: %s BEFORE_FILE AFTER_FILE OUT_REQUEST_FILE "
                 "OUT_QUERY_FILE OUT_DIFF_FILE" % sys.argv[0])
    before_file, after_file, out_request_file, out_query_file, out_diff_file = sys.argv[1:6]
    diff_check(before_file, after_file, out_request_file, out_query_file, out_diff_file)