由于泄露出来的CSDN数据库文件不是标准的sql语句,所以产生了用python进行转换的想法,顺便熟悉一下python对字符串和列表的操作效率。
原文件解码用的是decode('gb18030'),但仍出现了2个解码错误。不知哪位大侠能告诉我解决办法。
我用的是str.split()函数对file.readline()进行简单的截断。但是感觉很不优雅,却想不出简单的办法,坐等指点。
下面把处理函数贴出来
# -*- coding: utf-8 -*-
def d2(self, select, output):
    """Convert a GB18030 account dump into one multi-row SQL INSERT statement.

    Every non-blank line of ``select`` is decoded as GB18030, has its single
    quotes doubled, and is split on whitespace.  Tokens 0, 2 and 4 become the
    ``username``, ``userpass`` and ``useremail`` values of one row.
    (Assumes lines look like ``name <sep> password <sep> email`` with a
    one-token separator -- TODO confirm against the actual dump.)

    A line that cannot be decoded, or that has fewer than five tokens, is
    replaced by a fixed placeholder row so the conversion keeps going.
    Blank lines are skipped.  If the source contains no rows, nothing is
    written at all, so the output never holds a dangling ``VALUES`` clause.

    Args:
        self: Object exposing ``label_3.setText``.  It is only used to report
            progress, once every 1,000,000 rows.
        select: Path of the GB18030-encoded source file.
        output: Path of the SQL file.  It is opened in append mode, so
            existing content is preserved.

    Raises:
        IOError: (``OSError`` on Python 3) if either file cannot be opened.
    """
    header = u'INSERT INTO `csdn_csdnuser` (`username`, `userpass`, `useremail`) VALUES\n'
    row_format = u"('%s', '%s', '%s')"
    placeholder = row_format % (u'qtsharp', u' ', u'qtsharp@qq.com')
    report_every = 1000000
    written = 0

    # Binary mode keeps the bytes untouched on every platform and makes the
    # explicit decode/encode below valid on both Python 2 and Python 3.
    # NOTE(review): only single quotes are escaped; a value containing a
    # backslash may still break the statement on servers that treat
    # backslash as an escape character -- confirm the target SQL mode.
    with open(select, 'rb') as source, open(output, 'ab') as outsql:
        for raw_line in source:
            try:
                fields = raw_line.decode('gb18030').replace(u"'", u"''").split()
                if not fields:
                    # A blank line is not end-of-file; keep reading.
                    continue
                row = row_format % (fields[0], fields[2], fields[4])
            except (UnicodeDecodeError, IndexError):
                row = placeholder

            # The separator always goes in front of a row, so a placeholder
            # row can never produce a doubled comma.
            prefix = header if written == 0 else u',\n'
            outsql.write((prefix + row).encode('utf8'))
            written += 1

            if written % report_every == 0:
                self.label_3.setText(u'已经转换%s个' % written)

        if written:
            outsql.write(b';')
下面是用PySide建的程序界面
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
from PySide.QtCore import *
from PySide.QtGui import *
import thread
from form import Ui_Form
from output_func import d2


class MainUi(QWidget, Ui_Form):
    """Main window: pick a source file and an output file, then convert.

    The widgets (``select_button``, ``output_button``, ``start_button``,
    ``label``, ``label_2``, ``label_3``) are expected to be created by
    ``Ui_Form.setupUi``.  The conversion itself is delegated to ``d2`` and
    runs on a separate thread.
    """

    # Paths chosen through the file dialogs; an empty string means
    # "not chosen yet".
    _select = ''
    _output = ''

    def __init__(self, parent=None):
        """Build the form and wire the buttons to their handlers."""
        super(MainUi, self).__init__(parent)
        self.setupUi(self)
        self._connect_slot()

    def _connect_slot(self):
        """Connect each button's ``clicked`` signal to its handler."""
        self.select_button.clicked.connect(self.change_select)
        self.output_button.clicked.connect(self.change_output)
        self.start_button.clicked.connect(self.start_out)

    def change_select(self):
        """Ask for the source file and show the chosen path in ``label``."""
        select_name = QFileDialog.getOpenFileName(self, u'选择源文件', QDir.currentPath())
        # The dialog result is indexed; element 0 is used as the path.
        self._select = select_name[0]
        self.label.setText(self._select)

    def change_output(self):
        """Ask for the output file and show the chosen path in ``label_2``."""
        output_name = QFileDialog.getSaveFileName(self, u'保存输出文件', QDir.currentPath())
        self._output = output_name[0]
        self.label_2.setText(self._output)

    def start_out(self):
        """Start the conversion on a new thread."""
        # Set the status BEFORE starting the worker: the worker may report
        # an error right away, and writing "in progress" afterwards would
        # overwrite that message.
        self.label_3.setText(u'转换进行中')
        thread.start_new_thread(self._out_thread, ())

    def _out_thread(self):
        """Worker body: validate the two paths, run ``d2``, report the result.

        NOTE(review): this runs on the thread created by ``start_out`` and
        calls ``label_3.setText`` directly (as does ``d2``).  Qt documents
        widgets as usable from the GUI thread only -- consider reporting
        status through a signal instead.
        """
        if not self._select:
            self.label_3.setText(u'你还没有设置源文件')
            return
        if not self._output:
            self.label_3.setText(u'你还没有设置保存路径')
            return

        try:
            d2(self, self._select, self._output)
        except Exception:
            self.label_3.setText(u'出错了')
        else:
            self.label_3.setText(u'完成')
def main():
    """Create the Qt application, show the main window and run the event loop.

    The process exits with the value returned by the event loop.
    """
    application = QApplication(sys.argv)
    window = MainUi()
    window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
form.py是用Qt Creator创建的form.ui文件经pyside-uic.exe转换得来,这里就不贴出来了。
下面是最终效果