这次是把爬取到的页面URL、js、css地址和访问后的状态码保存起来,存入数据库。我在本地Linux虚拟机上建立了一个数据库,并创建了如下数据表:
-- One row per crawled address (page URL, js or css) together with the
-- status code obtained when it was requested.
CREATE TABLE searchurl (
    ID          INT          NOT NULL AUTO_INCREMENT,  -- surrogate key
    URLADDRESS  VARCHAR(100) NOT NULL,                 -- crawled address
    RESULTCODE  VARCHAR(10)  NOT NULL,                 -- status code, stored as text
    ADDTIME     VARCHAR(20),                           -- insertion time as a string supplied by the application
    PRIMARY KEY (ID)
) DEFAULT CHARSET utf8 COLLATE utf8_unicode_ci;
后面那句default charset utf8 collate utf8_unicode_ci要加上,把表的默认字符集设置为utf-8,不然,当程序插入的字符串中含有中文等表的默认字符集无法表示的字符时,
会报错,类型:SQL Error: 1366: Incorrect string value: "\xE8\xAF\xA6\xE7\xBB\x86…" for column "urladdress" at row 1
我新建了.py文件,写一个类用于数据库的操作
连接数据库
def connDB(self, host='192.168.190.129', user='root', passwd='123456', db='pytest'):
    """Open a MySQL connection and return it.

    The defaults reproduce the previously hard-coded test environment, so
    existing ``connDB()`` calls behave as before; callers may now override
    any of the settings.

    NOTE(review): the default credentials (root account, plain-text
    password) live in source code -- consider loading them from
    configuration instead.

    Args:
        host: address of the MySQL server.
        user: account name used to log in.
        passwd: password for ``user``.
        db: name of the database to select.

    Returns:
        The object returned by ``MySQLdb.connect`` on success, or ``None``
        when connecting fails (the error is printed, not raised).
    """
    try:
        conn = MySQLdb.connect(host, user, passwd, db)
    except Exception as e:
        # Best-effort behaviour kept from the original: report the error
        # and signal failure to the caller with an explicit None.
        print(e)
        return None
    print('connect suucess')
    return conn
插入数据操作
def insertDB(self,urladdress,resultcode):
    """Insert one address and its status code into the ``searchurl`` table.

    A fresh connection is obtained from ``self.connDB()`` for every call and
    is always closed before returning. The row's ADDTIME value comes from
    ``self.getTime()``.

    Args:
        urladdress: value stored in the URLADDRESS column.
        resultcode: value stored in the RESULTCODE column.

    Returns:
        None. Success or the insert error is reported via ``print``. If no
        connection could be opened the method returns without inserting.
    """
    nowtime = self.getTime()
    connect = self.connDB()
    if connect is None:
        # connDB() returns None after reporting a connection failure;
        # without a connection there is nothing to insert into.
        return

    # Values are bound through placeholders, never formatted into the SQL.
    insertsql = 'insert into searchurl(URLADDRESS,RESULTCODE,ADDTIME) values (%s,%s,%s)'
    valuelist = (urladdress,resultcode,nowtime)

    try:
        cur = connect.cursor()
        try:
            # Force utf8 on the connection before sending any data.
            connect.set_character_set('utf8')
            cur.execute('set names utf8;')
            cur.execute('set character set utf8;')
            cur.execute('set character_set_connection=utf8;')
            try:
                cur.execute(insertsql,valuelist)
                connect.commit()
                print('insert success')
            except Exception as e:
                print(e)
                # Discard the failed statement instead of committing it.
                connect.rollback()
        finally:
            cur.close()
    finally:
        # Runs on every path so the connection is never leaked.
        connect.close()
# Example usage: create the database helper and store one address together
# with its status code.
# NOTE(review): sendurl and responsecode are defined outside this excerpt --
# presumably the requested URL and the status code of its response; confirm.
db = DoSql()
db.insertDB(sendurl, responsecode)
这是插入数据库后的图片