由于应用用户量庞大(千万级别),运营反馈部分页面点击进入后找不到图片,
导致页面加载速度慢,用户体验差,故抽了一点点时间写的这段代码。
这段代码写于2016年,之前在其他博客平台,现在搬回自己的个人博客。
-- Main table read by the checker script when it is invoked with type 1:
-- the script selects `rid` and `default_image` from it.
CREATE TABLE `prefix_repo` (
`rid` int(10) unsigned NOT NULL AUTO_INCREMENT,
`status` tinyint(3) NOT NULL DEFAULT '0',
-- Image path; the script appends it to ImagePrefix to build the URL it probes.
`default_image` varchar(100) NOT NULL DEFAULT '',
PRIMARY KEY (`rid`),
-- NOTE(review): this secondary index leads with the primary-key column;
-- confirm it is actually needed.
KEY `rid` (`rid`,`status`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- Image table read by the checker script when it is invoked with a type
-- other than 1: the script selects `rid` and `url` from it.
CREATE TABLE `prefix_repoimage` (
`item_id` int(10) unsigned NOT NULL AUTO_INCREMENT,
-- Presumably references `prefix_repo`.`rid`; no foreign key is declared here.
`rid` int(10) unsigned NOT NULL,
-- Image path; the script appends it to ImagePrefix to build the URL it probes.
`url` varchar(120) NOT NULL DEFAULT '',
PRIMARY KEY (`item_id`),
KEY `rid` (`rid`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT;
#!/usr/bin/env python
#coding:utf-8
from mysql import connector
import urllib2
import sys
import socket
# MySQL connection settings; queryData passes them to pyconnect.
HOST = 'localhost'
USER = 'root'
# NOTE(review): empty root password -- presumably a local test setup; confirm
# before pointing this at a real server.
PASSWORD = ''
DATABASE = 'pytest'
# Base URL that check200 prepends to each stored image path.
ImagePrefix = 'http://image.59store.com'
def pyconnect(**kwargs):
    """Open a MySQL connection from keyword settings.

    Reads the ``host``, ``user``, ``password`` and ``database`` keys from
    *kwargs* (a missing key is forwarded as ``None``) and hands them to
    ``connector.connect``.

    Returns:
        The connection object on success. If connecting raises any
        exception, the error is printed and ``False`` is returned instead.
    """
    settings = {
        'host': kwargs.get('host'),
        'user': kwargs.get('user'),
        'passwd': kwargs.get('password'),
        'db': kwargs.get('database'),
    }
    try:
        return connector.connect(**settings)
    except Exception as err:
        # Callers test the result against False rather than catching errors.
        print(err)
    return False
def queryData(start=0, limit=100, type=1):
    """Fetch one page of (id, image path) rows from MySQL.

    Args:
        start: Row offset for the LIMIT clause; coerced with ``int()``.
        limit: Maximum number of rows to fetch; coerced with ``int()``.
        type: ``1`` reads ``rid, default_image`` from ``prefix_repo``; any
            other value reads ``rid, url`` from ``prefix_repoimage``.

    Returns:
        The list of row tuples from ``cursor.fetchall()``, or ``False`` when
        the connection could not be opened.

    Raises:
        ValueError: If *start* or *limit* is a string that is not an integer.
    """
    # The command-line entry point hands these over as raw strings; forcing
    # them to integers keeps arbitrary text out of the SQL statement.
    offset = int(start)
    count = int(limit)

    connect = pyconnect(host=HOST, user=USER, password=PASSWORD, database=DATABASE)
    if connect is False:
        return False

    # ORDER BY the primary key so consecutive offset/limit pages neither skip
    # nor repeat rows; LIMIT without ORDER BY has no guaranteed row order.
    if type == 1:
        sql = 'SELECT rid,default_image FROM prefix_repo ORDER BY rid LIMIT %s,%s'
    else:
        sql = 'SELECT rid,url FROM prefix_repoimage ORDER BY item_id LIMIT %s,%s'

    # Release the cursor and the connection even when the query fails; the
    # script opens a fresh connection on every call.
    try:
        cursor = connect.cursor()
        try:
            # Placeholders are filled in by the driver, not by string formatting.
            cursor.execute(sql, (offset, count))
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connect.close()
def check200(start, limit, type=1):
    """Probe one page of image URLs and log the ones that do not return 200.

    Rows come from ``queryData(start, limit, type)``. For each row the URL
    is ``ImagePrefix + row[1]``; it is printed and then requested with a
    3 second timeout. Every URL that fails (HTTP error, non-200 status,
    timeout, or any other connection-level failure) is appended to
    ``<type>.txt`` as ``<row[0]>\\t<url>``. Successful rows are printed.

    Args:
        start: Row offset forwarded to ``queryData``.
        limit: Page size forwarded to ``queryData``.
        type: Table selector forwarded to ``queryData``; also names the
            report file.

    Returns:
        None. Nothing is probed when ``queryData`` returns ``False``.
    """
    # ``with`` guarantees the report is flushed and closed even if an
    # unexpected exception escapes the loop.
    with open('%s.txt' % (type), 'a') as report:
        data = queryData(start, limit, type)
        if data is False:
            return

        for row in data:
            url = ImagePrefix + row[1]
            print(url)
            try:
                response = urllib2.urlopen(url, timeout=3)
                try:
                    httpCode = response.getcode()
                finally:
                    # Close the response so sockets do not pile up over a long run.
                    response.close()
            except (urllib2.URLError, socket.error):
                # URLError covers HTTPError plus DNS failures, refused
                # connections and connect-phase timeouts; socket.error covers
                # socket.timeout raised while reading. One unreachable URL
                # must not abort the whole run.
                report.write('%s\t%s \n' % (row[0], url))
                continue

            if httpCode != 200:
                report.write('%s\t%s \n' % (row[0], url))
            else:
                print(row[0],'ok')
if __name__ == "__main__":
    # Usage: ./pycheck404url.py <start> <limit> <type>
    #   start: row offset, limit: number of rows to check,
    #   type: 1 checks prefix_repo, any other integer checks prefix_repoimage.
    args = sys.argv
    print(args)
    if len(args) < 4:
        # Fail with a readable message instead of an IndexError traceback.
        sys.stderr.write('usage: %s <start> <limit> <type>\n' % args[0])
        sys.exit(1)
    # Convert at the boundary so only integers ever reach the SQL layer.
    check200(int(args[1]), int(args[2]), int(args[3]))
调用说明
./pycheck404url.py 0 100 1
0:起始偏移量,100:查询总数,1:查询主表(2:查询图片表)
./pycheck404url.py 0 100 1
./pycheck404url.py 0 100 2