#coding: utf-8
import datetime
import functools
import os
import re
from threading import Thread

import MySQLdb
import requests
# Browser-like request headers sent with every image download.
# The User-Agent pieces are joined by implicit string concatenation, so each
# piece except the last must end with a space; without it the product tokens
# run together ("...like Gecko)Chrome/63...") and the value is malformed.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/63.0.3239.132 Safari/537.36'
    ),
}
# Directory the downloaded images are written to.
file_path = 'F:\\mlu2'
# Create the download directory (including missing parents) up front so the
# worker threads can write into it.
if not os.path.exists(file_path):
    os.makedirs(file_path)


class Spider(object):
    """Download images whose URLs are read from a MySQL table.

    Attributes:
        file_path: Directory the downloaded files are written to.
        header: Dict of HTTP headers sent with every download request.
    """

    def __init__(self, file_path, header):
        """Store the target directory and the HTTP request headers."""
        self.file_path = file_path
        self.header = header

    # NOTE: `timer` is defined as a plain function here and only converted to
    # a staticmethod at the end of the class body. A staticmethod object is
    # not callable before Python 3.10, so decorating `run_main` with an
    # already-wrapped `@staticmethod` timer raises TypeError while the class
    # is still being defined.
    def timer(func):
        """Decorate *func* so that its wall-clock run time is printed.

        The wrapper forwards positional and keyword arguments, prints the
        elapsed days/hours/minutes/seconds after *func* returns, and passes
        the return value of *func* through. Nothing is printed if *func*
        raises.
        """
        @functools.wraps(func)
        def time_count(*args, **kwargs):
            start_time = datetime.datetime.now()
            result = func(*args, **kwargs)
            elapsed = datetime.datetime.now() - start_time
            # timedelta.seconds excludes whole days; split it with integer
            # arithmetic so the values stay ints on Python 2 and Python 3.
            hour, remainder = divmod(elapsed.seconds, 3600)
            minute, second = divmod(remainder, 60)
            print("爬取完成")
            print("一共用时%s天%s时%s分%s秒" % (elapsed.days, hour, minute, second))
            return result
        return time_count

    def get_link(self):
        """Return the rows of image links stored in the `msg` table.

        Returns:
            Whatever `cursor.fetchall()` yields for the query: a sequence of
            one-column rows whose first item is the image URL (at most 100).

        The cursor and the connection are closed even when the query fails.
        """
        # NOTE(review): connection settings and credentials are hard-coded;
        # consider loading them from configuration or the environment.
        conn = MySQLdb.connect(host='localhost',
                               port=3306,
                               user='root',
                               passwd='729814',
                               db='mlu',
                               charset='utf8')
        try:
            cur = conn.cursor()
            try:
                # `image` holds the image URLs that an earlier crawl stored
                # in MySQL.
                sql = 'select image from msg limit 100'
                cur.execute(sql)
                return cur.fetchall()
            finally:
                cur.close()
        finally:
            conn.close()

    def download(self, link, timeout=30):
        """Fetch *link* and save the response body into `self.file_path`.

        Args:
            link: URL of the image. The file name is the text after the last
                "/" that is followed by at least one character.
            timeout: Seconds passed to `requests.get` as its timeout so a
                stalled server cannot block the worker thread forever.

        Failures are reported on stdout instead of being raised, so one bad
        link does not abort the other downloads.
        """
        names = re.findall(r'.*/(.+)', link)
        if not names:
            # No "/" followed by text: there is nothing to name the file after.
            print("skip link without a file name: %s" % link)
            return
        filename = names[0]
        try:
            pic = requests.get(link, headers=self.header, timeout=timeout)
            if pic.status_code == 200:
                # The `with` block closes the file; no explicit close needed.
                with open(os.path.join(self.file_path, filename), 'wb') as fp:
                    fp.write(pic.content)
                print("下载完成")
            else:
                print("download failed (HTTP %s): %s" % (pic.status_code, link))
        except Exception as e:
            # Deliberately broad: this runs in a worker thread and is
            # best-effort, so any network or file error is only reported.
            print(e)

    @timer
    def run_main(self):
        """Download every link from `get_link`, one thread per link.

        Blocks until all worker threads have finished; the `timer` decorator
        then prints the total elapsed time.
        """
        threads = []
        for link in self.get_link():
            # Each row is a one-column record; the URL is its first item.
            worker = Thread(target=self.download, args=(str(link[0]),))
            worker.start()
            threads.append(worker)
        for worker in threads:
            worker.join()

    # Keep `Spider.timer` reachable as a static method for outside callers.
    timer = staticmethod(timer)
# Script entry: build a spider from the module-level settings and start the
# timed, multi-threaded download run.
spider = Spider(file_path=file_path, header=header)
spider.run_main()