单线程版-上传网络流
import pymysql
import oss2
import requests
import logging
# Logging setup: records are appended to Error.log.
logging.basicConfig(
    filename='Error.log',  # log file name
    filemode='a',  # file mode: "w" (truncate) or "a" (append)
    level=logging.INFO,  # threshold: records at this level and above are written
    datefmt='%Y-%m-%d %H:%M:%S',  # timestamp layout used for %(asctime)s
    format='%(asctime)s %(filename)s %(levelname)s : %(message)s',  # record layout
)
class osss(object):
    """Single-threaded job that copies product images into an OSS bucket.

    Rows of ``cqyy_product_info`` are read in pages of 1000, ordered by id.
    For every row the URLs in the ``big`` and ``avatar`` columns are
    downloaded and uploaded to the bucket, unless their file name is already
    recorded in ``cqyy_cqyy``. The resulting file names are then written to
    ``big_oss`` / ``avatar_oss``.

    Instantiating the class runs the whole job: ``__init__`` keeps calling
    ``ssh_fun`` and only ends when ``ssh_fun`` raises ``SystemExit`` (no rows
    left) or an unexpected error propagates.
    """

    # Seconds to wait for an image download before the row is reported as failed.
    DOWNLOAD_TIMEOUT = 30

    def __init__(self):
        """Open the MySQL and OSS handles, then process pages until exit."""
        self.db = pymysql.connect(host='', port=3306, database='spider_yu', user='spider',
                                  password='', charset='utf8')
        self.cursor = self.db.cursor()
        # NOTE(review): the OSS access key pair is hard-coded in source; it
        # should be moved to configuration/environment and rotated.
        self.auth = oss2.Auth('LTAIjiyFNM8SukLq', 'fH83Q8o4JDMCpid7XxJSSDm4zPGxDW')
        self.bucket = oss2.Bucket(self.auth, 'http://oss-cn-hangzhou.aliyuncs.com', 'yxb-dev')
        # Id of the last product row handled; used as the paging cursor.
        self.numbre = 0
        try:
            while True:
                self.ssh_fun()
        finally:
            # Reached on SystemExit as well, so the connection is released.
            self.db.close()

    @staticmethod
    def _strip_queries(raw):
        """Return ``raw`` with the ``?query`` part removed from every URL.

        ``raw`` is a comma-separated URL list (or ``None``/empty, which yields
        ``''``). Each URL is stripped on its own so that a query string on the
        first URL does not discard the URLs that follow it.
        """
        if not raw:
            return ''
        return ','.join(url.split('?')[0] for url in raw.split(','))

    def _ensure_uploaded(self, url, name, prefix):
        """Upload ``url`` to ``prefix + name`` unless ``name`` is already recorded."""
        if self.cursor.execute('select id from cqyy_cqyy where url=%s', (name,)):
            return
        response = requests.get(url, timeout=self.DOWNLOAD_TIMEOUT)
        # Do not store an HTTP error page as if it were the image.
        response.raise_for_status()
        self.bucket.put_object(prefix + name, response)
        self.cursor.execute('insert into cqyy_cqyy(url) values (%s)', (name,))
        self.db.commit()

    def _sync_column(self, column, raw, prefix):
        """Upload every URL listed in ``raw`` and store the names in ``column``.

        ``column`` is one of the fixed column names chosen by ``ssh_fun``; it
        is never taken from data, so formatting it into the SQL is safe.
        """
        urls = raw.split(',')
        names = []
        for url in urls:
            if '/' not in url:
                continue
            name = url.split('/')[-1]
            names.append(name)
            self._ensure_uploaded(url, name, prefix)
        # A single entry without any '/' leaves the column untouched; a
        # comma-separated list is always written, even when it yields no names.
        # The write happens only after all uploads succeeded.
        if names or len(urls) > 1:
            self.cursor.execute(
                'update cqyy_product_info set {}=%s where id=%s'.format(column),
                (','.join(names), self.numbre))
            self.db.commit()

    def ssh_fun(self):
        """Process the next page (up to 1000 rows) after ``self.numbre``.

        A failure while handling one row is logged with its traceback and the
        job continues with the next row.

        Raises:
            SystemExit: when no row with an id above ``self.numbre`` is left.
        """
        found = self.cursor.execute(
            'select id, big, avatar from cqyy_product_info '
            'where id>%s order by id limit 1000',
            (self.numbre,))
        if not found:
            raise SystemExit(0)
        for row_id, big, avatar in self.cursor.fetchall():
            self.numbre = row_id
            big = self._strip_queries(big)
            avatar = self._strip_queries(avatar)
            logging.info('numbre:"{}", big:"{}", avatar:"{}"'.format(self.numbre, big, avatar))
            try:
                self._sync_column('big_oss', big, 'yxb-cqyy/drug_image/')
                self._sync_column('avatar_oss', avatar, 'yxb-cqyy/head_portrait/')
            except Exception as e:
                logging.exception('oss存入失败 id:"{}" 错误类型:"{}"'.format(self.numbre, e))
if __name__ == '__main__':
    # Constructing the class runs the whole job: its constructor loops until
    # the process exits. The result is therefore not bound to a name, which
    # also avoids rebinding the class name ``osss`` to an instance.
    osss()
多线程版-上传网络流
import paramiko, threading
import queue
import pymysql
import oss2
import requests
import logging
# Logging setup: records are appended to Error.log.
logging.basicConfig(
    filename='Error.log',  # log file name
    filemode='a',  # file mode: "w" (truncate) or "a" (append)
    level=logging.INFO,  # threshold: records at this level and above are written
    datefmt='%Y-%m-%d %H:%M:%S',  # timestamp layout used for %(asctime)s
    format='%(asctime)s %(filename)s %(levelname)s : %(message)s',  # record layout
)
class ThreadPool(object):
    """Fixed-size pool of thread "tokens" kept in a bounded queue.

    The queue is pre-filled with ``maxsize`` references to the
    ``threading.Thread`` class. ``getThread`` takes one out (waiting while the
    queue is empty) and ``addThread`` puts one back, which caps how many
    tokens can be held at the same time.
    """

    def __init__(self, maxsize):
        self.maxsize = maxsize
        self._q = queue.Queue(maxsize)
        # Fill every slot up front.
        for _slot in range(maxsize):
            self.addThread()

    def addThread(self):
        """Put one ``threading.Thread`` class reference into the queue."""
        self._q.put(threading.Thread)

    def getThread(self):
        """Take one entry out of the queue and return it."""
        token = self._q.get()
        return token
# Module-level MySQL connection; ssh_fun runs its statements on it.
dbs = pymysql.connect(
    charset='utf8',
    database='spider_yu',
    host='',
    port=3306,
    user='spider',
    password='',
)
# Every worker thread runs its statements on the shared module-level ``dbs``
# connection. A PyMySQL connection must not be used by two threads at once,
# so each statement (and its commit) is executed while holding this lock.
_DBS_LOCK = threading.Lock()

# Seconds to wait for an image download before the row is reported as failed.
_DOWNLOAD_TIMEOUT = 30


def _execute(sql, args, commit=False):
    """Run one parameterised statement on ``dbs`` under the lock.

    Returns the row count reported by ``cursor.execute``. With ``commit=True``
    the transaction is committed before the lock is released.
    """
    with _DBS_LOCK:
        cursor = dbs.cursor()
        try:
            count = cursor.execute(sql, args)
            if commit:
                dbs.commit()
            return count
        finally:
            cursor.close()


def _sync_images(column, raw, prefix, numbre, bucket):
    """Upload every URL listed in ``raw`` and store the file names in ``column``.

    ``raw`` is a single URL or a comma-separated list. ``column`` is one of the
    fixed column names chosen by ``ssh_fun``; it is never taken from data, so
    formatting it into the SQL is safe. Downloads and uploads run outside the
    database lock so workers still overlap on network I/O.
    """
    if not raw:
        return
    urls = raw.split(',')
    names = []
    for url in urls:
        if '/' not in url:
            continue
        name = url.split('/')[-1]
        names.append(name)
        if _execute('select id from cqyy_cqyy where url=%s', (name,)):
            continue  # already recorded as uploaded
        response = requests.get(url, timeout=_DOWNLOAD_TIMEOUT)
        # Do not store an HTTP error page as if it were the image.
        response.raise_for_status()
        bucket.put_object(prefix + name, response)
        _execute('insert into cqyy_cqyy(url) values (%s)', (name,), commit=True)
    # A single entry without any '/' leaves the column untouched; a
    # comma-separated list is always written, even when it yields no names.
    # The write happens only after all uploads succeeded.
    if names or len(urls) > 1:
        _execute('update cqyy_product_info set {}=%s where id=%s'.format(column),
                 (','.join(names), numbre), commit=True)


def ssh_fun(numbre, big, avatar, pool, db, bucket):
    """Worker body: migrate the images of one product row to OSS.

    Args:
        numbre: id of the ``cqyy_product_info`` row being processed.
        big: URL, or comma-separated URLs, stored under ``yxb-cqyy/drug_image/``.
        avatar: URL, or comma-separated URLs, stored under
            ``yxb-cqyy/head_portrait/``.
        pool: pool whose ``addThread`` is called when the work is finished,
            whether it succeeded or failed.
        db: unused; kept so existing callers keep working. Statements run on
            the module-level ``dbs`` connection.
        bucket: object whose ``put_object(key, data)`` performs the upload.

    Any exception is logged with its traceback and swallowed, so one bad row
    does not end the worker abnormally.
    """
    print('numbre:"{}", big:"{}", avatar:"{}"'.format(numbre, big, avatar))
    logging.info('numbre:"{}", big:"{}", avatar:"{}"'.format(numbre, big, avatar))
    try:
        _sync_images('big_oss', big, 'yxb-cqyy/drug_image/', numbre, bucket)
        _sync_images('avatar_oss', avatar, 'yxb-cqyy/head_portrait/', numbre, bucket)
    except Exception as e:
        logging.exception('oss存入失败 id:"{}" 错误类型:"{}"'.format(numbre, e))
    finally:
        # Always hand the slot back, otherwise the producer eventually blocks.
        pool.addThread()
if __name__ == '__main__':
    # Multi-threaded driver: page through cqyy_product_info and hand every row
    # to a worker thread running ssh_fun, with at most 3 workers at a time.
    t_list = []
    pool = ThreadPool(3)
    numbre = 0  # id of the last row handed to a worker; used as the paging cursor
    db = pymysql.connect(host='', port=3306, database='spider_yu', user='spider',
                         password='', charset='utf8')
    cursor = db.cursor()
    # NOTE(review): the OSS access key pair is hard-coded in source; it should
    # be moved to configuration/environment and rotated.
    auth = oss2.Auth('LTAIjiyFNM8SukLq', 'fH83Q8o4JDMCpid7XxJSSDm4zPGxDW')
    bucket = oss2.Bucket(auth, 'http://oss-cn-hangzhou.aliyuncs.com', 'yxb-dev')
    try:
        while True:
            # ORDER BY makes "id > last seen id" paging deterministic.
            num = cursor.execute(
                'select id, big, avatar from cqyy_product_info '
                'where id>%s order by id limit 1000',
                (numbre,))
            if not num:
                # Nothing left: end the process, as before; the ``finally``
                # below still joins the workers and closes the connection.
                raise SystemExit(0)
            for numbre, big, avatar in cursor.fetchall():
                # Strip the query string from each URL on its own, so a '?' in
                # the first URL does not drop the URLs after it. NULL columns
                # are treated as empty.
                big = ','.join(url.split('?')[0] for url in (big or '').split(','))
                avatar = ','.join(url.split('?')[0] for url in (avatar or '').split(','))
                th = pool.getThread()  # blocks until a worker slot is free
                t = th(target=ssh_fun, args=(numbre, big, avatar, pool, db, bucket))
                t.start()
                t_list.append(t)
            # Wait for this page's workers so the list does not grow forever.
            for t in t_list:
                t.join()
            t_list = []
    finally:
        for t in t_list:
            t.join()
        db.close()