# Uses a process pool and rotating proxies (用到了进程池,代理)
import requests
import json
import jsonpath
import pymysql
import queue
from multiprocessing import Pool
import random
requests.packages.urllib3.disable_warnings()
# Create the MySQL connection shared by this module.
# Keyword arguments are required: positional arguments to pymysql.connect()
# were deprecated and then removed in PyMySQL 1.0.
# NOTE(review): credentials are hard-coded in source — consider moving them
# to environment variables or a config file.
db = pymysql.connect(
    host='127.0.0.1',
    user='root',
    password='cyl666.',
    database='scrapy',
    charset='utf8',
)
# Cursor object used by insert_mysql() below.
cursor = db.cursor()
# Database insert helper.
def insert_mysql(sets):
    """Insert one scraped red-wine record into the `red_wine` table.

    Args:
        sets: a 13-element sequence matching the column list below
              (title, grade, score, country, img_url, img_content,
               winery, region, regional_styles, food, grapes,
               acidity, alcohol).

    Uses the module-level `cursor`/`db` and commits immediately after
    the parameterized INSERT executes.
    """
    sql = 'insert into red_wine(title,grade,score,country,img_url,img_content,winery,region,regional_styles,food,grapes,acidity,alcohol) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
    cursor.execute(sql, sets)
    db.commit()
# Fetch proxies.
def ip_proxy():
    """Fetch a batch of 20 proxies from the Zhima proxy vendor API.

    Returns:
        list[dict]: requests-style proxy mappings, e.g.
        ``{'http': 'http://ip:port', 'https': 'https://ip:port'}``.
        Returns an empty list when the API response contains no
        usable ``data`` entries.
    """
    # BUGFIX: the trailing '&regions=' parameter had been mojibake'd to
    # '®ions=' ('&reg' collapsed into the ® HTML entity), corrupting the
    # query string sent to the vendor.
    url = ('http://webapi.http.zhimacangku.com/getip'
           '?num=20&type=2&pro=&city=0&yys=0&port=1&pack=34365'
           '&ts=1&ys=1&cs=1&lb=1&sb=0&pb=4&mr=1&regions=')
    html = requests.get(url)
    ip_js = json.loads(html.text)
    all_ip = jsonpath.jsonpath(ip_js, 'data.*')
    ip_lists = []
    # jsonpath.jsonpath() returns False (not an empty list) when the
    # expression matches nothing; iterating False would raise TypeError.
    if not all_ip:
        return ip_lists
    for i in all_ip:
        proxy = {
            'http': 'http://%s:%s' % (i['ip'], i['port']),
            'https': 'https://%s:%s' % (i['ip'], i['port']),
        }
        ip_lists.append(proxy)
    return ip_lists
# Return the first item of a jsonpath result list.
def return_one(alist):
    """Return the first element of *alist*, or the sentinel '空' when it is
    empty/falsy (jsonpath returns False on no match)."""
    return alist[0] if alist else '空'
def detail(url,ip):
try:
headers = {
'Host': 'www.vivino.com',
'Connection': 'keep-alive',
'Accept'