# -*- coding: utf-8 -*-
#
# Imports
#
import asyncio
import hashlib
import time
import datetime
import json
from aiomultiprocess import Pool
from redis import *
from pybloom_live import BloomFilter
import aiohttp
#
# Public variable
#
# Bloom filter used to de-duplicate answers: sized for 1e9 entries at a 1%
# false-positive rate.
Bloom_data = BloomFilter(capacity=1000000000, error_rate=0.01)

# Shared Redis connection (host and password are blank in this file) and a
# pipeline on it for batching commands.
DB_get_question = StrictRedis(
    host='',
    port=6480,
    password='',
    db=4,
)
pipeline_redis = DB_get_question.pipeline()
#
# Public functions
#
def md5(data):
    """
    Return the MD5 hex digest of ``data`` as UTF-8 encoded bytes.

    MD5 is a one-way hash, not encryption; in this file the digest serves as
    the de-duplication key stored in the Bloom filter.

    :param data: value to hash. ``str`` is encoded as UTF-8 first;
        ``bytes`` / ``bytearray`` (e.g. raw Redis replies) are hashed as-is.
    :return: the 32-character lowercase hex digest, as ``bytes``.
    """
    if isinstance(data, str):
        data = data.encode('utf8')
    return hashlib.md5(data).hexdigest().encode('utf8')
async def get(data):
    """
    POST one JSON payload through a proxy and return the answer text.

    A proxy address is popped from the Redis set ``IP_PROXY`` for every call,
    so each request consumes one proxy from that set.

    :param data: JSON-serialisable request payload.
    :return: the value at ``results[0]['values']['text']`` of the JSON reply.
    :raises RuntimeError: if ``IP_PROXY`` is empty. The request is not sent
        without a proxy.
    :raises aiohttp.ClientError, asyncio.TimeoutError: on network failure.
    :raises ValueError, KeyError, IndexError: if the reply is not JSON or does
        not have the expected shape.
    """
    # The endpoint is blank in this file and must be filled in before use.
    url = ''

    # NOTE: this is a blocking Redis call inside a coroutine.
    raw_proxy = DB_get_question.spop('IP_PROXY')
    if raw_proxy is None:
        raise RuntimeError('Redis set IP_PROXY is empty: no proxy available')
    # The Redis client is created without decode_responses, so members come
    # back as bytes; formatting them directly would yield "http://b'...'".
    if isinstance(raw_proxy, bytes):
        raw_proxy = raw_proxy.decode('utf8')
    # aiohttp takes the proxy as a single URL string, not a requests-style dict.
    proxy_url = "http://{}".format(raw_proxy)

    connector = aiohttp.TCPConnector(ssl=False)  # skip certificate validation
    timeout = aiohttp.ClientTimeout(total=7)
    async with aiohttp.ClientSession(connector=connector) as session:
        # The context manager releases the connection even if reading fails.
        async with session.post(url, json=data, timeout=timeout,
                                proxy=proxy_url) as response:
            result = await response.text()

    hjson = json.loads(result)
    content = hjson['results'][0]['values']['text']
    print('\033[32;1mget_question\033[0m:', content)
    await asyncio.sleep(0.1)
    return content
def _parse_credentials(raw_entry):
    """Split one raw ``key2_set`` member (bytes) into ``(key, phone)``."""
    parts = raw_entry.decode('utf8').split(': ')
    key = parts[-1].replace('\'', '').replace('}', '')
    phone = parts[0].replace('\'', '').replace('{', '')
    return key, phone


def _pop_credentials():
    """Pop a member of ``key2_set`` and parse it, failing loudly if the set is empty."""
    raw_entry = DB_get_question.spop('key2_set')
    if raw_entry is None:
        raise RuntimeError('Redis set key2_set is empty: no API key available')
    return _parse_credentials(raw_entry)


async def request():
    """
    Run the crawl loop using a process pool of coroutines.

    Each pass:

    1. If a batch of payloads is pending, send it through ``get`` in an
       aiomultiprocess ``Pool``, then store every answer not yet seen by
       the Bloom filter in the Redis set ``get_question``.
    2. Pop the next 100 members from ``original_question_set``.
    3. Every 500 passes, rotate the API credentials, and refill ``key2_set``
       from ``key2_total.txt`` when fewer than 5 entries remain.

    :return: None
    :raises RuntimeError: if ``key2_set`` is empty when credentials are needed.
    """
    key_number = 0
    # One-element seed: enough to enter the loop, but below the ``> 1``
    # threshold, so the first pass only loads questions.
    datas = ['']
    key, phone = _pop_credentials()
    # NOTE(review): only this initial pop also strips every 'b' from the phone
    # field; the later rotations do not. Kept as found -- confirm the intent.
    phone = phone.replace('b', '')

    while datas:
        key_number += 1
        if len(datas) > 1:
            async with Pool() as pool:
                result_list = await pool.map(get, datas)
            for result in result_list:
                if not result:
                    continue
                # These markers literally mean "request count", "key is
                # wrong" and "request content is empty": switch credentials
                # and drop the rest of this batch.
                if '请求次数' in result or 'key不对' in result or '请求内容为空' in result:
                    key, phone = _pop_credentials()
                    break
                md5_qa = md5(result)
                if md5_qa not in Bloom_data:
                    Bloom_data.add(md5_qa)
                    pipeline_redis.sadd('get_question', result)
            pipeline_redis.execute()

        datas.clear()
        # Queue 100 SPOPs and send them in a single round trip.
        for _ in range(100):
            pipeline_redis.spop('original_question_set')
        question_list = pipeline_redis.execute()
        # NOTE(review): the step that builds request payloads from
        # question_list / key / phone appears to be missing. As written,
        # datas is empty here and the loop ends after this pass.
        datas = {}
        print('datas', datas)
        print(datas)

        if key_number == 500:
            key, phone = _pop_credentials()
            if DB_get_question.scard('key2_set') < 5:
                with open('key2_total.txt', 'r') as f_key:
                    # A separate loop variable keeps the file lines from
                    # overwriting the active ``key``.
                    for line in f_key:
                        entry = line.strip()
                        if entry:  # skip blank lines
                            pipeline_redis.sadd('key2_set', entry)
                pipeline_redis.execute()
            key_number = 0
if __name__ == '__main__':
    # Guard the entry point: multiprocessing workers started with the "spawn"
    # method re-import this module, and without the guard every worker would
    # start its own crawl loop.
    asyncio.run(request())