# python3多线程协程_python爬虫基础3.5——多线程+协程

import asyncio
import random
import re
import time
from concurrent.futures import ThreadPoolExecutor

import matplotlib.pyplot as plt
from aiohttp import ClientSession

# Crawl settings.
# URL template; the placeholder receives a block height.
string = "https://blockchain.info/block-height/{}?format=json"
# Size of the thread pool used by colletc().
max_cpu = 10
# Number of consecutive block heights fetched concurrently by one batch.
max_io = 50
# Number of batches started by one colletc() call.
max_html = 8

# First block height that is crawled (batch 0 starts here).
_FIRST_HEIGHT = 500000

# Patterns applied to the raw response text by data_handle().
_VER_RE = re.compile(r'"ver":(\d),')
_LOCK_TIME_RE = re.compile(r'"lock_time":(\d+),')
_TX_HASH_RE = re.compile(r'"hash":"\w+",')

# (Re)create the output file with its header line every time the module runs.
with open('test.txt', 'w', encoding='utf-8') as f1:
    f1.write('所有lock_time不为零交易id号\n')

# Pool of User-Agent strings; one is picked at random for every URL.
user_agent = [
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
    "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
    "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
    "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
    "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
    "UCWEB7.0.2.37/28/999",
    "NOKIA5700/ UCWEB7.0.2.37/28/999",
    "Openwave/ UCWEB7.0.2.37/28/999",
    "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999",
    # iPhone 6:
    "Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25",
]


def datain(version, locktime):
    """Show two stacked pie charts of the lock_time and version tallies.

    Args:
        version: two-element sequence, plotted with the labels
            'ver=1' and 'ver=2'.
        locktime: two-element sequence, plotted with the labels
            'lock=0' and 'lock!=0'.
    """
    lock_labels = 'lock=0', 'lock!=0'
    ver_labels = 'ver=1', 'ver=2'

    # NOTE(review): this creates an axes before the two subplots are added;
    # kept as in the original, confirm it is still wanted.
    plt.axes(aspect=1)
    plt.subplot(2, 1, 1)
    plt.pie(x=locktime, labels=lock_labels, autopct='%.0f%%')
    plt.subplot(2, 1, 2)
    plt.pie(x=version, labels=ver_labels, autopct='%.0f%%')
    plt.show()


def data_handle(response, data, version, lock_time):
    """Tally ``ver`` and ``lock_time`` fields found in a raw response text.

    The text is scanned with three regular expressions (``"ver":<digit>,``,
    ``"lock_time":<digits>,`` and ``"hash":"<word>",``) whose matches are
    consumed in lockstep, one of each per round. Scanning stops as soon as
    any of the three runs out of matches; counters that were already bumped
    in that last round keep their increment.

    Args:
        response: response body as ``str``.
        data: list that receives, for every round whose lock_time is not
            ``0``, the complete matched ``"hash":"...",`` fragment.
        version: two-item list; index 0 counts ``ver == 1``, index 1 counts
            every other matched ``ver`` digit.
        lock_time: two-item list; index 0 counts ``lock_time == 0``,
            index 1 counts non-zero values.
    """
    ver_matches = _VER_RE.finditer(response)
    lock_matches = _LOCK_TIME_RE.finditer(response)
    hash_matches = _TX_HASH_RE.finditer(response)
    try:
        while True:
            if next(ver_matches).group(1) == '1':
                version[0] += 1
            else:
                version[1] += 1

            lock_is_zero = next(lock_matches).group(1) == '0'
            lock_time[0 if lock_is_zero else 1] += 1

            # The hash match is consumed in both cases so the three
            # iterators stay aligned; it is only recorded when locked.
            tx_fragment = next(hash_matches).group()
            if not lock_is_zero:
                data.append(tx_fragment)
    except StopIteration:
        # One of the patterns has no further match: nothing left to tally.
        pass


async def _fetch_block(session, url, data, version, lock_time):
    """Download one URL (with retries) and feed its text to data_handle().

    A non-200 status or a failed request is retried; after more than ten
    failures the URL is reported and skipped without touching the tallies.
    """
    headers = {'User-Agent': random.choice(user_agent)}  # random UA per URL
    chance = 0
    while True:
        try:
            async with session.get(url, headers=headers, timeout=100) as response:
                if response.status == 200:
                    body = await response.text()
                    print(200)
                    break
                failure = "connect error"
        except Exception:
            # Any failure while requesting or reading the body lands here.
            failure = "ssl error"
        if chance > 10:
            print("error +%s" % url)
            return
        print(failure)
        chance += 1
        await asyncio.sleep(0)
    data_handle(body, data, version, lock_time)


async def _fetch_batch(begin, data, version, lock_time):
    """Fetch the ``max_io`` block heights of batch ``begin`` concurrently."""
    first = _FIRST_HEIGHT + begin * max_io
    async with ClientSession() as session:
        await asyncio.gather(*(
            _fetch_block(session, string.format(height), data, version, lock_time)
            for height in range(first, first + max_io)
        ))
    # aiohttp's documentation recommends a short pause after closing a
    # session so SSL transports can finish closing before the loop is closed.
    await asyncio.sleep(0.25)


def _run_batch(begin):
    """Run one batch on a private event loop; return (version, lock_time, data).

    asyncio only creates an event loop automatically in the main thread.
    Calling asyncio.get_event_loop() in a worker thread fails with
    "There is no current event loop in thread 'Thread-1'", so every worker
    creates and installs its own loop explicitly.
    """
    data = []
    version = [0, 0]
    lock_time = [0, 0]
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(_fetch_batch(begin, data, version, lock_time))
    finally:
        # Release the loop's resources; a new one is created per batch.
        asyncio.set_event_loop(None)
        loop.close()
    return version, lock_time, data


def colletc(n):
    """Crawl batch group ``n`` and append the collected fragments to test.txt.

    Batches ``n * max_html`` .. ``(n + 1) * max_html - 1`` are run on a
    thread pool of ``max_cpu`` workers, each batch on its own event loop.
    The summed tallies are printed, the fragments collected by
    data_handle() are appended to ``test.txt`` one per line, and the time
    spent writing is printed.

    Args:
        n: index of the batch group to crawl.

    Returns:
        Tuple ``(version_all, lock_time_all)`` of two-item lists holding the
        tallies summed over all batches of the group.
    """
    data_all = []
    version_all = [0, 0]
    lock_time_all = [0, 0]
    batches = range(n * max_html, (n + 1) * max_html)

    # The context manager shuts the pool down for us.
    with ThreadPoolExecutor(max_workers=max_cpu) as executor:
        for version, lock_time, data in executor.map(_run_batch, batches):
            version_all[0] += version[0]
            version_all[1] += version[1]
            lock_time_all[0] += lock_time[0]
            lock_time_all[1] += lock_time[1]
            data_all.extend(data)
    print(version_all, lock_time_all)

    with open('test.txt', 'a', encoding='utf-8') as f1:
        t1 = time.time()
        f1.writelines(entry + '\n' for entry in data_all)
    t2 = time.time()
    print(t2 - t1)
    return version_all, lock_time_all

# Script driver: crawl batch group 0, time the run and print the tallies.
version_all = [0, 0]
lock_time_all = [0, 0]
t1 = time.time()
# Keep the totals returned by colletc(); discarding them left the final
# print showing the initial zeros.
version_all, lock_time_all = colletc(0)
# To cover more block heights, call colletc(x) for several x and add each
# returned pair into version_all / lock_time_all; datain(version_all,
# lock_time_all) can then be used to plot the totals.
t2 = time.time()
print(t2 - t1)
print(version_all, lock_time_all)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值