多进程编程、微博登录(获取cookie)

使用多进程并发获取并验证 xicidaili 上可用的代理 IP

import requests
from lxml import etree
import time
from multiprocessing import Pool

class GetProxy(object):
    """Base class that validates candidate HTTP proxies in parallel.

    Subclasses provide the candidates by overriding ``get_all_proxy``; this
    class checks each candidate by fetching a test page through it.
    """

    def get_all_proxy(self):
        """Return an iterable of proxy URL strings; subclasses must override.

        Raises:
            NotImplementedError: always, in this base class.
        """
        # Raise explicitly instead of ``assert(0)``: asserts are stripped
        # under ``python -O``, which would make this silently return None.
        raise NotImplementedError('subclasses must implement get_all_proxy()')

    def validate_proxy(self, proxy_str):
        """Check whether a proxy can fetch the test page.

        Args:
            proxy_str: proxy URL, used for both the 'http' and 'https'
                schemes of the request.

        Returns:
            The ``proxies`` mapping that was passed to ``requests`` when the
            request succeeded, otherwise ``None``.
        """
        url = 'http://www.baidu.com'
        proxy = {
            'http': proxy_str,
            'https': proxy_str
        }
        try:
            response = requests.get(url, timeout=5, proxies=proxy)
            # A proxy that answers with an HTTP error status is not usable.
            response.raise_for_status()
        except (requests.RequestException, ValueError):
            # ValueError covers malformed proxy URLs rejected during parsing.
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            print("这个ip不行", proxy)
            return None
        print('这个proxy好用', proxy)
        return proxy

    def validate_proxy_concurrent(self, processes=30):
        """Validate every proxy from ``get_all_proxy`` using a process pool.

        This is the parallel equivalent of calling ``validate_proxy`` on each
        candidate in turn and keeping the truthy results.

        Args:
            processes: number of worker processes in the pool.

        Returns:
            A list of the truthy results of ``validate_proxy``, in the order
            the candidates were produced.
        """
        # A pool reuses worker processes and caps how many run at once.
        pool = Pool(processes)
        try:
            # Pass the bound method itself (not a call); each apply_async
            # returns a handle whose value is collected later.
            pending = [
                pool.apply_async(func=self.validate_proxy, args=(proxy,))
                for proxy in self.get_all_proxy()
            ]
            # No more tasks will be submitted.
            pool.close()
            # Collect only after everything is submitted; get() blocks until
            # the corresponding task has finished.
            results = [task.get() for task in pending]
        except BaseException:
            # Do not leave worker processes behind when something fails.
            pool.terminate()
            raise
        finally:
            # Wait for the workers to exit (valid after close or terminate).
            pool.join()

        return [result for result in results if result]

class Getxicidailiproxy(GetProxy):
    """Proxy source that scrapes the xicidaili.com ``/nn/`` listing page."""

    def get_all_proxy(self):
        """Yield proxy URLs of the form ``http://<ip>:<port>``.

        The values are read from the second and third cell of each row of
        the ``ip_list`` table. Rows missing either cell are skipped.
        """
        headers = {
            'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36'
        }

        url = 'https://www.xicidaili.com/nn/'
        # A timeout keeps the scrape from hanging forever on a dead server.
        response = requests.get(url, headers=headers, timeout=10)

        html_ele = etree.HTML(response.text)
        if html_ele is None:
            # etree.HTML returns None when there is nothing to parse.
            return

        tr_ele_list = html_ele.xpath('//table[@id="ip_list"]/tr')
        # Drop only the first (header) row. The previous slice of [4:] also
        # discarded the first three data rows.
        for tr_ele in tr_ele_list[1:]:
            # '.' makes the XPath relative to the current row.
            ip_cells = tr_ele.xpath('./td[2]/text()')
            port_cells = tr_ele.xpath('./td[3]/text()')
            if not ip_cells or not port_cells:
                # Skip malformed rows instead of raising IndexError.
                continue
            yield 'http://' + ip_cells[0].strip() + ':' + port_cells[0].strip()




if __name__ == '__main__':
    # Time the whole scrape-and-validate run and report the usable proxies.
    began = time.time()
    working = Getxicidailiproxy().validate_proxy_concurrent()
    print('所有的好用的proxy是:')
    print(working)
    elapsed = time.time() - began
    print("花费时间", elapsed)

微博自动登录

from selenium import webdriver
import time
import requests

# Log in to Weibo with a real browser, then reuse the session cookies with
# ``requests`` to download a page that requires login.
import getpass
import os

from selenium.webdriver.common.by import By

# SECURITY: credentials must not be hard-coded in source. Read them from the
# environment, falling back to an interactive prompt.
username = os.environ.get('WEIBO_USERNAME') or input('请输入微博账号:')
password = os.environ.get('WEIBO_PASSWORD') or getpass.getpass('请输入微博密码:')

login_button_xpath = '//*[@id="pl_login_form"]/div/div[3]/div[6]/a'
request_timeout = 10  # seconds, so a stalled server cannot hang the script

driver = webdriver.Chrome()
try:
    driver.get('http://weibo.com/')
    # Fixed wait for the login form to render.
    time.sleep(10)

    # find_element(By..., ...) replaces the find_element_by_* helpers, which
    # were removed in Selenium 4.
    driver.find_element(By.ID, 'loginname').send_keys(username)
    driver.find_element(By.NAME, 'password').send_keys(password)
    driver.find_element(By.XPATH, login_button_xpath).click()
    time.sleep(4)

    # driver.page_source holds the current page HTML.
    if "请输入验证码" in driver.page_source:
        # Save the captcha image so the user can read it and type it in.
        img_ele = driver.find_element(By.XPATH, '//a[@class = "code W_fl"]/img')
        img_link = img_ele.get_attribute('src')
        response1 = requests.get(img_link, timeout=request_timeout)
        with open('yanzhengma.jpg', 'wb') as f:
            f.write(response1.content)
        input_src = input('请输入验证码:')
        driver.find_element(By.NAME, 'verifycode').send_keys(input_src)
        driver.find_element(By.XPATH, login_button_xpath).click()
    # Fixed wait for the post-login page before reading cookies.
    time.sleep(20)

    cookie_list = driver.get_cookies()
finally:
    # Always shut the browser down, even when a step above fails.
    driver.quit()

print(cookie_list)

# get_cookies() returns a list of dicts; a Cookie header is a single string
# of name=value pairs separated by "; ".
cookie_str = '; '.join(
    cookie_item['name'] + '=' + cookie_item['value']
    for cookie_item in cookie_list
)

url = 'https://account.weibo.com/set/index?topnav=1&wvr=6'

headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36',
    'cookie': cookie_str
}
response = requests.get(url, headers=headers, timeout=request_timeout)
with open('weibo.html', 'wb') as f:
    f.write(response.content)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值