多进程编程、微博登录（获取cookie）

最新推荐文章于 2023-06-30 17:52:52 发布

learner_pu

最新推荐文章于 2023-06-30 17:52:52 发布

阅读量223

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/qq_38408573/article/details/104491660

版权

python 专栏收录该内容

18 篇文章 0 订阅

订阅专栏

获取 xicidaili 上可用的代理 IP（多进程编程）

import requests
from lxml import etree
import time
from multiprocessing import Pool

class GetProxy(object):
    """Base class that checks candidate proxies concurrently.

    Subclasses provide the candidates by overriding ``get_all_proxy``;
    ``validate_proxy_concurrent`` then probes each one in a process pool.
    """

    def get_all_proxy(self):
        """Return an iterable of proxy URL strings; must be overridden.

        Raises:
            NotImplementedError: always, in this base class.
        """
        # An explicit raise (unlike ``assert``) still fires under ``python -O``.
        raise NotImplementedError('subclasses must implement get_all_proxy()')

    def validate_proxy(self, proxy_str):
        """Probe one proxy by fetching a fixed URL through it.

        Args:
            proxy_str: proxy URL used for both the http and https schemes.

        Returns:
            The ``proxies`` dict handed to ``requests.get`` when the request
            completes within the 5 second timeout, otherwise ``None``.
        """
        url = 'http://www.baidu.com'
        proxy = {
            'http': proxy_str,
            'https': proxy_str,
        }
        try:
            requests.get(url, timeout=5, proxies=proxy)
        except requests.RequestException:
            # Only request failures mark the proxy as bad; KeyboardInterrupt
            # and SystemExit are no longer swallowed as a bare except did.
            print("这个ip不行", proxy)
            return None
        print('这个proxy好用', proxy)
        return proxy

    def validate_proxy_concurrent(self, processes=30):
        """Validate every proxy from ``get_all_proxy`` in a process pool.

        Args:
            processes: number of worker processes in the pool (default 30,
                the value that used to be hard-coded).

        Returns:
            A list with the non-``None`` results of ``validate_proxy``, in
            the order the candidates were produced.
        """
        candidates = list(self.get_all_proxy())
        if not candidates:
            # Nothing to check: skip spawning worker processes altogether.
            return []

        # A pool reuses its workers and caps how many run at the same time.
        pool = Pool(processes)
        try:
            # Submit everything first so the checks overlap; calling get()
            # right after each submit would serialize them.
            pending = [
                pool.apply_async(func=self.validate_proxy, args=(candidate,))
                for candidate in candidates
            ]
            outcomes = [task.get() for task in pending]
        finally:
            # Always release the workers, even if a task raised.
            pool.close()
            pool.join()

        return [outcome for outcome in outcomes if outcome]

class Getxicidailiproxy(GetProxy):
    """Proxy source that scrapes the xicidaili.com listing page."""

    def get_all_proxy(self):
        """Yield ``http://ip:port`` strings taken from the ``ip_list`` table.

        The IP is read from the second ``td`` of each row and the port from
        the third. Rows lacking either cell are skipped.

        Yields:
            str: one proxy URL per usable table row.
        """
        headers = {
            'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36'
        }

        url = 'https://www.xicidaili.com/nn/'
        # Bounded wait so a stalled server cannot hang the whole run.
        response = requests.get(url, headers=headers, timeout=10)

        html_ele = etree.HTML(response.text)
        if html_ele is None:
            # Defensive: the parser produced no tree, so there are no rows.
            return

        # NOTE(review): the original dropped the first four rows with [4:]
        # although its comment only mentioned removing the header row. Rows
        # are now filtered by content instead, which skips any row without
        # data cells (such as a header) and avoids IndexError on them.
        for tr_ele in html_ele.xpath('//table[@id="ip_list"]/tr'):
            # The leading '.' keeps each XPath relative to the current row.
            ip_text = tr_ele.xpath('./td[2]/text()')
            port_text = tr_ele.xpath('./td[3]/text()')
            if not ip_text or not port_text:
                continue
            yield 'http://' + ip_text[0].strip() + ':' + port_text[0].strip()




if __name__ == '__main__':
    # Validate the scraped proxies, print the usable ones, then report the
    # elapsed wall-clock time.
    began_at = time.time()
    usable_proxies = Getxicidailiproxy().validate_proxy_concurrent()
    print('所有的好用的proxy是：')
    print(usable_proxies)
    elapsed = time.time() - began_at
    print("花费时间", elapsed)

微博自动登录

from selenium import webdriver
import time
import requests

# Log in to Weibo through a real browser, copy the session cookies out of it,
# and reuse them with ``requests`` to download the account settings page.
import getpass
import os

from selenium.webdriver.common.by import By

# Credentials are read from the environment, falling back to an interactive
# prompt, so the account and password are never stored in the source file.
username = os.environ.get('WEIBO_USERNAME') or input('请输入微博账号：')
password = os.environ.get('WEIBO_PASSWORD') or getpass.getpass('请输入微博密码：')

# The same button submits the form before and after a captcha is entered.
login_button_xpath = '//*[@id="pl_login_form"]/div/div[3]/div[6]/a'

driver = webdriver.Chrome()
try:
    driver.get('http://weibo.com/')
    time.sleep(10)  # fixed delay before the login form is used

    # find_element(By..., ...) replaces the find_element_by_* helpers, which
    # newer Selenium releases no longer provide.
    driver.find_element(By.ID, 'loginname').send_keys(username)
    driver.find_element(By.NAME, 'password').send_keys(password)
    driver.find_element(By.XPATH, login_button_xpath).click()
    time.sleep(4)

    # driver.page_source holds the HTML of the page currently displayed.
    if "请输入验证码" in driver.page_source:
        # Save the captcha image locally so a human can read it and type it in.
        img_ele = driver.find_element(By.XPATH, '//a[@class = "code W_fl"]/img')
        img_link = img_ele.get_attribute('src')
        response1 = requests.get(img_link, timeout=10)
        with open('yanzhengma.jpg', 'wb') as f:
            f.write(response1.content)
        input_src = input('请输入验证码：')
        driver.find_element(By.NAME, 'verifycode').send_keys(input_src)
        driver.find_element(By.XPATH, login_button_xpath).click()
    time.sleep(20)

    cookie_list = driver.get_cookies()
finally:
    # Always shut the browser down, even when a step above fails.
    driver.quit()

print(cookie_list)
# get_cookies() returns a list of dicts, while the Cookie request header is a
# single string of "name=value" pairs, so the pairs are joined here.
cookie_str = ';'.join(
    cookie_item['name'] + '=' + cookie_item['value']
    for cookie_item in cookie_list
)


url = 'https://account.weibo.com/set/index?topnav=1&wvr=6'

headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36',
    'cookie': cookie_str,
}
response = requests.get(url, headers=headers, timeout=10)
with open('weibo.html', 'wb') as f:
    f.write(response.content)

learner_pu

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
多进程编程、微博登录（获取cookie）

获取xicidaili能用的ip，多进程编程：import requests / from lxml import etree / import time / from multiprocessing import Pool / class GetProxy(object): def get_all_proxy(self): assert(0)  # 执行到这，必然报错 def...
复制链接

扫一扫

专栏目录