# Fetch usable proxy IPs from xicidaili, validated with multiprocessing
import requests
from lxml import etree
import time
from multiprocessing import Pool
class GetProxy(object):
    """Base class for proxy sources.

    Subclasses supply candidate proxies through ``get_all_proxy``; this class
    checks each candidate by issuing a test request through it.
    """

    def get_all_proxy(self):
        """Return an iterable of candidate proxy URL strings.

        Subclasses must override this method.

        Raises:
            NotImplementedError: always, in this base class.
        """
        # An explicit exception rather than ``assert(0)``: assertions are
        # stripped under ``python -O``, which would make this return None.
        raise NotImplementedError('subclasses must implement get_all_proxy()')

    def validate_proxy(self, proxy_str):
        """Check whether a test request succeeds through ``proxy_str``.

        Args:
            proxy_str: proxy URL used for both http and https traffic.

        Returns:
            The ``{'http': ..., 'https': ...}`` proxies dict when the test
            request succeeds, otherwise ``None``.
        """
        url = 'http://www.baidu.com'
        proxy = {
            'http': proxy_str,
            'https': proxy_str,
        }
        try:
            response = requests.get(url, timeout=5, proxies=proxy)
            # An error status (e.g. 407 or 502) means the proxy is not usable
            # even though a response came back.
            response.raise_for_status()
        except Exception:
            # Any failure marks the proxy as bad. ``Exception`` (not a bare
            # ``except``) lets KeyboardInterrupt/SystemExit stop the worker.
            print("这个ip不行", proxy)
            return None
        else:
            print('这个proxy好用', proxy)
            return proxy

    def validate_proxy_concurrent(self):
        """Validate every candidate from ``get_all_proxy`` in a process pool.

        Returns:
            A list of the proxies dicts that passed ``validate_proxy``.
        """
        pool = Pool(30)
        try:
            pending = [
                pool.apply_async(func=self.validate_proxy, args=(proxy,))
                for proxy in self.get_all_proxy()
            ]
            # ``get()`` blocks until the corresponding check has finished.
            checked = (result.get() for result in pending)
            return [good_proxy for good_proxy in checked if good_proxy]
        finally:
            # Release the worker processes even if submitting or collecting
            # a result raised.
            pool.close()
            pool.join()
class Getxicidailiproxy(GetProxy):
    """Proxy source that scrapes candidates from the xicidaili listing page."""

    def get_all_proxy(self):
        """Yield candidate proxy URLs parsed from the xicidaili table.

        Yields:
            Strings of the form ``http://<ip>:<port>``, built from the second
            and third cells of each table row.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36'
        }
        url = 'https://www.xicidaili.com/nn/'
        # Without a timeout an unresponsive server would block forever.
        response = requests.get(url, headers=headers, timeout=10)
        html_ele = etree.HTML(response.text)
        tr_ele_list = html_ele.xpath('//table[@id="ip_list"]/tr')
        # The first four rows are skipped, as in the original code.
        # NOTE(review): presumably header/unwanted rows; confirm the offset.
        for tr_ele in tr_ele_list[4:]:
            ip_cells = tr_ele.xpath('./td[2]/text()')
            port_cells = tr_ele.xpath('./td[3]/text()')
            if not ip_cells or not port_cells:
                # Skip rows lacking an ip or port cell instead of raising
                # IndexError and aborting the whole generator.
                continue
            yield 'http://' + ip_cells[0] + ':' + port_cells[0]
if __name__ == '__main__':
    # Time the complete scrape-and-validate run.
    began_at = time.time()
    usable_proxies = Getxicidailiproxy().validate_proxy_concurrent()
    print('所有的好用的proxy是:')
    print(usable_proxies)
    # Elapsed wall-clock time in seconds.
    elapsed = time.time() - began_at
    print("花费时间", elapsed)
# Weibo automatic login
from selenium import webdriver
import time
import requests
# Log in to Weibo through a browser driven by Selenium, then reuse the
# browser's cookies with requests to download the account settings page.
import os

from selenium.webdriver.common.by import By

# NOTE(review): credentials are hard-coded in source; they should be removed
# and rotated. Environment variables take precedence when set.
WEIBO_USERNAME = os.environ.get('WEIBO_USERNAME', '18804899903')
WEIBO_PASSWORD = os.environ.get('WEIBO_PASSWORD', 'insist44668')
LOGIN_BUTTON_XPATH = '//*[@id="pl_login_form"]/div/div[3]/div[6]/a'

driver = webdriver.Chrome()
try:
    driver.get('http://weibo.com/')
    time.sleep(10)  # fixed wait; presumably lets the login form render
    # find_element(By.…) replaces the find_element_by_* helpers, which were
    # removed in Selenium 4.3.
    driver.find_element(By.ID, 'loginname').send_keys(WEIBO_USERNAME)
    driver.find_element(By.NAME, 'password').send_keys(WEIBO_PASSWORD)
    driver.find_element(By.XPATH, LOGIN_BUTTON_XPATH).click()
    time.sleep(4)
    if "请输入验证码" in driver.page_source:
        # The page asks for a captcha: save the image to disk so the user
        # can read it and type the code at the prompt.
        img_ele = driver.find_element(By.XPATH, '//a[@class = "code W_fl"]/img')
        img_link = img_ele.get_attribute('src')
        response1 = requests.get(img_link, timeout=10)
        with open('yanzhengma.jpg', 'wb') as f:
            f.write(response1.content)
        input_src = input('请输入验证码:')
        driver.find_element(By.NAME, 'verifycode').send_keys(input_src)
        driver.find_element(By.XPATH, LOGIN_BUTTON_XPATH).click()
    # NOTE(review): the nesting of this wait was ambiguous in the original;
    # waiting unconditionally is the conservative reading.
    time.sleep(20)
    cookie_list = driver.get_cookies()
finally:
    # Always shut the browser down so the Chrome/chromedriver process does
    # not outlive the script.
    driver.quit()
print(cookie_list)

# Flatten the browser cookies into a single "name=value;name=value" string.
cookie_item_str_list = [
    cookie_item['name'] + '=' + cookie_item['value']
    for cookie_item in cookie_list
]
cookie_str = ';'.join(cookie_item_str_list)

url = 'https://account.weibo.com/set/index?topnav=1&wvr=6'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36',
    'cookie': cookie_str,
}
# A timeout keeps the script from hanging on an unresponsive server.
response = requests.get(url, headers=headers, timeout=10)
with open('weibo.html', 'wb') as f:
    f.write(response.content)