#encoding=utf8
import requests
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Scrape the proxy table from kuaidaili.com and save it as "ip<TAB>port"
# lines in proxy.txt (one proxy per line).
session = requests.Session()
header = {
    # Present as a desktop browser so the site serves the regular page.
    'User-Agent': ('Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) '
                   'Gecko/20100101 Firefox/43.0'),
}
url = 'http://www.kuaidaili.com/'
req = session.get(url, headers=header)
soup = BeautifulSoup(req.text, "lxml")
ips = soup.findAll('tr')
# Use a context manager so the file is flushed and closed even on error;
# the original called `f.close` without parentheses and never closed it.
with open("proxy.txt", "w") as f:
    # Start at row 1: row 0 is the table header, not a proxy entry.
    for x in range(1, len(ips)):
        tds = ips[x].findAll("td")
        # Column 0 is the host, column 1 the port.
        f.write(tds[0].contents[0] + "\t" + tds[1].contents[0] + "\n")
import requests
from bs4 import BeautifulSoup
# Load the scraped proxies and keep only those that can fetch the test URL
# within the timeout; working proxies are collected in `proxys_new`.
with open(r"E:\Users\Desktop\proxy.txt") as f:
    lines = f.readlines()

proxys = []
proxys_new = []
for line in lines:
    # Each line is "host<TAB>port" as written by the scraper.
    ip = line.strip("\n").split("\t")
    proxy_host = "http://" + ip[0] + ":" + ip[1]
    proxys.append({"http": proxy_host})

# NOTE(review): this URL looks truncated — confirm the intended test page.
url = "http:"
for proxy in proxys:
    try:
        page = requests.get(url, proxies=proxy, timeout=1)
        bs = BeautifulSoup(page.text, "lxml")
        # An empty <h1> marks the expected page content; keep this proxy.
        if bs.h1.get_text() == '':
            proxys_new.append(proxy)
    except Exception:
        # Best-effort filtering: report the failing proxy and move on.
        print(proxy)
        continue
import requests
from bs4 import BeautifulSoup
# Fetch one page through a single fixed HTTP proxy and print the
# parsed document, as a quick manual check that the proxy works.
proxies = {"http": "http://112.229.101.90:8118"}

page = requests.get(
    "http://www.poi86.com/poi/amap/40.html",
    proxies=proxies,
)
bs = BeautifulSoup(page.text, "lxml")
print(bs)
import socks
import socket
# Route ALL subsequent socket traffic through a local SOCKS5 proxy on
# 127.0.0.1:8787 by monkey-patching the socket module; every library
# built on sockets (including requests) is affected from this point on.
socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 8787)
socket.socket = socks.socksocket
# Imported after the patch so its connections use the SOCKS socket.
import requests
# Fetch a site through the tunnel and print the parsed HTML.
page=requests.get("http://www.google.com.hk/")
bs=BeautifulSoup(page.text,"lxml")
print(bs)
# NOTE(review): this prints the Response object repr (e.g. <Response [200]>),
# not the page body — confirm that is the intent.
print (requests.get('http://www.google.com'))