Catalog
爬取并执行【被爬取的代码】
import requests, re
# Download a blog post, pull out its first <pre><code> listing, execute that
# listing in this module's namespace, then call the function it defines.
url = 'https://blog.csdn.net/Yellow_python/article/details/81251877'
header = {'User-Agent': 'Opera/8.0 (Windows NT 5.1; U; en)'}
# A timeout keeps the script from blocking forever on an unresponsive server.
r = requests.get(url, headers=header, timeout=10)
# Raw string so the backslashes reach the regex engine untouched.
# [\s\S]+? matches lazily across newlines, stopping at the first closing tag.
snippets = re.findall(r'<pre><code>([\s\S]+?)</code></pre>', r.text)
if not snippets:
    # Fail with a clear message instead of an opaque IndexError on [0].
    raise RuntimeError('no <pre><code> block found at ' + url)
func = snippets[0].strip()
# SECURITY: exec() runs whatever text the remote page currently serves, with
# the full privileges of this process. Only do this with a page you control.
# The executed listing is expected to define get_proxies(), called below.
exec(func)
proxies = get_proxies()
for proxy in proxies:
    print(proxy)
被爬取的代码
def get_proxies():
    """Scrape proxy entries from the xicidaili listing page.

    Relies on ``requests`` and ``re`` already being available in the
    namespace where this function is defined.

    Returns:
        A list of single-entry dicts, one per regex match, mapping the
        lower-cased third captured field (presumably the protocol column --
        confirm against the page markup) to an ``"ip:port"`` string.
    """
    url = 'http://www.xicidaili.com/nn/'  # domestic high-anonymity proxy IPs
    header = {'User-Agent': 'Opera/8.0 (Windows NT 5.1; U; en)'}
    # A timeout keeps the call from blocking forever on an unresponsive server.
    r = requests.get(url, headers=header, timeout=10).text
    # Raw string so \W, \d, \s and \S reach the regex engine untouched.
    # \W stands in for the angle brackets around each td tag; presumably this
    # keeps the listing intact when it is itself scraped out of HTML.
    # Captures: (1) dotted address, (2) port, (3) text of the cell that
    # follows the "country" cell.
    pt = r'\Wtd\W([0-9.]{7,15})\W/td\W\s+\Wtd\W(\d+)\W/td\W\s+\Wtd\W[\s\S]+?\W/td\W\s+\Wtd class="country"\W高匿\W/td\W\s+\Wtd\W(.+?)\W/td\W'
    ls = re.findall(pt, r)
    # Echo the raw matches; kept so the visible output stays the same.
    print(ls)
    return [{i[2].lower(): i[0] + ':' + i[1]} for i in ls]
基础补充
1、exec和eval函数
- exec：执行字符串形式的代码，返回 None
- eval：执行字符串形式的表达式，返回执行结果
In [1]: eval('a+1',{'a':2})
Out[1]: 3
In [2]: exec('print(100)')
100