# coding:utf-8
import re
import requests
# 获取网页内容
import pickle
# Fetch the page content. The timeout keeps the script from hanging forever
# if the server never responds.
r = requests.get('http://www.163.com', timeout=10)
data = r.text

# Use a regex to collect the value of every content="..." / content='...'
# attribute found in the page source.
link_list = re.findall(r"(?<=content=\").+?(?=\")|(?<=content=\').+?(?=\')", data)
for url in link_list:
    print(url)

# Concatenate all matches; every value (including the last one) is followed
# by a '----' separator.
serialInfo = ''.join(url + '----' for url in link_list)

# Serialize the combined string to disk. The `with` block closes (and therefore
# flushes) the file before it is read back below.
with open('demo.txt', 'wb') as f:
    pickle.dump(serialInfo, f)

# Read the pickle back in binary mode (pickle data is bytes, not text) to
# verify the round trip. Only unpickle files this script wrote itself:
# pickle.load must never be used on untrusted data.
with open('demo.txt', 'rb') as f:
    obj2 = pickle.load(f)

print('-------->')
print(obj2)
利用 Python 实现简单爬虫
最新推荐文章于 2022-07-03 21:00:32 发布