学Python,用RPA
艺赛旗RPA2020.1版本 正在免费下载使用中,欢迎下载使用
www.i-search.com.cn/index.html?from=line1
爬取后，方便大家直接在 Excel 中找到已分享经验的标题与对应的链接。代码如下：
import requests, xlwt
from bs4 import BeautifulSoup
from lxml import etree
url_list = []  # accumulates [title, url] pairs, one per post
# 获取源码
def get_content(url):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.

    Returns:
        The undecoded response body (``bytes``), i.e. the ``content``
        attribute of the ``requests`` response. The HTTP status code is
        not checked.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            the server does not answer within the timeout.
    """
    # Without a timeout, requests may block forever on a stalled server.
    return requests.get(url, timeout=10).content
# 获取某页中的所有帖子的url
def get_url(html):
    """Collect the title and link of every post found in *html*.

    Every ``<h2 class="fn-ellipsis">`` element contributes one
    ``[title, href]`` pair, taken from the first ``<a>`` inside it. The
    pairs are appended to the module-level ``url_list``.

    Args:
        html: Page markup in any form accepted by ``BeautifulSoup``.

    Returns:
        None. Results are stored in ``url_list`` / ``last_list``.
    """
    soup = BeautifulSoup(html, 'lxml')  # 'lxml' selects the third-party lxml parser
    for heading in soup.find_all('h2', class_='fn-ellipsis'):
        link = heading.find('a')  # look the anchor up once, use it twice
        url_list.append([link.text, link['href']])
    # NOTE(review): last_list is not defined in the visible part of this file;
    # presumably a module-level list declared elsewhere - confirm. Note that
    # the shared url_list object itself is appended, not a copy.
    last_list.append(url_list)
# Fetch the first listing page and parse it into an lxml element tree.
start_url = 'http://support.i-search.com.cn/recent?p=1'
# Bound the wait so the script cannot hang on an unresponsive server.
response = requests.get(start_url, timeout=10)
content = response.text
selector = etree.HTML(content)
maxPag