1 需求
利用自定义函数实现批量爬取多家公司的新闻。
2 代码实现
from selenium import webdriver
import re
def dongfang(company):
    """Print the Eastmoney news search results for one company.

    Loads the Eastmoney news search page for ``company`` in headless Chrome,
    extracts each result's title, link and timestamp from the page source
    with regular expressions, and prints them as a numbered list.

    Args:
        company: Search keyword (company name). It is percent-encoded before
            being placed in the query string.

    Raises:
        IndexError: If fewer dates or links than titles were extracted.
        Exception: Any error raised by Selenium while starting the browser
            or loading the page propagates to the caller.
    """
    from urllib.parse import quote

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    browser = webdriver.Chrome(options=chrome_options)
    try:
        # Encode the keyword so characters such as '&', '#' or spaces cannot
        # corrupt the query string.
        browser.get('https://so.eastmoney.com/news/s?keyword='
                    + quote(company, safe=''))
        data = browser.page_source
    finally:
        # Always shut the browser down; otherwise every call leaves a
        # headless Chrome process running.
        browser.quit()

    # NOTE(review): the title pattern requires '<em><em>' inside the anchor,
    # while the link and date patterns do not, so the three lists may differ
    # in length or alignment for some result pages -- confirm against the
    # live markup.
    p_title = r'<div class="news_item_t"><a href=.*?<em><em>(.*?</a></div>)'
    p_href = r'<div class="news_item_t"><a href="(.*?)" target="_blank">'
    p_date = r'<div class="news_item_c"><span class="news_item_time">(.*?)</span><span>'
    titles = re.findall(p_title, data)
    hrefs = re.findall(p_href, data)
    dates = re.findall(p_date, data)

    for index, raw_title in enumerate(titles):
        # Drop the leftover HTML tags captured together with the title text.
        title = re.sub(r'<.*?>', '', raw_title).strip()
        print(str(index + 1) + "." + title + "\t" + dates[index])
        print("(" + hrefs[index] + ")")
if __name__ == '__main__':
    # Crawl every company in turn; a failure for one company is reported and
    # the loop moves on to the next.
    companies = ['格力电器', '阿里巴巴', '京东', "华能信托"]
    for company in companies:
        try:
            dongfang(company)
        except Exception as exc:
            # Catch Exception rather than using a bare except so that Ctrl+C
            # and SystemExit still stop the batch, and show the reason
            # instead of hiding it.
            print(company + "东方财富网爬取失败!", repr(exc))
        else:
            print(company + "东方财富网爬取成功!")