使用 Python 爬取新闻标题及链接,解析数据并保存为 Excel 文件。
import re
import requests
from lxml import etree
import pandas as pd
# Page to download: the Baidu News front page.
url = 'http://news.baidu.com/'
# Request headers: send a desktop-browser User-Agent string with the request.
headers = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36',
}
# Without a timeout, requests waits indefinitely for a server that never
# answers; bound the wait so the script cannot hang forever.
response = requests.get(url, headers=headers, timeout=10)
# Stop on an HTTP error status (4xx/5xx) instead of going on to process
# an error page as if it were the news page.
response.raise_for_status()
# Decode the raw response bytes explicitly as UTF-8 text.
data = response.content.decode('utf-8')
with open(