刚发现 requests-html 库,比 requests 更好用。
之前用 requests 爬过新浪新闻标题,
这次用 requests-html 爬取网易财经的上市公司名称和业绩,以及东方财富网的上市公司新闻内容。
crawler_entity.py
from requests_html import HTMLSession
import csv
import os
import random
# Browser User-Agent strings (a few collected from the web).
# NOTE(review): `random` is imported by this file, presumably to pick one of
# these per request — confirm against the request code.
USER_AGENTS = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    # ... (remaining entries were elided in the original listing; the bare
    # "......" placeholder was a syntax error, so it is kept as a comment)
]
# Module-level requests_html session, created when this module is imported.
session = HTMLSession()
# Alternative root URL, left commented out (not used by the visible code):
#urlroot = 'https://car.autohome.com.cn/'
# Listing URL on quotes.money.163.com with query parameters
# reportdate=20181231, sort=publishdate, order=desc. The string ends with
# "page=", so a page number is presumably appended by the caller — TODO confirm.
url_base = 'http://quotes.money.163.com/data/caibao/yjgl_ALL.html?reportdate=20181231&sort=publishdate&order=desc&page='
def get_companyentity():
current_dir = os.path.abspath('.')
print(current_dir)
file_name = os.path.join(current_dir, "entity.csv")
print(file_name)
with open(file_name, 'wt', newline='') as csvfile:
writer = csv.writer(csvfile)
header = ['序号','代码'