之前一直使用 tushare.get_today_all() 获取市盈率和市净率。不知道是不是因为该接口已不再维护,获取到的数据与行情软件上显示的数据存在出入。刚好这段时间在学习爬虫,因此写了一段从东方财富网站获取市盈率和市净率的代码:
def web_data_to_json(url):
    """Download *url* and return the JSON array stored under its ``diff`` key.

    The response body is searched for the literal text ``diff":[`` and the
    JSON array beginning at that ``[`` is decoded.

    Args:
        url: Address to fetch with ``requests.get`` (30 second timeout).

    Returns:
        The decoded array (a ``list``), or ``None`` when the response body
        does not contain the ``diff":[`` marker.

    Raises:
        json.JSONDecodeError: If the text following the marker is not a
            valid JSON array.
        requests.RequestException: Propagated unchanged from ``requests.get``.
    """
    marker = "diff\":["
    response = requests.get(url, timeout=30)
    body = response.text

    marker_pos = body.find(marker)
    if marker_pos == -1:
        return None

    # Index of the "[" that opens the array (the marker's last character).
    array_start = marker_pos + len(marker) - 1

    # The previous implementation dropped a fixed four trailing characters
    # before decoding, which breaks as soon as the wrapper around the array
    # differs (extra newline, missing ";", different nesting).  raw_decode
    # stops at the array's closing "]" and ignores whatever follows.
    # NOTE(review): the payload is presumably a JSONP reply ending in "}});"
    # - confirm against a live response.
    records, _end = json.JSONDecoder().raw_decode(body, array_start)
    return records
def get_per_pb(basic_url): df = pd.DataFrame(columns=["code", "per", "pb"]) for i in range(1, 200): if i % 5 == 0: print("the {}th web page, total 200 web pages".format(i)) url1 = basic_url[:95] url2 = basic_url[96:] url = url1 + str(i) + url2 data_list = web_data_to_json(url) if data_list is None: