Python 爬取某网站股票分类的 URL 参数,为后续获取数据做准备

import requests
from bs4 import BeautifulSoup

# Page that is downloaded and parsed by the rest of this script.
qh_url = "http://quotes.money.163.com/old/"

# Example query parameters, kept for reference only (not used by this script):
# qh_params = {
#     "code": "0600754",
#     "start": "19900101",
#     "end": "20201025",
#     "fields": "TCLOSE;HIGH;LOW;TOPEN;LCLOSE;CHG;PCHG;TURNOVER;VOTURNOVER;VATURNOVER;TCAP;MCAP",
# }

# NOTE(review): the User-Agent value ends in "Safar", which looks like a
# truncated "Safari/537.36" -- confirm before changing, it is sent as-is.
_qh_user_agent = (
    "Mozilla/5.0 (Windows NT 10.0; WOW64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/78.0.3904.108 Safar"
)

# HTTP request headers sent with every request; several are deliberately
# present but empty.
qh_headers = {
    "Referer": "",
    "User-Agent": _qh_user_agent,
    "X-Request-Type": "",
    "Connection": "close",
    "X-Requested-With": "",
}

# Request the data
def qh_requests_get(qh_url, qh_headers, timeout=10):
    """Send a GET request and return the response body as text.

    Args:
        qh_url: URL to request.
        qh_headers: Mapping of HTTP headers to send with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled server
            would hang the whole script.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    qh_p = requests.get(qh_url, headers=qh_headers, timeout=timeout)
    # Show the URL reported by the response object.
    print(qh_p.url)
    # Decode the body as UTF-8 instead of relying on the detected encoding.
    qh_p.encoding = 'utf-8'
    return qh_p.text

# Download the page whose navigation tree carries the query parameters.
aa = qh_requests_get(qh_url, qh_headers)

qh_soup = BeautifulSoup(aa, "lxml")

# Narrow the document step by step: the element with id "ntesTree", then the
# <li id="f0-f3"> node inside it, then every <li> below that node.
qh_soup = qh_soup.find_all('div', attrs={'id': 'ntesTree'})[0]
qh_soup = qh_soup.find_all('li', attrs={'id': 'f0-f3'})[0]
qh_soup = qh_soup.find_all('li')

# Columns that are identical for every row, so they are built once.
# Host name
qh_host = """http://quotes.money.163.com/hs/service/diyrank.php"""
# Field names
qh_fields = """NO,SYMBOL,NAME,PRICE,PERCENT,UPDOWN,FIVE_MINUTE,OPEN,YESTCLOSE,HIGH,LOW,VOLUME,TURNOVER,HS,LB,WB,ZF,PE,MCAP,TCAP,MFSUM,MFRATIO.MFRATIO2,MFRATIO.MFRATIO10,SNAME,CODE,ANNOUNMT,UVSNEWS"""

# The "A+B股" and "A+H股" entries have no "qquery" attribute, so their values
# are supplied here.
qh_qquery_fallback = {"A+B股": "AB", "A+H股": ""}

# Result list: one row per <li>, laid out as
# [title, *qcond parts, qquery, host, fields].
qh_JieGuo_list = []
for qh_row in qh_soup:
    qh_link = qh_row.a
    qh_title = qh_link["title"]
    qh_owner = qh_link.parent

    # "qcond" holds several values separated by ";"; each becomes a column.
    qh_qcond = qh_owner["qcond"].split(";")

    # Read "qquery" without raising when it is absent. Any other entry
    # lacking the attribute gets "" so host and fields stay in their columns.
    qh_qquery = qh_owner.get("qquery")
    if qh_qquery is None:
        qh_qquery = qh_qquery_fallback.get(qh_title, "")

    qh_row_list = [qh_title, *qh_qcond, qh_qquery, qh_host, qh_fields]
    qh_JieGuo_list.append(qh_row_list)
print(qh_JieGuo_list)

for qh_row in qh_JieGuo_list:
    print(qh_row)

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值