#本脚本主要实现循环爬取数据后:#1、同一类数据统一写入到同一个数组中,#2、读取数组数据写入指定的excel列中,实现最终数据爬取
import xlrd #引入读取excel库
import requests #倒入requests库
from lxml import etree #倒入lxml 库(没有这个库,pip install lxml安装)
importosimportsysimportopenpyxlimportre
path=os.path.abspath(os.path.dirname(sys.argv[0]))#读取excel文档内的股票代码
defcode():
wb= xlrd.open_workbook(path+'\\stock.xlsx')#打开Excel文件
data = wb.sheet_by_name('Sheet1')#通过excel表格名称(rank)获取工作表
b=data.col_values(0)#获取第一列数据(数组)
list=[]for c in b[1:]:#for循环,排除第一行数据
d=int(c)
s="%06d" % d#股票代码一共有6位,常规打印无法打印出首位带0的代码的0部分,补齐缺失的0
#print(s)
list.append(s)return(list)
code=code()#code函数获取的代码,循环爬取代码对应的股票数据,将股票数据写入对应的数组(同一类)中
defget(code):
list_name=[]#股票名称
list_score=[]#综合评分
list_Short=[]#短期趋势
list_Metaphase=[]#中期趋势
list_Long=[]#长期趋势
list_comprehensive=[]#综合评判
list_day=[]#5日涨幅
list_mouth=[]#3个月涨幅
list_year=[]#1年涨幅
for num incode:
url='http://stockpage.10jqka.com.cn/'+num+'/'headers={'Accept-Encoding': 'gzip, deflate','Accept-Language': 'zh-CN,zh;q=0.9','Upgrade-Insecure-Requests': '1','User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36','Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8','Referer': 'http://doctor.10jqka.com.cn/603160/','Connection': 'keep-alive','Cache-Control': 'max-age=0',
}
response= requests.get(url, headers=headers).text
html=etree.HTML(response)
b= html.xpath('//h1[@class = "m_logo fl"]/a/strong/text()')#print(b[0])#股票名称
c = html.xpath('//span[@class = "analyze-tips mt7"]/text()')#print(c[0])#综合评分
d = html.xpath('//div[@class = "analyze-txt fr"]/div/div[2]/text()')#print("短期趋势:",d[0])#短期趋势
e = html.xpath('//div[@class = "analyze-txt fr"]/div[2]/div[2]/text()')#print("中期趋势:",e[0])#中期趋势
f = html.xpath('//div[@class = "analyze-txt fr"]/div[3]/div[2]/text()')#print("远期趋势:",f[0])#远期趋势
g = html.xpath('//div[@class = "txt-phra"]/text()')
h= html.xpath('//div[@class = "txt-phra"]/strong/text()')
i= html.xpath('//div[@class = "txt-phra"]/text()[2]')#print(g[0],h[0],i[0])#综合评判
m=g[0]+h[0]+i[0]#j = html.xpath('//tr[@class = "even hot_cont"]/td[2]/text()')
#k = html.xpath('//tr[@class = "even hot_cont"]/td[3]/text()')
#l = html.xpath('//tr[@class = "even hot_cont"]/td[4]/text()')
tr_content = re.findall('
(.*?)td_content= re.findall('
(.*?)#print("5日涨幅:",j[0])#5日涨幅
#print("3个月涨幅:",k[0])#3个月涨幅
#print("1年涨幅:",l[0])#1年涨幅
list_name.append(b[0])#股票名称数组
list_score.append(c[0])#综合评分
list_Short.append(d[0])#短期趋势
list_Metaphase.append(e[0])#中期趋势
list_Long.append(f[0])#长期趋势
list_comprehensive.append(m)#综合评判
list_day.append(td_content[1])#5日涨幅
list_mouth.append(td_content[2])#3个月涨幅
list_year.append(td_content[3])#1年涨幅
return(list_name,list_score,list_Short,list_Metaphase,
list_Long,list_comprehensive,list_day,list_mouth,list_year)
get=get(code)defbaidu(code):
list_Price=[]
list_market=[]for num incode:
cookies={'BIDUPSID': '90EF3BD78F53BC8C96DF84CD3854CA2D','PSTM': '1578233930','BD_UPN': '12314753','BAIDUID': '885754C8E6BD7B1A771802631815CC6D:FG=1','BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598','BDUSS': 'mxYdVpwOEx0eGJsT3VUYTJXbkZJYWhKSGpQWnlqaVBwMlExTWNNRkR4cWtabHRlSVFBQUFBJCQAAAAAAAAAAAEAAACRJsY-cGlwacnxu7AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKTZM16k2TNeV','COOKIE_SESSION': '7_0_5_3_11_3_0_0_4_2_1_0_73199_0_169_0_1580456363_0_1580456194%7C9%23622712_32_1580376248%7C6','cflag': '13%3A3','BD_HOME': '1','BDRCVFR[feWj1Vr5u3D]': 'I67x6TjHwwYf0','delPer': '0','BD_CK_SAM': '1','PSINO': '3','H_PS_PSSID': '1438_21104_26350','H_PS_645EC': '29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8','BDSVRTM': '121','WWW_ST': '1580466352318',
}
headers={'is_xhr': '1','Accept-Encoding': 'gzip, deflate, br','Accept-Language': 'zh-CN,zh;q=0.9','User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36','is_pbs': num,'Accept': '*/*','Referer': 'https://www.baidu.com/s?wd='+num+'&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=0&rsv_dl=tb&oq='+num+'&rsv_t=29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8&rsv_pq=b379448d00013935','X-Requested-With': 'XMLHttpRequest','Connection': 'keep-alive','is_referer': 'https://www.baidu.com/s?wd='+num+'&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_dl=tb&rsv_n=2&rsv_sug3=1&rsv_sug1=1&rsv_sug7=100&rsv_sug2=0&inputT=359&rsv_sug4=359',
}
params=(
('ie', ['utf-8', 'utf-8']),
('newi', '1'),
('mod', '1'),
('isbd', '1'),
('isid', 'b379448d00013935'),
('wd', num),
('rsv_spt', '1'),
('rsv_iqid', '0xa5a17c8700013159'),
('issp', '1'),
('f', '8'),
('rsv_bp', '1'),
('rsv_idx', '2'),
('rqlang', 'cn'),
('tn', 'baiduhome_pg'),
('rsv_enter', '0'),
('rsv_dl', 'tb'),
('oq', num),
('rsv_t', '29b8ZVy4WP7OUTz6/jeON9IexqLhOnMXkLTzhD5NfPu4fH/PZmThFknleY0LwzNQZ8j8'),
('rsv_pq', 'b379448d00013935'),
('bs', num),
('rsv_sid', '1438_21104_26350'),
('_ss', '1'),
('clist', ''),
('hsug', ''),
('f4s', '1'),
('csor', '6'),
('_cr1', '29647'),
)
response= requests.get('https://www.baidu.com/s', headers=headers, params=params, cookies=cookies).text
html=etree.HTML(response)
a= html.xpath('//span[@class = "op-stockdynamic-moretab-cur-num c-gap-right-small"]/text()')#print('当前价格:',a[0])#当前价格
b = html.xpath('//ul[@class = "op-stockdynamic-moretab-info"]/li[8]/span[2]/text()')#print('当前市值:',b[0])#当前市值
list_Price.append(a[0])#当前价格
list_market.append(b[0])#当前市值
return(list_Price,list_market)
baidu=baidu(code)#读取get函数生成的股票数据,依次写入到excel文档中
xfile = openpyxl.load_workbook(path+'\\stock.xlsx')#加载文件
sheet1 =xfile.worksheets[0]#excel中单元格为B2开始,即第2列,第2行
for i in range(len(get[0])):#股票名称
sheet1.cell(i+2, 2).value=get[0][i]for i in range(len(baidu[0])):#当前价格
sheet1.cell(i+2, 3).value=baidu[0][i]for i in range(len(baidu[1])):#当前市值
sheet1.cell(i+2, 4).value=baidu[1][i]for i in range(len(get[1])):#综合评分
sheet1.cell(i+2, 5).value=get[1][i]for i in range(len(get[2])):#短期趋势
sheet1.cell(i+2, 6).value=get[2][i]for i in range(len(get[3])):#中期趋势
sheet1.cell(i+2, 7).value=get[3][i]for i in range(len(get[4])):#长期趋势
sheet1.cell(i+2, 8).value=get[4][i]for i in range(len(get[5])):#综合评判
sheet1.cell(i+2, 9).value=get[5][i]for i in range(len(get[6])):#5日涨幅
sheet1.cell(i+2, 10).value=get[6][i]for i in range(len(get[7])):#3个月涨幅
sheet1.cell(i+2, 11).value=get[7][i]for i in range(len(get[8])):#1年涨幅
sheet1.cell(i+2, 12).value=get[8][i]
xfile.save(path+'\\stock.xlsx')print("爬取完成")