# Part 1: usage — the ranking spider (spiders/ths.py)
import scrapy
from stock.items import StockItem, StockDetailItem
class ThsSpider(scrapy.Spider):
    """Spider for the 10jqka (同花顺) A-share ranking table.

    Crawls the "all boards, ordered by change percent" AJAX pages at
    ``q.10jqka.com.cn`` and stores one dict per stock row in
    ``code_item_dict`` (keyed by stock code). ``stock_detail_parse`` can
    additionally fill ``code_stock_detail_dict`` from each stock's detail
    page, but no request currently routes to it.
    """

    name = 'ths'
    # Let 401 responses reach the callbacks instead of being dropped.
    handle_httpstatus_list = [401]
    allowed_domains = ['q.10jqka.com.cn', 'stockpage.10jqka.com.cn']
    base_url = "https://q.10jqka.com.cn/index/index/board/all/field/zdf/order/desc/page/"
    end_url = "/ajax/1/"
    url = base_url + "1" + end_url
    start_urls = [url]
    # Last ranking page (inclusive) to fetch after page 1; raise this to
    # crawl more of the ranking. Default of 2 preserves the original
    # behavior (exactly one follow-up request, for page 2).
    total_pages = 2
    # stock code -> row dict parsed from the ranking table
    code_item_dict = dict()
    # stock code -> company-detail dict parsed from the detail page
    code_stock_detail_dict = dict()
    # 1-based ranking-table column index -> item key
    index_name_dict = {
        1: "id",
        2: "code",
        3: "name",
        4: "price",
        5: "quota_change",
        6: "up_and_down",
        7: "up_speed",
        8: "change_hand",
        9: "quantity_ratio",
        10: "amplitude",
        11: "turnover",
        12: "outstanding_shares",
        13: "circulating_market_value",
        14: "pe_ratio"
    }
    # 1-based company-details <dd> index -> item key
    stock_detail_dict = {
        1: 'area',
        2: 'involves_concepts',
        3: 'main_business',
        4: 'listing_date',
        5: 'net_assets_per_share',
        6: 'eps',
        7: 'net_profit',
        8: 'net_profit_rate',
        9: 'operating_income',
        10: 'cash_flow_per_share',
        11: 'provident_fund_per_share',
        12: 'undistributed_earnings_per_share',
        13: 'total_share_capital',
        14: 'outstanding_shares'
    }
    # Session cookies captured from a browser; the site rejects requests
    # without a valid 'v' token, which expires and must be refreshed.
    cookied = {
        'spversion': '20130314',
        'searchGuide': 'sg',
        'historystock': '603098%7C*%7C002196%7C*%7C603825',
        'Hm_lvt_78c58f01938e4d85eaf619eae71b4ed1': '1644137307,1644137325,1644499952,1645343560',
        'v': "A84mmjv8I_W-oJQJxzoL-0Q1H6-VT511pBpGAvgcO66NTGARYN_iWXSjlfXL",
    }
    # Browser-like headers captured from the same session.
    headers = {
        'Host': 'q.10jqka.com.cn',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'sec-ch-ua-platform': "Windows",
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Sec-Fetch-Site': 'none;',
        'Sec-Fetch-Mode': 'navigate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': 'spversion=20130314; searchGuide=sg; Hm_lvt_78c58f01938e4d85eaf619eae71b4ed1=1644137307,1644137325,1644499952,1645343560; historystock=603098%7C*%7C002196%7C*%7C603825; Hm_lpvt_78c58f01938e4d85eaf619eae71b4ed1=1645538179; v=A-oCTkeIfKUaPvBdSIqnlwDpO1uJW261YN_iWXSjlj3Ip4TFXOu-xTBvMoRH',
    }

    def _parse_stock_rows(self, response):
        """Parse one ranking page into ``code_item_dict``.

        Shared by ``parse`` and ``next_parse`` (the two previously carried
        identical copies of this loop). Each <tr> becomes a dict keyed per
        ``index_name_dict``; columns 2 and 3 (code, name) are anchors, so
        their text and href are read from the <a> element.
        """
        stock_list = response.xpath("//table[@class='m-table m-pager-table']/tbody/tr")
        for stock in stock_list:
            stock_details = stock.xpath("./td")
            item = dict()
            for index, td in enumerate(stock_details):
                if index == 1 or index == 2:
                    item[self.index_name_dict[index + 1]] = td.xpath("./a/text()").extract()[0]
                    # Both anchor columns link to the same detail page;
                    # the second write harmlessly overwrites the first.
                    item["url"] = td.xpath("./a/@href").extract()[0]
                elif index < 14:
                    item[self.index_name_dict[index + 1]] = td.xpath("./text()").extract()[0]
            self.code_item_dict[item["code"]] = item

    def parse(self, response):
        """Handle page 1, then request the remaining ranking pages."""
        self._parse_stock_rows(response)
        for i in range(2, self.total_pages + 1):
            next_url = self.base_url + str(i) + self.end_url
            yield scrapy.http.Request(url=next_url, cookies=self.cookied, callback=self.next_parse)
        # NOTE(review): per-stock detail requests (item["url"] ->
        # self.stock_detail_parse with meta={'meta_1': code}) are
        # currently disabled; nothing routes to stock_detail_parse.

    def next_parse(self, response):
        """Handle ranking pages 2..total_pages."""
        self._parse_stock_rows(response)

    def stock_detail_parse(self, response):
        """Parse the company-details <dd> list on a stock's detail page.

        Stores the result in ``code_stock_detail_dict`` keyed by the stock
        code passed via ``meta['meta_1']``. NOTE(review): <dd> index 2 is
        skipped and index 3 writes stock_detail_dict[3] (not [4]) — this
        looks deliberate for the page layout, but confirm against the
        live markup before relying on the listing_date/main_business keys.
        """
        code = response.meta['meta_1']
        content_list = response.xpath("//dl[@class='company_details']/dd")
        item = dict()
        for index, content in enumerate(content_list):
            if index == 0:
                item[self.stock_detail_dict[index + 1]] = content.xpath("./text()").extract()[0]
            elif index == 1:
                item[self.stock_detail_dict[index + 1]] = content.xpath("./@title").extract()[0]
            elif index == 3:
                item[self.stock_detail_dict[index]] = content.xpath("./@title").extract()[0]
            elif index > 3:
                item[self.stock_detail_dict[index]] = content.xpath("./text()").extract()[0]
        self.code_stock_detail_dict[code] = item
# Part 2: item definitions (items.py)
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class StockItem(scrapy.Item):
    """One row of the 10jqka market-ranking table."""

    # stock code (代码)
    code = scrapy.Field()
    # stock name (名称)
    name = scrapy.Field()
    # current price (股价)
    price = scrapy.Field()
    # change percent (涨跌幅, %)
    quota_change = scrapy.Field()
    # price change (涨跌)
    up_and_down = scrapy.Field()
    # rising speed (涨速, %)
    up_speed = scrapy.Field()
    # turnover rate (换手, %)
    change_hand = scrapy.Field()
    # volume ratio (量比)
    quantity_ratio = scrapy.Field()
    # amplitude (振幅, %)
    amplitude = scrapy.Field()
    # turnover amount (成交额)
    turnover = scrapy.Field()
    # circulating shares (流通股)
    outstanding_shares = scrapy.Field()
    # circulating market value (流通市值)
    circulating_market_value = scrapy.Field()
    # price/earnings ratio (市盈率)
    pe_ratio = scrapy.Field()
    # URL of the stock's detail page (股票详情)
    stock_url = scrapy.Field()
class StockDetailItem(scrapy.Item):
    """Company-detail record scraped from a stock's 10jqka detail page.

    Field names mirror the keys produced by ThsSpider.stock_detail_dict;
    net_assets_per_share and eps were missing here even though that
    mapping emits them (keys 5 and 6), so populating this item from a
    parsed row would have raised KeyError — both fields are now defined.
    """

    # stock code (代码)
    code = scrapy.Field()
    # stock name (名称)
    name = scrapy.Field()
    # region (所属地域)
    area = scrapy.Field()
    # related concepts (涉及概念)
    involves_concepts = scrapy.Field()
    # main business (主营业务)
    main_business = scrapy.Field()
    # listing date (上市日期)
    listing_date = scrapy.Field()
    # net assets per share (每股净资产)
    net_assets_per_share = scrapy.Field()
    # earnings per share (每股收益)
    eps = scrapy.Field()
    # net profit (净利润)
    net_profit = scrapy.Field()
    # net profit growth rate (净利润增长率)
    net_profit_rate = scrapy.Field()
    # operating income (营业收入)
    operating_income = scrapy.Field()
    # cash flow per share (每股现金流)
    cash_flow_per_share = scrapy.Field()
    # capital reserve per share (每股公积金)
    provident_fund_per_share = scrapy.Field()
    # undistributed earnings per share (每股未分配利润)
    undistributed_earnings_per_share = scrapy.Field()
    # total share capital (总股本)
    total_share_capital = scrapy.Field()
    # circulating shares (流通股)
    outstanding_shares = scrapy.Field()
    # large-order inflow (大单流入)
    large_single_inflow = scrapy.Field()
    # large-order outflow (大单流出)
    large_single_outflow = scrapy.Field()
    # medium-order inflow (中单流入)
    mid_single_inflow = scrapy.Field()
    # medium-order outflow (中单流出)
    mid_single_outflow = scrapy.Field()
    # small-order inflow (小单流入)
    small_single_inflow = scrapy.Field()
    # small-order outflow (小单流出)
    small_single_outflow = scrapy.Field()