import requests
from jqdatasdk import *
import tushare as ts
from bs4 import BeautifulSoup
from lxml import etree
from multiprocessing import Pool
import multiprocessing
import time
import datetime
import random
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import pymysql
import json
import re
import warnings
warnings.filterwarnings("ignore")
###################################################################################################################################
# Mysql常规操作
class MysqlHandler:
    """Small helper around a single pymysql connection.

    NOTE(review): ``delete`` closes the connection after one call, so each
    handler instance is single-use; that behavior is kept for compatibility.
    """

    def __init__(self):
        # Keyword arguments: positional connect() args were removed in pymysql 1.0.
        self.conn = pymysql.connect(host="127.0.0.1", user="root",
                                    password="test", database="quant_db",
                                    charset='utf8')

    def delete(self, sql):
        """Execute *sql* (expected to be a DELETE), commit, then close everything.

        The cursor and connection are closed even if execution raises.
        """
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql)
            self.conn.commit()
        finally:
            cursor.close()
            self.conn.close()
###################################################################################################################################
# Tushare接口数据请求
# 1、股票列表
# 2、日线行情
# 3、资金流向
# 4、沪深股通
# 5、涨停跌停
# 6、shibor
# 7、所属概念
# 8、涨跌停价格
class TuShare:
    """Pull daily market data from the Tushare Pro API and persist it to MySQL.

    Covers: stock list, daily bars, index bars, daily basics, money flow,
    HK-connect flow, limit-up/down list, shibor, limit prices, trade calendar.
    """

    def __init__(self):
        # Trade date used for all requests: today, formatted YYYYMMDD.
        self.date = time.strftime("%Y%m%d", time.localtime())
        # NOTE(review): API token and DB credentials are hard-coded; move to config.
        self.pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')
        self.engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')

    def stock_basic(self):
        """Download the listed-stock universe, join 3 industry levels from a local CSV, store it."""
        data1 = self.pro.stock_basic(list_status='L', fields='ts_code,name,area,industry,market,exchange,list_date,is_hs')
        # Tushare reports Shenzhen as a city; fold it into its province.
        data1['area'] = data1['area'].apply(lambda x: "广东" if x == '深圳' else x)
        stock_csv = pd.read_csv('C:\\Users\\Red-tea-1919\\Desktop\\Stock\\行业概念.csv', encoding='gbk')[['股票代码', '股票简称', '所属概念', '所属同花顺行业']]
        rows = []
        for _, row in stock_csv.iterrows():
            # '所属同花顺行业' looks like 'level1-level2-level3'.
            parts = row['所属同花顺行业'].split('-')
            rows.append({'ts_code': row['股票代码'], 'yjhy': parts[0], 'ejhy': parts[1], 'sjhy': parts[2]})
        # Build the frame once: DataFrame.append was removed in pandas 2.0 and was O(n^2).
        data2 = pd.DataFrame(rows, columns=['ts_code', 'yjhy', 'ejhy', 'sjhy'])
        data = pd.merge(data1, data2, how='left', on='ts_code')
        data.to_sql('stock_basic', con=self.engine, if_exists='append', index=False)
        print(data.head(5))
        print('--------------------程序执行完毕!--------------------')

    def stock_daily(self):
        """Store today's daily OHLC bars for all stocks."""
        data = self.pro.daily(trade_date=self.date, fields='ts_code,trade_date,open,high,low,close,pre_close,pct_chg,amount')
        print(data.head(5))
        data.to_sql('stock_daily', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def index_daily(self):
        """Store today's bars for the SSE composite, SZSE component and ChiNext indices."""
        fields = 'ts_code,trade_date,open,high,low,close,pre_close,pct_chg,amount'
        frames = [self.pro.index_daily(ts_code=code, trade_date=self.date, fields=fields)
                  for code in ('000001.SH', '399001.SZ', '399006.SZ')]
        data = pd.concat(frames, ignore_index=True)
        print(data)
        data.to_sql('index_daily', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def stock_limit_price(self):
        """Store today's limit-up/limit-down prices (snapshot table, replaced each run)."""
        data = self.pro.stk_limit(trade_date=self.date, fields='trade_date,ts_code,pre_close,up_limit,down_limit')
        print(data.head(5))
        data.to_sql('stock_limit_price', con=self.engine, if_exists='replace', index=False)
        print('--------------------程序执行完毕!--------------------')

    def stock_daily_basic(self):
        """Store today's per-stock basics (turnover, volume ratio, share counts)."""
        data = self.pro.daily_basic(trade_date=self.date, fields='ts_code,trade_date,turnover_rate,turnover_rate_f,volume_ratio,total_share,float_share,free_share')
        print(data.head(5))
        data.to_sql('stock_daily_basic', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def stock_moneyflow(self):
        """Store today's per-stock money-flow breakdown (mid/large/extra-large orders)."""
        data = self.pro.moneyflow(trade_date=self.date, fields='ts_code,trade_date,buy_md_amount,sell_md_amount,buy_lg_amount,sell_lg_amount,buy_elg_amount,sell_elg_amount,net_mf_amount')
        print(data.head(5))
        data.to_sql('stock_moneyflow', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def hsgt_moneyflow(self):
        """Store today's aggregate HK-connect (沪深股通) northbound flow."""
        data = self.pro.moneyflow_hsgt(trade_date=self.date, fields='trade_date,hgt,sgt,north_money')
        print(data.head(5))
        data.to_sql('hsgt_moneyflow', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def limit_list(self):
        """Store today's limit-up/limit-down stock list with board statistics."""
        data = self.pro.limit_list(trade_date=self.date, fields='trade_date,ts_code,close,pct_chg,fc_ratio,fd_amount,first_time,last_time,open_times,strth,limit')
        print(data.head(5))
        data.to_sql('limit_stock_list', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def shibor(self):
        """Store today's SHIBOR overnight and 1-month rates."""
        data = self.pro.shibor(date=self.date, fields='date,on,1m')
        print(data.head(5))
        data.to_sql('shibor', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def pre_cal_date(self):
        """Store the SSE trade calendar for the last 90 days (used to find pre-trade dates)."""
        today = datetime.date.today()
        start_date = (today - datetime.timedelta(days=90)).strftime('%Y%m%d')
        end_date = today.strftime('%Y%m%d')
        data = self.pro.trade_cal(exchange='SSE', start_date=start_date, end_date=end_date, fields='exchange,cal_date,is_open,pretrade_date')
        print(data.head(5))
        data.to_sql('trade_cal_date', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')
###################################################################################################################################
if __name__ == "__main__":
    t1 = time.time()
    # Clear the tables that are fully re-downloaded below; the rest are append-only.
    # Keyword arguments: positional pymysql.connect() args were removed in pymysql 1.0.
    conn = pymysql.connect(host="127.0.0.1", user="root", password="test",
                           database="quant_db", charset='utf8')
    try:
        with conn.cursor() as cursor:
            cursor.execute('delete from stock_basic')
            cursor.execute('delete from trade_cal_date')
        conn.commit()
    finally:
        conn.close()
    obj1 = TuShare()
    obj1.stock_daily()
    obj1.index_daily()
    obj1.stock_daily_basic()
    obj1.stock_moneyflow()
    obj1.hsgt_moneyflow()
    obj1.limit_list()
    obj1.shibor()
    obj1.pre_cal_date()
    obj1.stock_basic()
    t2 = time.time()
    print('本次程序运行时间为%s秒' % (t2 - t1))
import requests
from jqdatasdk import *
import tushare as ts
from bs4 import BeautifulSoup
from lxml import etree
from multiprocessing import Pool
import multiprocessing
import time
import datetime
import random
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import pymysql
import json
import re
import warnings
warnings.filterwarnings("ignore")
###################################################################################################################################
# Mysql常规操作
class MysqlHandler:
    """Small helper around a single pymysql connection.

    NOTE(review): ``delete`` closes the connection after one call, so each
    handler instance is single-use; that behavior is kept for compatibility.
    """

    def __init__(self):
        # Keyword arguments: positional connect() args were removed in pymysql 1.0.
        self.conn = pymysql.connect(host="127.0.0.1", user="root",
                                    password="test", database="quant_db",
                                    charset='utf8')

    def delete(self, sql):
        """Execute *sql* (expected to be a DELETE), commit, then close everything.

        The cursor and connection are closed even if execution raises.
        """
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql)
            self.conn.commit()
        finally:
            cursor.close()
            self.conn.close()
###################################################################################################################################
# Tushare接口数据请求
# 1、股票列表
# 2、日线行情
# 3、资金流向
# 4、沪深股通
# 5、涨停跌停
# 6、shibor
# 7、所属概念
# 8、涨跌停价格
class TuShare:
    """Pull daily market data from the Tushare Pro API and persist it to MySQL.

    Covers: stock list, daily bars, index bars, daily basics, money flow,
    HK-connect flow, limit-up/down list, shibor, limit prices, trade calendar.
    """

    def __init__(self):
        # Trade date used for all requests: today, formatted YYYYMMDD.
        self.date = time.strftime("%Y%m%d", time.localtime())
        # NOTE(review): API token and DB credentials are hard-coded; move to config.
        self.pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')
        self.engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')

    def stock_basic(self):
        """Download the listed-stock universe, join 3 industry levels from a local CSV, store it."""
        data1 = self.pro.stock_basic(list_status='L', fields='ts_code,name,area,industry,market,exchange,list_date,is_hs')
        # Tushare reports Shenzhen as a city; fold it into its province.
        data1['area'] = data1['area'].apply(lambda x: "广东" if x == '深圳' else x)
        stock_csv = pd.read_csv('C:\\Users\\Red-tea-1919\\Desktop\\Stock\\行业概念.csv', encoding='gbk')[['股票代码', '股票简称', '所属概念', '所属同花顺行业']]
        rows = []
        for _, row in stock_csv.iterrows():
            # '所属同花顺行业' looks like 'level1-level2-level3'.
            parts = row['所属同花顺行业'].split('-')
            rows.append({'ts_code': row['股票代码'], 'yjhy': parts[0], 'ejhy': parts[1], 'sjhy': parts[2]})
        # Build the frame once: DataFrame.append was removed in pandas 2.0 and was O(n^2).
        data2 = pd.DataFrame(rows, columns=['ts_code', 'yjhy', 'ejhy', 'sjhy'])
        data = pd.merge(data1, data2, how='left', on='ts_code')
        data.to_sql('stock_basic', con=self.engine, if_exists='append', index=False)
        print(data.head(5))
        print('--------------------程序执行完毕!--------------------')

    def stock_daily(self):
        """Store today's daily OHLC bars for all stocks."""
        data = self.pro.daily(trade_date=self.date, fields='ts_code,trade_date,open,high,low,close,pre_close,pct_chg,amount')
        print(data.head(5))
        data.to_sql('stock_daily', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def index_daily(self):
        """Store today's bars for the SSE composite, SZSE component and ChiNext indices."""
        fields = 'ts_code,trade_date,open,high,low,close,pre_close,pct_chg,amount'
        frames = [self.pro.index_daily(ts_code=code, trade_date=self.date, fields=fields)
                  for code in ('000001.SH', '399001.SZ', '399006.SZ')]
        data = pd.concat(frames, ignore_index=True)
        print(data)
        data.to_sql('index_daily', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def stock_limit_price(self):
        """Store today's limit-up/limit-down prices (snapshot table, replaced each run)."""
        data = self.pro.stk_limit(trade_date=self.date, fields='trade_date,ts_code,pre_close,up_limit,down_limit')
        print(data.head(5))
        data.to_sql('stock_limit_price', con=self.engine, if_exists='replace', index=False)
        print('--------------------程序执行完毕!--------------------')

    def stock_daily_basic(self):
        """Store today's per-stock basics (turnover, volume ratio, share counts)."""
        data = self.pro.daily_basic(trade_date=self.date, fields='ts_code,trade_date,turnover_rate,turnover_rate_f,volume_ratio,total_share,float_share,free_share')
        print(data.head(5))
        data.to_sql('stock_daily_basic', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def stock_moneyflow(self):
        """Store today's per-stock money-flow breakdown (mid/large/extra-large orders)."""
        data = self.pro.moneyflow(trade_date=self.date, fields='ts_code,trade_date,buy_md_amount,sell_md_amount,buy_lg_amount,sell_lg_amount,buy_elg_amount,sell_elg_amount,net_mf_amount')
        print(data.head(5))
        data.to_sql('stock_moneyflow', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def hsgt_moneyflow(self):
        """Store today's aggregate HK-connect (沪深股通) northbound flow."""
        data = self.pro.moneyflow_hsgt(trade_date=self.date, fields='trade_date,hgt,sgt,north_money')
        print(data.head(5))
        data.to_sql('hsgt_moneyflow', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def limit_list(self):
        """Store today's limit-up/limit-down stock list with board statistics."""
        data = self.pro.limit_list(trade_date=self.date, fields='trade_date,ts_code,close,pct_chg,fc_ratio,fd_amount,first_time,last_time,open_times,strth,limit')
        print(data.head(5))
        data.to_sql('limit_stock_list', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def shibor(self):
        """Store today's SHIBOR overnight and 1-month rates."""
        data = self.pro.shibor(date=self.date, fields='date,on,1m')
        print(data.head(5))
        data.to_sql('shibor', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def pre_cal_date(self):
        """Store the SSE trade calendar for the last 90 days (used to find pre-trade dates)."""
        today = datetime.date.today()
        start_date = (today - datetime.timedelta(days=90)).strftime('%Y%m%d')
        end_date = today.strftime('%Y%m%d')
        data = self.pro.trade_cal(exchange='SSE', start_date=start_date, end_date=end_date, fields='exchange,cal_date,is_open,pretrade_date')
        print(data.head(5))
        data.to_sql('trade_cal_date', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')
###################################################################################################################################
if __name__ == "__main__":
    t1 = time.time()
    # Clear the tables that are fully re-downloaded below; the rest are append-only.
    # Keyword arguments: positional pymysql.connect() args were removed in pymysql 1.0.
    conn = pymysql.connect(host="127.0.0.1", user="root", password="test",
                           database="quant_db", charset='utf8')
    try:
        with conn.cursor() as cursor:
            cursor.execute('delete from stock_basic')
            cursor.execute('delete from trade_cal_date')
        conn.commit()
    finally:
        conn.close()
    obj1 = TuShare()
    obj1.stock_daily()
    obj1.index_daily()
    obj1.stock_daily_basic()
    obj1.stock_moneyflow()
    obj1.hsgt_moneyflow()
    obj1.limit_list()
    obj1.shibor()
    obj1.pre_cal_date()
    obj1.stock_basic()
    t2 = time.time()
    print('本次程序运行时间为%s秒' % (t2 - t1))
import requests
from jqdatasdk import *
import tushare as ts
from bs4 import BeautifulSoup
from lxml import etree
from multiprocessing import Pool
import multiprocessing
import time
import datetime
import random
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import pymysql
import json
import re
import math
import warnings
warnings.filterwarnings("ignore")
###################################################################################################################################
# 东方财富爬虫
# 1、股票异动
# 2、沪深股通
class Eastmoney:
    """Scrape Eastmoney intraday 'stock changes' (股票异动) and the limit-up pool into MySQL."""

    def __init__(self):
        self.session = requests.Session()
        self.date = time.strftime("%Y%m%d", time.localtime())
        self.engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')
        self.timestamp = int(round(time.time() * 1000))
        self.url1 = 'http://push2ex.eastmoney.com/getStockCountChanges?type=4,8,16,32,64,128,8193,8194,8201,8202,8203,8204,8207,8208,8209,8210,8211,8212,8213,8214,8215,8216'
        self.url2 = "http://push2ex.eastmoney.com/getAllStockChanges?type=8201,8202,8193,4,32,64,8207,8209,8211,8213,8215,8204,8203,8194,8,16,128,8208,8210,8212,8214,8216"
        self.url3 = 'http://push2ex.eastmoney.com/getTopicZTPool?'
        self.ua_list = [
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'
        ]

    def changes_max_page(self):
        """Return the total page count ('rt') reported by the changes endpoint.

        Fix: the original stored the value in ``self.changes_max_page``, which
        shadowed (and destroyed) this method after the first call.
        """
        param = {"_": self.timestamp, "pagesize": '64', "ut": '7eea3edcaed734bea9cbfc24409ed989', "dpt": 'wzchanges'}
        html = json.loads(self.session.get(url=self.url1, params=param, headers={'User-Agent': random.choice(self.ua_list)}).text)
        return html['rt']

    def changeSpider(self):
        """Crawl every page of intraday stock changes, normalize and store them."""
        max_page = int(self.changes_max_page())
        records = []
        for page in range(0, max_page):
            param = {"pageindex": page, "pagesize": '64', "ut": '7eea3edcaed734bea9cbfc24409ed989', "dpt": 'wzchanges'}
            html = json.loads(self.session.get(url=self.url2, params=param, headers={'User-Agent': random.choice(self.ua_list)}).text)
            if html['data'] is None:
                break
            for stock in html['data']['allstock']:
                chg_type = stock['t']
                # 'tm' is HHMMSS; pad the missing leading zero before 10:00.
                tm = str(stock['tm'])
                if len(tm) == 5:
                    tm = '0' + tm
                chg_time = str(self.date) + tm
                # NOTE(review): 't' may arrive as an int in the JSON payload; these
                # string comparisons mirror the original behavior — confirm.
                if chg_type in ('8201', '8202', '8207', '8209', '8211', '8215', '8204', '8203', '8208', '8210', '8212', '8216'):
                    chg_value = stock['i'] * 100
                    chg_index = '%'
                elif chg_type in ('8193', '8194', '128', '64'):
                    chg_value = stock['i']
                    chg_index = '股'
                else:
                    chg_value = stock['i']
                    chg_index = '元'
                records.append({'chg_time': chg_time, 'code': stock['c'], 'name': stock['n'],
                                'chg_type': chg_type, 'chg_value': chg_value, 'chg_index': chg_index})
            print('--------------------程序执行完毕!共抓取%s页数据--------------------' % (page + 1))
        # Build the frame once: DataFrame.append was removed in pandas 2.0 and was O(n^2).
        data = pd.DataFrame(records, columns=['chg_time', 'code', 'name', 'chg_type', 'chg_value', 'chg_index'])
        # Drop funds/bonds/B-shares: codes starting with 1/2/5/9.
        data = data[~ data['code'].str.startswith(('1', '2', '5', '9'))]
        data['code'] = data['code'].apply(lambda x: (x + ".SH") if x.startswith('6') else (x + ".SZ"))
        print(data.head(5))
        data.to_sql('stock_change', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def limit(self):
        """Crawl today's limit-up pool (涨停池) and store consecutive-board counts."""
        seed = math.floor(random.random() * 10000000 + 1)
        now_ms = int(time.time() * 1000)
        # The endpoint wraps its JSON in a JSONP callback; strip it before parsing.
        prefix = 'callbackdata' + str(seed + 1) + '('
        suffix = ');'
        param = {'cb': 'callbackdata' + str(seed + 1), 'ut': '7eea3edcaed734bea9cbfc24409ed989', 'dpt': 'wz.ztzt', 'Pageindex': '0', 'pagesize': 920, 'sort': 'fbt:asc', 'date': self.date, '_': now_ms}
        html = self.session.get(url=self.url3, params=param, headers={'User-Agent': random.choice(self.ua_list)}).text
        payload = json.loads(html.replace(prefix, '').replace(suffix, ''))
        records = []
        for x in payload['data']['pool']:
            records.append({'date': self.date, 'code': x['c'], 'lbc': x['lbc'], 'hybk': x['hybk']})
        data = pd.DataFrame(records, columns=['date', 'code', 'lbc', 'hybk'])
        data['code'] = data['code'].apply(lambda x: (x + ".SH") if x.startswith('6') else (x + ".SZ"))
        print(data.head(5))
        data.to_sql('limit_stock_lbc', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')
###################################################################################################################################
# 主运行程序
class Main:
    """Entry point wrapper: runs the Eastmoney crawlers in order."""

    def main_Eastmoney(self):
        spider = Eastmoney()
        spider.changeSpider()
        spider.limit()
###################################################################################################################################
if __name__ == "__main__":
    started = time.time()
    Main().main_Eastmoney()
    elapsed = time.time() - started
    print('本次程序运行时间为%s秒' % elapsed)
import requests
from jqdatasdk import *
import tushare as ts
from bs4 import BeautifulSoup
from lxml import etree
from multiprocessing import Pool
import multiprocessing
import time
import datetime
import random
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import pymysql
import json
import re
import warnings
warnings.filterwarnings("ignore")
###################################################################################################################################
# 开盘啦爬虫
# 1、风口概念
# 2、龙虎榜
class Kpl:
    """Crawl kaipanla.com: hot-concept tags, dragon-tiger (龙虎榜) seat detail, market mood."""

    def __init__(self):
        self.date = time.strftime("%Y-%m-%d", time.localtime())
        self.session = requests.Session()
        self.engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')
        self.url = 'https://pclhb.kaipanla.com/w1/api/index.php'
        self.url1 = 'https://pchq.kaipanla.com/w1/api/index.php'
        self.ua_list = [
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'
        ]

    def hotConceptSpider(self):
        """Fetch today's dragon-tiger stocks with their hot-concept tag, store them.

        Returns the list of raw stock IDs for ``lhbSpider``.
        """
        code_list = []
        records = []
        param = {'c': 'LongHuBang', 'a': 'GetStockList', 'st': 300, 'Time': str(self.date), 'UserID': 399083, 'Token': '71aef0e806e61ad3169ddc9473e37886'}
        html = json.loads(self.session.post(url=self.url, headers={'User-Agent': random.choice(self.ua_list)}, data=param).text)['list']
        for entry in html:
            code = entry['ID']
            code_list.append(code)
            if len(entry['FengKou']) == 0:
                hot_cept = '无'
            else:
                # NOTE(review): mirrors the original — keeps only the LAST concept
                # of the comma-separated 'FengKou' field; confirm that is intended.
                hot_cept = entry['FengKou'].split(',')[-1]
            records.append({'date': self.date, 'code': code, 'name': entry['Name'], 'hot_cept': hot_cept})
        # Build once: DataFrame.append was removed in pandas 2.0 and was O(n^2).
        data = pd.DataFrame(records, columns=['date', 'code', 'name', 'hot_cept'])
        # Keep only A-share codes (0/3/6 prefixes).
        data = data[data['code'].str.startswith(('0', '3', '6'))]
        data['code'] = data['code'].apply(lambda x: (x + ".SH") if x.startswith('6') else (x + ".SZ"))
        data['date'] = data['date'].apply(lambda x: str(x).replace('-', ''))
        print(data.head(5))
        print('--------------------程序执行完毕!--------------------')
        data.to_sql('hot_concept', con=self.engine, if_exists='append', index=False)
        return code_list

    def _parse_lhb_side(self, rows, code, direction):
        """Parse one side ('buy'/'sell') of a dragon-tiger entry into dict records."""
        records = []
        for b in rows:
            # 'GroupIcon' may be missing, empty or None; fall back to '空'.
            try:
                groupicon = b['GroupIcon'][0]
                if groupicon is None:
                    groupicon = '空'
            except (KeyError, IndexError, TypeError):
                groupicon = '空'
            records.append({'date': self.date, 'code': code, 'direction': direction,
                            'dname': b['Name'], 'buy': b['Buy'], 'sell': b['Sell'],
                            'groupicon': groupicon, 'youziicon': b['YouZiIcon']})
        return records

    def lhbSpider(self, code_list):
        """For each dragon-tiger stock, fetch per-seat buy/sell detail and store it."""
        for code in code_list:
            param = {'c': 'Stock', 'a': 'GetNewOneStockInfo', 'StockID': code, 'Time': self.date, 'UserID': '399083', 'Token': '71aef0e806e61ad3169ddc9473e37886'}
            html = json.loads(self.session.post(url=self.url, headers={'User-Agent': random.choice(self.ua_list)}, data=param).text)['List']
            buys, sells = [], []
            for j in html:
                buys.extend(self._parse_lhb_side(j['BuyList'], code, 'buy'))
                sells.extend(self._parse_lhb_side(j['SellList'], code, 'sell'))
            # Same ordering as the original concat: all buy rows, then all sell rows.
            data = pd.DataFrame(buys + sells, columns=['date', 'code', 'direction', 'dname', 'buy', 'sell', 'groupicon', 'youziicon'])
            data = data[data['code'].str.startswith(('0', '3', '6'))]
            data['code'] = data['code'].apply(lambda x: (x + ".SH") if x.startswith('6') else (x + ".SZ"))
            data['date'] = data['date'].apply(lambda x: str(x).replace('-', ''))
            data.to_sql('lhb_list', con=self.engine, if_exists='append', index=False)
            print(data.head(5))
        print('--------------------程序执行完毕!--------------------')

    def scqxSpider(self):
        """Fetch the market-mood summary (limit-up/down counts etc.) and store one row."""
        param = {'c': 'PCArrangeData', 'a': 'GetIndexPlate', 'st': 3, 'StockID': 'SH000001', 'UserID': 399083, 'Token': '2292739880d01bd81e169e90a1898ebe'}
        html = json.loads(self.session.post(url=self.url1, headers={'User-Agent': random.choice(self.ua_list)}, data=param).text)['Mood']
        mood = {
            'zrzt': html['ZRZT'],  # 自然涨停
            'ztzb': html['ZTZB'],  # 涨停炸板
            'szjs': html['SZJS'],  # 上涨家数
            'xdjs': html['XDJS'],  # 下跌家数
            'zt': html['ZT'],      # 涨停家数
            'dt': html['DT'],      # 跌停家数
            'zbl': html['ZBL'],    # 炸板率
            'cgl': html['CGL'],    # 昨日打板成功率
            'yll': html['YLL'],    # 昨日打板盈利率
            'day': html['day']     # 交易日期
        }
        data = pd.DataFrame(mood, index=[0])
        data = data[['day', 'zrzt', 'ztzb', 'szjs', 'xdjs', 'zt', 'dt', 'zbl', 'cgl', 'yll']]
        data.to_sql('scqx', con=self.engine, if_exists='append', index=False)
        print(data)
        print('--------------------程序执行完毕!--------------------')
###################################################################################################################################
# 主运行程序
class Main:
    """Entry point wrapper: runs the Kpl crawlers in order."""

    def main_Kpl(self):
        spider = Kpl()
        codes = spider.hotConceptSpider()
        spider.lhbSpider(codes)
        spider.scqxSpider()
###################################################################################################################################
if __name__ == "__main__":
    started = time.time()
    Main().main_Kpl()
    elapsed = time.time() - started
    print('本次程序运行时间为%s秒' % elapsed)
import requests
from jqdatasdk import *
import tushare as ts
from bs4 import BeautifulSoup
from lxml import etree
from multiprocessing import Pool
import multiprocessing
import time
import datetime
import random
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import pymysql
import json
import re
import warnings
warnings.filterwarnings("ignore")
###################################################################################################################################
# code标准化
class CodeHandler:
    """Normalize stock codes between the formats used by different data vendors."""

    def __init__(self):
        # NOTE(review): API token is hard-coded; move to config.
        self.pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')

    def get_all_securities(self):
        """Fetch the current listed-stock universe ('ts_code' like '000001.SZ')."""
        self.stocks = self.pro.stock_basic(list_status='L', fields='ts_code,exchange')

    def jq_code_normalize(self):
        """Return JoinQuant-style codes as a list: '600000.XSHG' / '000001.XSHE'."""
        self.get_all_securities()
        return self.stocks['ts_code'].apply(
            lambda x: (x[:7] + "XSHG") if x.endswith('SH') else (x[:7] + "XSHE")).tolist()

    def ts_code_normalize(self):
        """Return Tushare codes unchanged, as a list."""
        self.get_all_securities()
        return self.stocks['ts_code'].tolist()

    def no_code_normalize(self):
        """Return bare 6-digit codes as a list.

        Fix: the original returned a pandas Series here while every sibling
        method returns a plain list.
        """
        self.get_all_securities()
        return self.stocks['ts_code'].apply(lambda x: x[:6]).tolist()

    def else_code_normalize(self):
        """Return Sina-style codes as a list: 'SH600000' / 'SZ000001'."""
        self.get_all_securities()
        return self.stocks['ts_code'].apply(
            lambda x: ('SH' + x[:6]) if x.endswith('SH') else ('SZ' + x[:6])).tolist()
###################################################################################################################################
# 新浪财经爬虫
# 1、个股成交分价表
def divCostSpider(code):
    """Scrape Sina's price-distribution (成交分价) table for one stock and store it.

    code: Sina-style id like 'SH600000'; persisted as '600000.SH'.
    Runs inside a multiprocessing pool, so it builds its own engine and
    handles its own failures (one bad stock must not kill the pool).
    """
    try:
        engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')
        date = time.strftime("%Y%m%d", time.localtime())
        ua_list = [
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'
        ]
        url = 'http://vip.stock.finance.sina.com.cn/quotes_service/view/cn_price.php?symbol=' + code.lower()
        html = requests.get(url=url, headers={'User-Agent': random.choice(ua_list)}).text
        tree = etree.HTML(html)
        records = []
        for tr in tree.xpath('//*[@id="divListTemplate"]/table/tbody/tr'):
            records.append({
                'code': code,
                'date': date,
                'price': tr.xpath('.//td[1]/text()')[0].strip(),
                'volume': tr.xpath('.//td[2]/text()')[0].strip(),
                # Strip the trailing '%' so the column stores a bare number.
                'buyrate': str(tr.xpath('.//td[3]/text()')[0].strip()).replace('%', ''),
            })
        # Build once: DataFrame.append was removed in pandas 2.0 and was O(n^2).
        data = pd.DataFrame(records, columns=['code', 'date', 'price', 'volume', 'buyrate'])
        data['code'] = data['code'].apply(lambda x: (x[2:] + ".SH") if x.startswith('SH') else (x[2:] + ".SZ"))
        data.to_sql('stock_divcost', con=engine, if_exists='append', index=False)
    except Exception as e:
        # Best-effort: report the failure (with the cause, not silently) and return.
        print('%s数据抓取出现问题: %s' % (code, e))
###################################################################################################################################
# 主运行程序
class Main:
    """Entry point wrapper: fans the Sina spider out over a process pool."""

    def main_Sina(self):
        codes = CodeHandler().else_code_normalize()
        with Pool() as workers:
            workers.map(divCostSpider, codes)
            workers.close()
            workers.join()
###################################################################################################################################
if __name__ == "__main__":
    started = time.time()
    Main().main_Sina()
    elapsed = time.time() - started
    print('本次程序运行时间为%s秒' % elapsed)
import requests
from jqdatasdk import *
import tushare as ts
from bs4 import BeautifulSoup
from lxml import etree
from multiprocessing import Pool
import multiprocessing
import time
import datetime
import random
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import pymysql
import json
import re
import math
import warnings
warnings.filterwarnings("ignore")
###################################################################################################################################
# 东方财富爬虫
# 1、股票异动
# 2、沪深股通
class Eastmoney:
def __init__(self):
self.session = requests.Session()
self.engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')
self.pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')
self.url1 = 'http://dcfm.eastmoney.com/EM_MutiSvcExpandInterface/api/js/get?'
self.ua_list = [
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'
]
self.sql = '''SELECT pretrade_date FROM trade_cal_date WHERE exchange = 'SSE' AND is_open = 1 ORDER BY cal_date DESC LIMIT 1'''
self.dataframe = pd.read_sql(self.sql,self.engine)['pretrade_date'].tolist()
print(self.dataframe)
#self.pretrade_date = str(self.dataframe[0][:4]) + '-' + str(self.dataframe[0][4:6]) + '-' + str(self.dataframe[0][6:])
self.pretrade_date = '2020-07-22'
print(self.pretrade_date)
def hsgt_max_page(self):
param = {'type': 'HSGT20_GGTJ_SUM',
'token': '894050c76af8597a853f5b408b759f5d',
'st': 'ShareSZ_Chg_One',
'sr': '-1',
'p': '1',
'ps': '50',
'js': 'var eAEIQxxY={pages:(tp),data:(x)}',
'filter': "(DateType='1' and HdDate='{}')".format(self.pretrade_date),
'rt': '53077355'}
html = self.session.get(url=self.url1, params=param, headers={'User-Agent': random.choice(self.ua_list)}).text
m = re.compile('var.*?{pages:(\d+),data:.*?')
p = re.search(m, html)
self.hsgt_max_page = p.group(1)
def hsgtMoneySpider(self):
self.hsgt_max_page()
dict = {}
data = pd.DataFrame()
for page in range(0, int(self.hsgt_max_page)):
param = {'type': 'HSGT20_GGTJ_SUM',
'token': '894050c76af8597a853f5b408b759f5d',
'st': 'ShareSZ_Chg_One',
'sr': '-1',
'p': page,
'ps': '50',
'js': 'var eAEIQxxY={pages:(tp),data:(x)}',
'filter': "(DateType='1' and HdDate='{}')".format(self.pretrade_date),
'rt': '53077355'}
html = self.session.get(url=self.url1, params=param, headers={'User-Agent': random.choice(self.ua_list)}).text
# 提取出list,可以使用json.dumps和json.loads
q = re.compile('var.*?data:(.*)}', re.S)
i = re.search(q, html)
for p in json.loads(i.group(1)):
date = p['HdDate'] # 日期
code = p['SCode'] # 代码
name = p['SName'] # 名称
hyname = p['HYName'] # 所属板块
dqname = p['DQName'] # 地区板块
newprice = p['NewPrice'] # 今日收盘价
zdf = p['Zdf'] # 今日涨跌幅
sharehold = p['ShareHold'] # 今日持股股数,无单位
sharesz = p['ShareSZ'] # 今日持股市值,无单位
ltzb = p['LTZB'] # 今日持股占流通股比,无单位
sharesrate = p['SharesRate'] # 今日持股占总股本比,单位%
sharehold_chg_one = p['ShareHold_Chg_One'] # 今日增持股数,无单位
sharesz_chg_one = p['ShareSZ_Chg_One'] # 今日增持市值,无单位
ltzb_one = p['LTZB_One'] # 今日增持占流通股比,无单位
zzb_one = p['ZZB_One'] # 今日增持占总股本比,无单位
dict = {
'date': self.pretrade_date,
'code': code,
'hyname': hyname,
'dqname': dqname,
'newprice': newprice,
'zdf': zdf,
'sharehold': sharehold,
'sharesz': sharesz,
'ltzb': ltzb,
'sharesrate': sharesrate,
'sharehold_chg_one': sharehold_chg_one,
'sharesz_chg_one': sharesz_chg_one,
'ltzb_one': ltzb_one,
'zzb_one': zzb_one
}
data = data.append(dict, ignore_index=True)
data = data[['date', 'code', 'hyname', 'dqname', 'newprice', 'zdf', 'sharehold', 'sharesz', 'ltzb', 'sharesrate', 'sharehold_chg_one', 'sharesz_chg_one', 'ltzb_one', 'zzb_one']]
data['code'] = data['code'].apply(lambda x: (x + ".SH") if x.startswith('6') == True else (x + ".SZ"))
data['date'] = data['date'].apply(lambda x: str(x).replace('-', ''))
print(data.head(5))
data.to_sql('hsgt_detail', con=self.engine, if_exists='append', index=False)
print('--------------------程序执行完毕!--------------------')
def pre_cal_date(self):
    """Fetch the SSE trading calendar for the last 90 days and append it to MySQL.

    Writes columns exchange/cal_date/is_open/pretrade_date into table
    trade_cal_date via self.engine.
    """
    today = datetime.date.today()
    window_start = today - datetime.timedelta(days=90)
    calendar = self.pro.trade_cal(
        exchange='SSE',
        start_date=window_start.strftime('%Y%m%d'),
        end_date=today.strftime('%Y%m%d'),
        fields='exchange,cal_date,is_open,pretrade_date')
    print(calendar.head(5))
    calendar.to_sql('trade_cal_date', con=self.engine, if_exists='append', index=False)
    print('--------------------程序执行完毕!--------------------')
###################################################################################################################################
# 主运行程序
class Main:
    """Entry point wiring for the Eastmoney spider tasks."""

    def main_Eastmoney(self):
        """Refresh the trade calendar, then crawl north-bound (HSGT) holdings."""
        spider = Eastmoney()
        spider.pre_cal_date()
        spider.hsgtMoneySpider()
###################################################################################################################################
if __name__ == "__main__":
    # Time the full crawl so slow runs are easy to spot.
    start = time.time()
    Main().main_Eastmoney()
    elapsed = time.time() - start
    print('本次程序运行时间为%s秒' % elapsed)
import requests
from jqdatasdk import *
import tushare as ts
from bs4 import BeautifulSoup
from lxml import etree
from multiprocessing import Pool
import multiprocessing
import time
import datetime
import random
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import pymysql
import json
import re
import warnings
warnings.filterwarnings("ignore")
###################################################################################################################################
# Mysql常规操作
def DeleteHandler(sql):
    """Execute a write statement (typically DELETE) against quant_db.

    Bug fix: the original never called ``con.commit()`` — pymysql does not
    autocommit by default, so the DELETE was rolled back when the connection
    was garbage-collected — and it leaked both cursor and connection.
    Matches the commit-and-close behavior of MysqlHandler.delete.

    :param sql: the SQL statement to execute (caller-supplied, trusted).
    """
    con = pymysql.connect("127.0.0.1", "root", "test", "quant_db", charset='utf8')
    try:
        cursor = con.cursor()
        try:
            cursor.execute(sql)
            con.commit()  # persist the change; without this it is rolled back
        finally:
            cursor.close()
    finally:
        con.close()
###################################################################################################################################
# Tushare所属概念
def TuShareConcept():
    """Pull every TuShare concept's constituent stock list.

    Returns a DataFrame with columns ['concept_name', 'ts_code'].

    Bug fix: the original throttling branch *skipped* the concept whose
    turn hit the rate-limit pause (sleep was in the ``if``, the fetch in
    the ``else``), silently losing its rows — now we sleep *and* fetch.
    Rows are accumulated in a list and concatenated once, because
    DataFrame.append was removed in pandas 2.x and the loop was O(n^2).
    Relies on the module-level ``pro`` TuShare API handle.
    """
    concept_ids = (pro.concept(fields='code,name'))['code'].tolist()
    frames = []
    for visit_count, concept_id in enumerate(concept_ids):
        # Pause periodically to stay under the TuShare request rate limit.
        if (visit_count + 1) % 199 == 0:
            time.sleep(10)
        frames.append(pro.concept_detail(id=concept_id, fields='concept_name,ts_code'))
    if frames:
        data = pd.concat(frames, ignore_index=True)
    else:
        data = pd.DataFrame(columns=['concept_name', 'ts_code'])
    data = data[['concept_name', 'ts_code']]
    print('--------------------程序执行完毕!--------------------')
    return data
###################################################################################################################################
# 开盘啦所属概念
def KplConcept():
    """Crawl pchq.kaipanla.com for each listed stock's concept tags.

    Returns a DataFrame ['concept_name', 'ts_code'], with non-stock codes
    (those starting with 1/2/5/9: funds, bonds, etc.) filtered out.

    Bug fixes: the original throttling branch *skipped* the stock whose
    turn hit the pause, losing its concepts — now we sleep *and* fetch;
    rows are accumulated in a list (DataFrame.append was removed in
    pandas 2.x and the loop was O(n^2)); unused locals (``engine``, the
    parsed ``trend.code``) removed. Relies on the module-level ``pro``.
    """
    rows = []
    data1 = pd.DataFrame()
    try:
        date = time.strftime("%Y-%m-%d", time.localtime())
        code_list = pro.stock_basic(list_status='L', fields='ts_code,name')['ts_code'].tolist()
        # 1、网页数据抓取 (fetch one POST per stock)
        url = 'https://pchq.kaipanla.com/w1/api/index.php'
        ua_list = [
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'
        ]
        for visit_count, code in enumerate(code_list):
            # Periodic pause so the site does not throttle/ban us.
            if (visit_count + 1) % 400 == 0:
                time.sleep(60)
            payload = {'c': 'PCArrangeData',
                       'a': 'GetHQPlate',
                       'StockID': code[:6],
                       'Day': date,
                       'time': '15:00',
                       'SelType': '1, 2, 3, 8, 9, 5, 6, 7',
                       'UserID': 399083,
                       'Token': '71aef0e806e61ad3169ddc9473e37886'
                       }
            html = requests.post(url=url, data=payload, headers={'User-Agent': random.choice(ua_list)}).text
            # 2、网页数据解析 — one output row per (concept, stock) pair
            for cp in json.loads(html)['stockplate']:
                rows.append({'concept_name': cp[0], 'ts_code': code})
        data1 = pd.DataFrame(rows, columns=['concept_name', 'ts_code'])
        # Drop funds/bonds/indices: codes beginning with 1, 2, 5 or 9.
        data1 = data1[~data1['ts_code'].str.startswith(('1', '2', '5', '9'))]
    except Exception as error:
        # Best-effort crawl: log and return whatever was collected so far.
        print(error)
    print('--------------------程序执行完毕!--------------------')
    return data1
###################################################################################################################################
# 问财所属概念
def IwencaiConcept():
    """Parse the locally saved iwencai/THS CSV into (concept, stock) rows.

    Each stock's '所属概念' cell holds a ';'-separated concept list that is
    exploded into one row per concept.

    Returns a DataFrame ['concept_name', 'ts_code'].

    Improvement: rows are collected in a plain list and turned into a
    DataFrame once — DataFrame.append was removed in pandas 2.x and the
    per-row append loop was O(n^2); also stops shadowing builtin ``dict``.
    """
    # 读取下载转存的csv文件(原始下载文件为xml格式)
    stock_csv = pd.read_csv('C:\\Users\\Red-tea-1919\\Desktop\\Stock\\行业概念.csv', encoding='gbk')[['股票代码', '股票简称', '所属概念', '所属同花顺行业']]
    # 1、本地文本文件解析
    rows = []
    for _, row in stock_csv.iterrows():
        ts_code = row['股票代码']
        for cp in row['所属概念'].split(';'):
            rows.append({'concept_name': cp, 'ts_code': ts_code})
    data = pd.DataFrame(rows, columns=['concept_name', 'ts_code'])
    print('--------------------程序执行完毕!--------------------')
    return data
###################################################################################################################################
# 聚宽所属概念
def JoinQuantConcept():
    """Fetch concept membership from JoinQuant (jqdatasdk).

    Returns a DataFrame ['concept_name', 'ts_code'] with codes converted
    from JoinQuant suffixes (XSHG -> .SH, anything else -> .SZ).

    Improvement: membership rows are accumulated in a list and built into
    a DataFrame once — DataFrame.append was removed in pandas 2.x and the
    per-row append loop was O(n^2).
    """
    # NOTE(review): credentials are hard-coded — move them to config/env.
    auth('18829345691', '345691')
    data1 = get_concepts()
    data1 = data1.reset_index(inplace=False)
    data1 = data1.rename(columns={'index': 'concept_code', 'name': 'concept_name'})
    rows = []
    for concept_code in data1['concept_code'].tolist():
        for ts_code in get_concept_stocks(concept_code, date=None):
            rows.append({'concept_code': concept_code, 'ts_code': ts_code})
    data2 = pd.DataFrame(rows, columns=['concept_code', 'ts_code'])
    data = pd.merge(data1, data2)
    data = data[['concept_name', 'ts_code']]
    # JoinQuant suffixes: ...XSHG = Shanghai, otherwise treated as Shenzhen.
    data['ts_code'] = data['ts_code'].apply(lambda x: (x[:6] + ".SH") if x.endswith('XSHG') else (x[:6] + ".SZ"))
    print('--------------------程序执行完毕!--------------------')
    return data
###################################################################################################################################
# 所属概念去重
def drop_duplicates():
    """Merge concept tables from the enabled sources, dedupe, and reload MySQL.

    Clears concept_detail first (via DeleteHandler) so the table is a full
    reload rather than an accumulation.
    """
    engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')
    # TuShareConcept() / JoinQuantConcept() exist as alternative sources but
    # are currently disabled — only kaipanla and iwencai feed the table.
    kpl_rows = KplConcept()
    iwencai_rows = IwencaiConcept()
    combined = pd.concat([kpl_rows, iwencai_rows], ignore_index=True).drop_duplicates(inplace=False)
    DeleteHandler('delete from concept_detail')
    combined.to_sql('concept_detail', con=engine, if_exists='append', index=False)
    print('--------------------程序执行完毕!--------------------')
###################################################################################################################################
# 问财流通股东
def IwencaiHodler():
    """Aggregate circulating holders holding >=5% per stock into MySQL.

    Reads the locally saved iwencai CSV, keeps holder rows at or above the
    5% threshold, sums holdings per stock, and reloads table flow_holder.
    """
    engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')
    # 1、本地文本文件解析
    # 统计实际流通股,持股比例低于5%
    holders = pd.read_csv('C:\\Users\\Red-tea-1919\\Desktop\\Stock\\流通股东明细.csv', encoding='gbk')[['股票代码', '股票简称', '流通股东持股数量(股)', '流通股东持股比例(%)']]
    holders = holders.rename(columns={'股票代码': 'stk_code', '股票简称': 'stk_name', '流通股东持股数量(股)': 'cir_num', '流通股东持股比例(%)': 'cir_rate'})
    # Keep rows NOT below 5% — negated form preserves NaN rows, exactly like
    # the original drop-by-index filter did.
    holders = holders[~(holders.cir_rate < 5)]
    totals = holders.groupby(['stk_code', 'stk_name']).sum().reset_index()
    DeleteHandler('delete from flow_holder')
    totals.to_sql('flow_holder', con=engine, if_exists='append', index=False)
    print('--------------------程序执行完毕!--------------------')
###################################################################################################################################
# 开盘啦:精选概念
if __name__ == '__main__':
    # 开盘啦:精选概念 — run the holder import, then rebuild concept_detail.
    start = time.time()
    pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')
    IwencaiHodler()
    drop_duplicates()
    print('本次程序运行时间为%s秒' % (time.time() - start))
# 标签:tushare,code,index,self,date,import,data
# 来源: https://www.cnblogs.com/Iceredtea/p/13437521.html