# Tushare / web-crawler data-collection scripts (several standalone jobs concatenated).

import requests

from jqdatasdk import *

import tushare as ts

from bs4 import BeautifulSoup

from lxml import etree

from multiprocessing import Pool

import multiprocessing

import time

import datetime

import random

import pandas as pd

import numpy as np

from sqlalchemy import create_engine

import pymysql

import json

import re

import warnings

warnings.filterwarnings("ignore")

###################################################################################################################################

# Mysql常规操作

class MysqlHandler:
    """Thin wrapper around a pymysql connection for ad-hoc SQL statements.

    Single-use by design: the connection is closed after one call to
    delete(), matching the original behaviour.
    """

    def __init__(self):
        # NOTE(review): credentials are hard-coded; move to env vars/config.
        self.conn = pymysql.connect("127.0.0.1", "root", "test", "quant_db", charset='utf8')

    def delete(self, sql):
        """Execute *sql* (expected to be a DELETE) and commit.

        Bug fix: cursor and connection are now released in finally blocks,
        so a failing statement no longer leaks either resource.
        """
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql)
            self.conn.commit()
        finally:
            cursor.close()
            self.conn.close()

###################################################################################################################################

# Tushare接口数据请求

# 1、股票列表

# 2、日线行情

# 3、资金流向

# 4、沪深股通

# 5、涨停跌停

# 6、shibor

# 7、所属概念

# 8、涨跌停价格

class TuShare:
    """Tushare pro download jobs: stock list, daily bars, money flow,
    HSGT flow, limit lists, SHIBOR and the trade calendar, each persisted
    to its own MySQL table via SQLAlchemy."""

    def __init__(self):
        # Today's date in YYYYMMDD, used as trade_date for every query.
        self.date = time.strftime("%Y%m%d", time.localtime())
        # NOTE(review): API token and DB credentials are hard-coded; move to config.
        self.pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')
        self.engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')

    def _store(self, data, table, if_exists='append'):
        # Shared tail for the simple jobs: preview, persist, log completion.
        print(data.head(5))
        data.to_sql(table, con=self.engine, if_exists=if_exists, index=False)
        print('--------------------程序执行完毕!--------------------')

    def stock_basic(self):
        """Listed-stock master data merged with the local THS industry CSV,
        appended to `stock_basic`."""
        data1 = self.pro.stock_basic(list_status='L', fields='ts_code,name,area,industry,market,exchange,list_date,is_hs')
        # Fold the city 深圳 into its province 广东.
        data1['area'] = data1['area'].apply(lambda x: "广东" if x == '深圳' else x)
        stock_csv = pd.read_csv('C:\\Users\\Red-tea-1919\\Desktop\\Stock\\行业概念.csv', encoding='gbk')[['股票代码', '股票简称', '所属概念', '所属同花顺行业']]
        rows = []
        for _, row in stock_csv.iterrows():
            # '所属同花顺行业' is 'level1-level2-level3'; split once, not three times.
            parts = row['所属同花顺行业'].split('-')
            rows.append({'ts_code': row['股票代码'], 'yjhy': parts[0], 'ejhy': parts[1], 'sjhy': parts[2]})
        # Build in one shot: row-wise DataFrame.append was removed in pandas 2.0.
        data2 = pd.DataFrame(rows, columns=['ts_code', 'yjhy', 'ejhy', 'sjhy'])
        data = pd.merge(data1, data2, how='left', on='ts_code')
        data.to_sql('stock_basic', con=self.engine, if_exists='append', index=False)
        print(data.head(5))
        print('--------------------程序执行完毕!--------------------')

    def stock_daily(self):
        """Daily OHLC bars for all stocks → `stock_daily`."""
        data = self.pro.daily(trade_date=self.date, fields='ts_code,trade_date,open,high,low,close,pre_close,pct_chg,amount')
        self._store(data, 'stock_daily')

    def index_daily(self):
        """Daily bars for the SH composite, SZ component and ChiNext indices → `index_daily`."""
        fields = 'ts_code,trade_date,open,high,low,close,pre_close,pct_chg,amount'
        frames = [self.pro.index_daily(ts_code=code, trade_date=self.date, fields=fields)
                  for code in ('000001.SH', '399001.SZ', '399006.SZ')]
        data = pd.concat(frames, ignore_index=True)
        print(data)
        data.to_sql('index_daily', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def stock_limit_price(self):
        """Daily up/down limit prices; `stock_limit_price` is fully replaced."""
        data = self.pro.stk_limit(trade_date=self.date, fields='trade_date,ts_code,pre_close,up_limit,down_limit')
        self._store(data, 'stock_limit_price', if_exists='replace')

    def stock_daily_basic(self):
        """Turnover/volume/share-structure metrics → `stock_daily_basic`."""
        data = self.pro.daily_basic(trade_date=self.date, fields='ts_code,trade_date,turnover_rate,turnover_rate_f,volume_ratio,total_share,float_share,free_share')
        self._store(data, 'stock_daily_basic')

    def stock_moneyflow(self):
        """Per-stock money-flow breakdown → `stock_moneyflow`."""
        data = self.pro.moneyflow(trade_date=self.date, fields='ts_code,trade_date,buy_md_amount,sell_md_amount,buy_lg_amount,sell_lg_amount,buy_elg_amount,sell_elg_amount,net_mf_amount')
        self._store(data, 'stock_moneyflow')

    def hsgt_moneyflow(self):
        """HK-SH/SZ connect aggregate flow → `hsgt_moneyflow`."""
        data = self.pro.moneyflow_hsgt(trade_date=self.date, fields='trade_date,hgt,sgt,north_money')
        self._store(data, 'hsgt_moneyflow')

    def limit_list(self):
        """Limit-up/limit-down stock list → `limit_stock_list`."""
        data = self.pro.limit_list(trade_date=self.date, fields='trade_date,ts_code,close,pct_chg,fc_ratio,fd_amount,first_time,last_time,open_times,strth,limit')
        self._store(data, 'limit_stock_list')

    def shibor(self):
        """Overnight and 1-month SHIBOR rates → `shibor`."""
        data = self.pro.shibor(date=self.date, fields='date,on,1m')
        self._store(data, 'shibor')

    def pre_cal_date(self):
        """SSE trade calendar for the trailing 90 days → `trade_cal_date`."""
        today = datetime.date.today()
        start_date = (today - datetime.timedelta(days=90)).strftime('%Y%m%d')
        end_date = today.strftime('%Y%m%d')
        data = self.pro.trade_cal(exchange='SSE', start_date=start_date, end_date=end_date, fields='exchange,cal_date,is_open,pretrade_date')
        self._store(data, 'trade_cal_date')

###################################################################################################################################

if __name__ == "__main__":
    t1 = time.time()
    # Clear the two tables that are fully reloaded on every run.
    conn = pymysql.connect("127.0.0.1", "root", "test", "quant_db", charset='utf8')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('delete from stock_basic')
            cursor.execute('delete from trade_cal_date')
            conn.commit()
        finally:
            # Bug fix: cursor/connection used to leak if a statement failed.
            cursor.close()
    finally:
        conn.close()
    job = TuShare()
    # Keep the original execution order (calendar refresh before stock_basic).
    job.stock_daily()
    job.index_daily()
    job.stock_daily_basic()
    job.stock_moneyflow()
    job.hsgt_moneyflow()
    job.limit_list()
    job.shibor()
    job.pre_cal_date()
    job.stock_basic()
    t2 = time.time()
    print('本次程序运行时间为%s秒' % (t2 - t1))

import requests

from jqdatasdk import *

import tushare as ts

from bs4 import BeautifulSoup

from lxml import etree

from multiprocessing import Pool

import multiprocessing

import time

import datetime

import random

import pandas as pd

import numpy as np

from sqlalchemy import create_engine

import pymysql

import json

import re

import warnings

warnings.filterwarnings("ignore")

###################################################################################################################################

# Mysql常规操作

class MysqlHandler:
    """Thin wrapper around a pymysql connection for ad-hoc SQL statements.

    Single-use by design: the connection is closed after one call to
    delete(), matching the original behaviour.
    """

    def __init__(self):
        # NOTE(review): credentials are hard-coded; move to env vars/config.
        self.conn = pymysql.connect("127.0.0.1", "root", "test", "quant_db", charset='utf8')

    def delete(self, sql):
        """Execute *sql* (expected to be a DELETE) and commit.

        Bug fix: cursor and connection are now released in finally blocks,
        so a failing statement no longer leaks either resource.
        """
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql)
            self.conn.commit()
        finally:
            cursor.close()
            self.conn.close()

###################################################################################################################################

# Tushare接口数据请求

# 1、股票列表

# 2、日线行情

# 3、资金流向

# 4、沪深股通

# 5、涨停跌停

# 6、shibor

# 7、所属概念

# 8、涨跌停价格

class TuShare:
    """Tushare pro download jobs: stock list, daily bars, money flow,
    HSGT flow, limit lists, SHIBOR and the trade calendar, each persisted
    to its own MySQL table via SQLAlchemy."""

    def __init__(self):
        # Today's date in YYYYMMDD, used as trade_date for every query.
        self.date = time.strftime("%Y%m%d", time.localtime())
        # NOTE(review): API token and DB credentials are hard-coded; move to config.
        self.pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')
        self.engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')

    def _store(self, data, table, if_exists='append'):
        # Shared tail for the simple jobs: preview, persist, log completion.
        print(data.head(5))
        data.to_sql(table, con=self.engine, if_exists=if_exists, index=False)
        print('--------------------程序执行完毕!--------------------')

    def stock_basic(self):
        """Listed-stock master data merged with the local THS industry CSV,
        appended to `stock_basic`."""
        data1 = self.pro.stock_basic(list_status='L', fields='ts_code,name,area,industry,market,exchange,list_date,is_hs')
        # Fold the city 深圳 into its province 广东.
        data1['area'] = data1['area'].apply(lambda x: "广东" if x == '深圳' else x)
        stock_csv = pd.read_csv('C:\\Users\\Red-tea-1919\\Desktop\\Stock\\行业概念.csv', encoding='gbk')[['股票代码', '股票简称', '所属概念', '所属同花顺行业']]
        rows = []
        for _, row in stock_csv.iterrows():
            # '所属同花顺行业' is 'level1-level2-level3'; split once, not three times.
            parts = row['所属同花顺行业'].split('-')
            rows.append({'ts_code': row['股票代码'], 'yjhy': parts[0], 'ejhy': parts[1], 'sjhy': parts[2]})
        # Build in one shot: row-wise DataFrame.append was removed in pandas 2.0.
        data2 = pd.DataFrame(rows, columns=['ts_code', 'yjhy', 'ejhy', 'sjhy'])
        data = pd.merge(data1, data2, how='left', on='ts_code')
        data.to_sql('stock_basic', con=self.engine, if_exists='append', index=False)
        print(data.head(5))
        print('--------------------程序执行完毕!--------------------')

    def stock_daily(self):
        """Daily OHLC bars for all stocks → `stock_daily`."""
        data = self.pro.daily(trade_date=self.date, fields='ts_code,trade_date,open,high,low,close,pre_close,pct_chg,amount')
        self._store(data, 'stock_daily')

    def index_daily(self):
        """Daily bars for the SH composite, SZ component and ChiNext indices → `index_daily`."""
        fields = 'ts_code,trade_date,open,high,low,close,pre_close,pct_chg,amount'
        frames = [self.pro.index_daily(ts_code=code, trade_date=self.date, fields=fields)
                  for code in ('000001.SH', '399001.SZ', '399006.SZ')]
        data = pd.concat(frames, ignore_index=True)
        print(data)
        data.to_sql('index_daily', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def stock_limit_price(self):
        """Daily up/down limit prices; `stock_limit_price` is fully replaced."""
        data = self.pro.stk_limit(trade_date=self.date, fields='trade_date,ts_code,pre_close,up_limit,down_limit')
        self._store(data, 'stock_limit_price', if_exists='replace')

    def stock_daily_basic(self):
        """Turnover/volume/share-structure metrics → `stock_daily_basic`."""
        data = self.pro.daily_basic(trade_date=self.date, fields='ts_code,trade_date,turnover_rate,turnover_rate_f,volume_ratio,total_share,float_share,free_share')
        self._store(data, 'stock_daily_basic')

    def stock_moneyflow(self):
        """Per-stock money-flow breakdown → `stock_moneyflow`."""
        data = self.pro.moneyflow(trade_date=self.date, fields='ts_code,trade_date,buy_md_amount,sell_md_amount,buy_lg_amount,sell_lg_amount,buy_elg_amount,sell_elg_amount,net_mf_amount')
        self._store(data, 'stock_moneyflow')

    def hsgt_moneyflow(self):
        """HK-SH/SZ connect aggregate flow → `hsgt_moneyflow`."""
        data = self.pro.moneyflow_hsgt(trade_date=self.date, fields='trade_date,hgt,sgt,north_money')
        self._store(data, 'hsgt_moneyflow')

    def limit_list(self):
        """Limit-up/limit-down stock list → `limit_stock_list`."""
        data = self.pro.limit_list(trade_date=self.date, fields='trade_date,ts_code,close,pct_chg,fc_ratio,fd_amount,first_time,last_time,open_times,strth,limit')
        self._store(data, 'limit_stock_list')

    def shibor(self):
        """Overnight and 1-month SHIBOR rates → `shibor`."""
        data = self.pro.shibor(date=self.date, fields='date,on,1m')
        self._store(data, 'shibor')

    def pre_cal_date(self):
        """SSE trade calendar for the trailing 90 days → `trade_cal_date`."""
        today = datetime.date.today()
        start_date = (today - datetime.timedelta(days=90)).strftime('%Y%m%d')
        end_date = today.strftime('%Y%m%d')
        data = self.pro.trade_cal(exchange='SSE', start_date=start_date, end_date=end_date, fields='exchange,cal_date,is_open,pretrade_date')
        self._store(data, 'trade_cal_date')

###################################################################################################################################

if __name__ == "__main__":
    t1 = time.time()
    # Clear the two tables that are fully reloaded on every run.
    conn = pymysql.connect("127.0.0.1", "root", "test", "quant_db", charset='utf8')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('delete from stock_basic')
            cursor.execute('delete from trade_cal_date')
            conn.commit()
        finally:
            # Bug fix: cursor/connection used to leak if a statement failed.
            cursor.close()
    finally:
        conn.close()
    job = TuShare()
    # Keep the original execution order (calendar refresh before stock_basic).
    job.stock_daily()
    job.index_daily()
    job.stock_daily_basic()
    job.stock_moneyflow()
    job.hsgt_moneyflow()
    job.limit_list()
    job.shibor()
    job.pre_cal_date()
    job.stock_basic()
    t2 = time.time()
    print('本次程序运行时间为%s秒' % (t2 - t1))

import requests

from jqdatasdk import *

import tushare as ts

from bs4 import BeautifulSoup

from lxml import etree

from multiprocessing import Pool

import multiprocessing

import time

import datetime

import random

import pandas as pd

import numpy as np

from sqlalchemy import create_engine

import pymysql

import json

import re

import math

import warnings

warnings.filterwarnings("ignore")

###################################################################################################################################

# 东方财富爬虫

# 1、股票异动

# 2、沪深股通

class Eastmoney:
    """Eastmoney push2ex crawlers: intraday stock alerts (异动) and the
    limit-up pool, persisted to MySQL."""

    def __init__(self):
        self.session = requests.Session()
        self.date = time.strftime("%Y%m%d", time.localtime())
        self.engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')
        # Millisecond timestamp used as the anti-cache `_` query parameter.
        self.timestamp = int(round(time.time() * 1000))
        self.url1 = 'http://push2ex.eastmoney.com/getStockCountChanges?type=4,8,16,32,64,128,8193,8194,8201,8202,8203,8204,8207,8208,8209,8210,8211,8212,8213,8214,8215,8216'
        self.url2 = "http://push2ex.eastmoney.com/getAllStockChanges?type=8201,8202,8193,4,32,64,8207,8209,8211,8213,8215,8204,8203,8194,8,16,128,8208,8210,8212,8214,8216"
        self.url3 = 'http://push2ex.eastmoney.com/getTopicZTPool?'
        # Rotating desktop user agents.
        self.ua_list = [
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'
        ]

    def changes_max_page(self):
        """Fetch the total page count ('rt') of the alert feed.

        Bug fix: the count used to be assigned to ``self.changes_max_page``,
        which replaced this bound method with a plain value so any second
        call raised TypeError; the value now lives in ``self.changes_pages``.
        """
        param = {"_": self.timestamp, "pagesize": '64', "ut": '7eea3edcaed734bea9cbfc24409ed989', "dpt": 'wzchanges'}
        html = json.loads(self.session.get(url=self.url1, params=param, headers={'User-Agent': random.choice(self.ua_list)}).text)
        self.changes_pages = html['rt']

    def changeSpider(self):
        """Crawl every page of intraday alerts and store them in `stock_change`."""
        self.changes_max_page()
        records = []
        for page in range(0, int(self.changes_pages)):
            param = {"pageindex": page, "pagesize": '64', "ut": '7eea3edcaed734bea9cbfc24409ed989', "dpt": 'wzchanges'}
            html = json.loads(self.session.get(url=self.url2, params=param, headers={'User-Agent': random.choice(self.ua_list)}).text)
            if html['data'] is None:
                break
            for stock in html['data']['allstock']:
                code = stock['c']
                name = stock['n']
                chg_type = stock['t']
                # 'tm' is HHMMSS without a leading zero before 10:00.
                tm = str(stock['tm'])
                if len(tm) == 5:
                    chg_time = str(self.date) + '0' + tm
                else:
                    chg_time = str(self.date) + tm
                # NOTE(review): stock['t'] may arrive as an int in the JSON
                # payload, in which case these string comparisons never match
                # and every row falls into the '元' branch — confirm upstream.
                if chg_type in ('8201', '8202', '8207', '8209', '8211', '8215', '8204', '8203', '8208', '8210', '8212', '8216'):
                    chg_value = stock['i'] * 100
                    chg_index = '%'
                elif chg_type in ('8193', '8194', '128', '64'):
                    chg_value = stock['i']
                    chg_index = '股'
                else:
                    chg_value = stock['i']
                    chg_index = '元'
                records.append({'chg_time': chg_time, 'code': code, 'name': name, 'chg_type': chg_type, 'chg_value': chg_value, 'chg_index': chg_index})
            print('--------------------程序执行完毕!共抓取%s页数据--------------------' % (page + 1))
        # Build once instead of row-wise DataFrame.append (removed in pandas 2.0).
        data = pd.DataFrame(records, columns=['chg_time', 'code', 'name', 'chg_type', 'chg_value', 'chg_index'])
        # Drop funds/bonds/B-shares (codes starting 1/2/5/9).
        data = data[~ data['code'].str.startswith(('1', '2', '5', '9'))]
        data['code'] = data['code'].apply(lambda x: (x + ".SH") if x.startswith('6') else (x + ".SZ"))
        print(data.head(5))
        data.to_sql('stock_change', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

    def limit(self):
        """Crawl the limit-up pool (ZT pool) and store streak counts in `limit_stock_lbc`."""
        i = math.floor(random.random() * 10000000 + 1)
        t = int(time.time() * 1000)
        # The endpoint wraps JSON in a JSONP callback; strip it before parsing.
        p = 'callbackdata' + str(i + 1) + '('
        q = ');'
        param = {'cb': 'callbackdata' + str(i + 1), 'ut': '7eea3edcaed734bea9cbfc24409ed989', 'dpt': 'wz.ztzt', 'Pageindex': '0', 'pagesize': 920, 'sort': 'fbt:asc', 'date': self.date, '_': t}
        html = self.session.get(url=self.url3, params=param, headers={'User-Agent': random.choice(self.ua_list)}).text
        html = json.loads(html.replace(p, '').replace(q, ''))
        records = []
        for x in html['data']['pool']:
            records.append({'date': self.date, 'code': x['c'], 'lbc': x['lbc'], 'hybk': x['hybk']})
        data = pd.DataFrame(records, columns=['date', 'code', 'lbc', 'hybk'])
        data['code'] = data['code'].apply(lambda x: (x + ".SH") if x.startswith('6') else (x + ".SZ"))
        print(data.head(5))
        data.to_sql('limit_stock_lbc', con=self.engine, if_exists='append', index=False)
        print('--------------------程序执行完毕!--------------------')

###################################################################################################################################

# 主运行程序

class Main:
    """Entry-point wrapper for the Eastmoney crawlers."""

    def main_Eastmoney(self):
        # Alerts first, then the limit-up pool.
        spider = Eastmoney()
        spider.changeSpider()
        spider.limit()

###################################################################################################################################

if __name__ == "__main__":
    # Time the full crawl and report the elapsed seconds.
    started = time.time()
    Main().main_Eastmoney()
    finished = time.time()
    print('本次程序运行时间为%s秒' % (finished - started))

import requests

from jqdatasdk import *

import tushare as ts

from bs4 import BeautifulSoup

from lxml import etree

from multiprocessing import Pool

import multiprocessing

import time

import datetime

import random

import pandas as pd

import numpy as np

from sqlalchemy import create_engine

import pymysql

import json

import re

import warnings

warnings.filterwarnings("ignore")

###################################################################################################################################

# 开盘啦爬虫

# 1、风口概念

# 2、龙虎榜

class Kpl:
    """Kaipanla (开盘啦) crawlers: hot concepts, dragon-tiger seat lists and
    the market-mood snapshot, persisted to MySQL."""

    def __init__(self):
        self.date = time.strftime("%Y-%m-%d", time.localtime())
        self.session = requests.Session()
        self.engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')
        self.url = 'https://pclhb.kaipanla.com/w1/api/index.php'
        self.url1 = 'https://pchq.kaipanla.com/w1/api/index.php'
        # Rotating desktop user agents.
        self.ua_list = [
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'
        ]

    def hotConceptSpider(self):
        """Crawl the dragon-tiger stock list with its hot-concept (风口) tags
        into `hot_concept` and return the raw code list for lhbSpider."""
        code_list = []
        records = []
        param = {'c': 'LongHuBang', 'a': 'GetStockList', 'st': 300, 'Time': str(self.date), 'UserID': 399083, 'Token': '71aef0e806e61ad3169ddc9473e37886'}
        stocks = json.loads(self.session.post(url=self.url, headers={'User-Agent': random.choice(self.ua_list)}, data=param).text)['list']
        for j in stocks:
            code = j['ID']
            name = j['Name']
            code_list.append(code)
            if len(j['FengKou']) == 0:
                hot_cept = '无'
            else:
                # NOTE(review): when several comma-separated concepts are
                # listed only the last one is kept, matching the original flat
                # script — confirm whether one row per concept was intended.
                for concept in j['FengKou'].split(','):
                    hot_cept = concept
            records.append({'date': self.date, 'code': code, 'name': name, 'hot_cept': hot_cept})
        # Build once instead of row-wise DataFrame.append (removed in pandas 2.0).
        data = pd.DataFrame(records, columns=['date', 'code', 'name', 'hot_cept'])
        # Keep only A-share codes (0/3/6 prefixes).
        data = data[data['code'].str.startswith(('0', '3', '6'))]
        data['code'] = data['code'].apply(lambda x: (x + ".SH") if x.startswith('6') else (x + ".SZ"))
        data['date'] = data['date'].apply(lambda x: str(x).replace('-', ''))
        print(data.head(5))
        print('--------------------程序执行完毕!--------------------')
        data.to_sql('hot_concept', con=self.engine, if_exists='append', index=False)
        return code_list

    def _seat_row(self, code, direction, seat):
        # One buy/sell seat record; a missing/empty GroupIcon becomes '空'.
        try:
            groupicon = seat['GroupIcon'][0]
            if groupicon is None:
                groupicon = '空'
        except (KeyError, IndexError, TypeError):
            # Bug fix: narrowed from a bare `except:` that hid real errors.
            groupicon = '空'
        return {'date': self.date, 'code': code, 'direction': direction,
                'dname': seat['Name'], 'buy': seat['Buy'], 'sell': seat['Sell'],
                'groupicon': groupicon, 'youziicon': seat['YouZiIcon']}

    def lhbSpider(self, code_list):
        """For each code, crawl the buy/sell seats of its dragon-tiger entry
        into `lhb_list`."""
        cols = ['date', 'code', 'direction', 'dname', 'buy', 'sell', 'groupicon', 'youziicon']
        for code in code_list:
            param = {'c': 'Stock', 'a': 'GetNewOneStockInfo', 'StockID': code, 'Time': self.date, 'UserID': '399083', 'Token': '71aef0e806e61ad3169ddc9473e37886'}
            entries = json.loads(self.session.post(url=self.url, headers={'User-Agent': random.choice(self.ua_list)}, data=param).text)['List']
            buy_rows = []
            sell_rows = []
            for j in entries:
                for seat in j['BuyList']:
                    buy_rows.append(self._seat_row(code, 'buy', seat))
                for seat in j['SellList']:
                    sell_rows.append(self._seat_row(code, 'sell', seat))
            # Explicit columns keep the frame well-formed even with no rows.
            data = pd.concat([pd.DataFrame(buy_rows, columns=cols),
                              pd.DataFrame(sell_rows, columns=cols)], axis=0)
            data = data[data['code'].str.startswith(('0', '3', '6'))]
            data['code'] = data['code'].apply(lambda x: (x + ".SH") if x.startswith('6') else (x + ".SZ"))
            data['date'] = data['date'].apply(lambda x: str(x).replace('-', ''))
            data.to_sql('lhb_list', con=self.engine, if_exists='append', index=False)
            print(data.head(5))
        print('--------------------程序执行完毕!--------------------')

    def scqxSpider(self):
        """Market-mood snapshot (limit-up/down counts, breadth, ratios) → `scqx`."""
        param = {'c': 'PCArrangeData', 'a': 'GetIndexPlate', 'st': 3, 'StockID': 'SH000001', 'UserID': 399083, 'Token': '2292739880d01bd81e169e90a1898ebe'}
        mood = json.loads(self.session.post(url=self.url1, headers={'User-Agent': random.choice(self.ua_list)}, data=param).text)['Mood']
        row = {
            'zrzt': mood['ZRZT'],  # natural limit-ups
            'ztzb': mood['ZTZB'],  # broken limit-ups
            'szjs': mood['SZJS'],  # advancing stocks
            'xdjs': mood['XDJS'],  # declining stocks
            'zt': mood['ZT'],      # limit-up count
            'dt': mood['DT'],      # limit-down count
            'zbl': mood['ZBL'],    # board-break ratio
            'cgl': mood['CGL'],    # yesterday's board-hitting success rate
            'yll': mood['YLL'],    # yesterday's board-hitting profit rate
            'day': mood['day']     # trade date
        }
        data = pd.DataFrame(row, index=[0])
        data = data[['day', 'zrzt', 'ztzb', 'szjs', 'xdjs', 'zt', 'dt', 'zbl', 'cgl', 'yll']]
        data.to_sql('scqx', con=self.engine, if_exists='append', index=False)
        print(data)
        print('--------------------程序执行完毕!--------------------')

###################################################################################################################################

# 主运行程序

class Main:
    """Entry-point wrapper for the Kaipanla crawlers."""

    def main_Kpl(self):
        spider = Kpl()
        # lhbSpider reuses the code list produced by the concept spider.
        codes = spider.hotConceptSpider()
        spider.lhbSpider(codes)
        spider.scqxSpider()

###################################################################################################################################

if __name__ == "__main__":
    # Time the full crawl and report the elapsed seconds.
    started = time.time()
    Main().main_Kpl()
    finished = time.time()
    print('本次程序运行时间为%s秒' % (finished - started))

import requests

from jqdatasdk import *

import tushare as ts

from bs4 import BeautifulSoup

from lxml import etree

from multiprocessing import Pool

import multiprocessing

import time

import datetime

import random

import pandas as pd

import numpy as np

from sqlalchemy import create_engine

import pymysql

import json

import re

import warnings

warnings.filterwarnings("ignore")

###################################################################################################################################

# code标准化

class CodeHandler:
    """Converts Tushare ts_codes into the formats other data vendors expect."""

    def __init__(self):
        # NOTE(review): hard-coded API token; move to configuration.
        self.pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')

    def get_all_securities(self):
        """Refresh ``self.stocks`` with every listed ts_code."""
        self.stocks = self.pro.stock_basic(list_status='L', fields='ts_code,exchange')

    def jq_code_normalize(self):
        """'600000.SH' → '600000.XSHG', '000001.SZ' → '000001.XSHE' (JoinQuant style)."""
        self.get_all_securities()
        return self.stocks['ts_code'].apply(lambda x: (x[:7] + "XSHG") if x.endswith('SH') else (x[:7] + "XSHE")).tolist()

    def ts_code_normalize(self):
        """Plain Tushare codes, e.g. '600000.SH'."""
        self.get_all_securities()
        return self.stocks['ts_code'].tolist()

    def no_code_normalize(self):
        """Bare 6-digit codes, e.g. '600000'.

        Bug fix: now returns a list like its sibling methods instead of a
        pandas Series (the missing ``.tolist()`` was an inconsistency).
        """
        self.get_all_securities()
        return self.stocks['ts_code'].apply(lambda x: x[:6]).tolist()

    def else_code_normalize(self):
        """Exchange-prefixed codes, e.g. 'SH600000' (Sina style)."""
        self.get_all_securities()
        return self.stocks['ts_code'].apply(lambda x: ('SH' + x[:6]) if x.endswith('SH') else ('SZ' + x[:6])).tolist()

###################################################################################################################################

# 新浪财经爬虫

# 1、个股成交分价表

def divCostSpider(code):
    """Scrape the Sina price-distribution table for *code* (e.g. 'SH600000')
    and append it to the `stock_divcost` table.

    Best-effort: runs inside a multiprocessing Pool worker, so failures are
    reported and swallowed instead of crashing the pool.
    """
    try:
        engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')
        date = time.strftime("%Y%m%d", time.localtime())
        # Rotating desktop user agents.
        ua_list = [
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'
        ]
        url = 'http://vip.stock.finance.sina.com.cn/quotes_service/view/cn_price.php?symbol=' + code.lower()
        html = requests.get(url=url, headers={'User-Agent': random.choice(ua_list)}).text
        tree = etree.HTML(html)
        rows = []
        for tr in tree.xpath('//*[@id="divListTemplate"]/table/tbody/tr'):
            price = tr.xpath('.//td[1]/text()')[0].strip()
            volume = tr.xpath('.//td[2]/text()')[0].strip()
            buyrate = str(tr.xpath('.//td[3]/text()')[0].strip()).replace('%', '')
            rows.append({'code': code, 'date': date, 'price': price, 'volume': volume, 'buyrate': buyrate})
        # Build once instead of row-wise DataFrame.append (removed in pandas 2.0).
        data = pd.DataFrame(rows, columns=['code', 'date', 'price', 'volume', 'buyrate'])
        # 'SH600000' → '600000.SH', anything else → '<code>.SZ'.
        data['code'] = data['code'].apply(lambda x: (x[2:] + ".SH") if x.startswith('SH') else (x[2:] + ".SZ"))
        data.to_sql('stock_divcost', con=engine, if_exists='append', index=False)
    except Exception as exc:
        # Bug fix: the bare `except:` hid the actual failure; report the cause.
        print('%s数据抓取出现问题: %s' % (code, exc))

###################################################################################################################################

# 主运行程序

class Main:
    """Fans the Sina price-distribution spider out over a process pool."""

    def main_Sina(self):
        codes = CodeHandler().else_code_normalize()
        workers = Pool()
        workers.map(divCostSpider, codes)
        workers.close()  # no more tasks will be submitted
        workers.join()   # wait for all workers to finish

###################################################################################################################################

if __name__ == "__main__":
    # Time the full crawl and report the elapsed seconds.
    started = time.time()
    Main().main_Sina()
    finished = time.time()
    print('本次程序运行时间为%s秒' % (finished - started))

import requests

from jqdatasdk import *

import tushare as ts

from bs4 import BeautifulSoup

from lxml import etree

from multiprocessing import Pool

import multiprocessing

import time

import datetime

import random

import pandas as pd

import numpy as np

from sqlalchemy import create_engine

import pymysql

import json

import re

import math

import warnings

warnings.filterwarnings("ignore")

###################################################################################################################################

# 东方财富爬虫

# 1、股票异动

# 2、沪深股通

class Eastmoney:

def __init__(self):
    """Set up the HTTP session, DB engine, Tushare client and HSGT query date."""
    self.session = requests.Session()
    self.engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')
    self.pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')
    self.url1 = 'http://dcfm.eastmoney.com/EM_MutiSvcExpandInterface/api/js/get?'
    # Rotating desktop user agents.
    self.ua_list = [
        'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'
    ]
    # Latest pre-trade date from the local calendar (SSE open days only).
    self.sql = '''SELECT pretrade_date FROM trade_cal_date WHERE exchange = 'SSE' AND is_open = 1 ORDER BY cal_date DESC LIMIT 1'''
    self.dataframe = pd.read_sql(self.sql, self.engine)['pretrade_date'].tolist()
    print(self.dataframe)
    # NOTE(review): the computed calendar date is ignored and a fixed date is
    # used instead — presumably leftover debugging; confirm before relying on it.
    # self.pretrade_date = str(self.dataframe[0][:4]) + '-' + str(self.dataframe[0][4:6]) + '-' + str(self.dataframe[0][6:])
    self.pretrade_date = '2020-07-22'
    print(self.pretrade_date)

def hsgt_max_page(self):
    """Fetch the page count of the HSGT holdings ranking for pretrade_date.

    NOTE(review): the result is stored as ``self.hsgt_max_page``, which
    clobbers this bound method after the first call (a second call would
    raise TypeError). Kept as-is because sibling code reads that attribute
    name — confirm before renaming.
    """
    param = {'type': 'HSGT20_GGTJ_SUM',
             'token': '894050c76af8597a853f5b408b759f5d',
             'st': 'ShareSZ_Chg_One',
             'sr': '-1',
             'p': '1',
             'ps': '50',
             'js': 'var eAEIQxxY={pages:(tp),data:(x)}',
             'filter': "(DateType='1' and HdDate='{}')".format(self.pretrade_date),
             'rt': '53077355'}
    response = self.session.get(url=self.url1, params=param, headers={'User-Agent': random.choice(self.ua_list)}).text
    # The endpoint returns JS like "var X={pages:12,data:[...]}"; pull the count.
    pattern = re.compile(r'var.*?{pages:(\d+),data:.*?')
    match = pattern.search(response)
    self.hsgt_max_page = match.group(1)

def hsgtMoneySpider(self):

self.hsgt_max_page()

dict = {}

data = pd.DataFrame()

for page in range(0, int(self.hsgt_max_page)):

param = {'type': 'HSGT20_GGTJ_SUM',

'token': '894050c76af8597a853f5b408b759f5d',

'st': 'ShareSZ_Chg_One',

'sr': '-1',

'p': page,

'ps': '50',

'js': 'var eAEIQxxY={pages:(tp),data:(x)}',

'filter': "(DateType='1' and HdDate='{}')".format(self.pretrade_date),

'rt': '53077355'}

html = self.session.get(url=self.url1, params=param, headers={'User-Agent': random.choice(self.ua_list)}).text

# 提取出list,可以使用json.dumps和json.loads

q = re.compile('var.*?data:(.*)}', re.S)

i = re.search(q, html)

for p in json.loads(i.group(1)):

date = p['HdDate'] # 日期

code = p['SCode'] # 代码

name = p['SName'] # 名称

hyname = p['HYName'] # 所属板块

dqname = p['DQName'] # 地区板块

newprice = p['NewPrice'] # 今日收盘价

zdf = p['Zdf'] # 今日涨跌幅

sharehold = p['ShareHold'] # 今日持股股数,无单位

sharesz = p['ShareSZ'] # 今日持股市值,无单位

ltzb = p['LTZB'] # 今日持股占流通股比,无单位

sharesrate = p['SharesRate'] # 今日持股占总股本比,单位%

sharehold_chg_one = p['ShareHold_Chg_One'] # 今日增持股数,无单位

sharesz_chg_one = p['ShareSZ_Chg_One'] # 今日增持市值,无单位

ltzb_one = p['LTZB_One'] # 今日增持占流通股比,无单位

zzb_one = p['ZZB_One'] # 今日增持占总股本比,无单位

dict = {

'date': self.pretrade_date,

'code': code,

'hyname': hyname,

'dqname': dqname,

'newprice': newprice,

'zdf': zdf,

'sharehold': sharehold,

'sharesz': sharesz,

'ltzb': ltzb,

'sharesrate': sharesrate,

'sharehold_chg_one': sharehold_chg_one,

'sharesz_chg_one': sharesz_chg_one,

'ltzb_one': ltzb_one,

'zzb_one': zzb_one

}

data = data.append(dict, ignore_index=True)

data = data[['date', 'code', 'hyname', 'dqname', 'newprice', 'zdf', 'sharehold', 'sharesz', 'ltzb', 'sharesrate', 'sharehold_chg_one', 'sharesz_chg_one', 'ltzb_one', 'zzb_one']]

data['code'] = data['code'].apply(lambda x: (x + ".SH") if x.startswith('6') == True else (x + ".SZ"))

data['date'] = data['date'].apply(lambda x: str(x).replace('-', ''))

print(data.head(5))

data.to_sql('hsgt_detail', con=self.engine, if_exists='append', index=False)

print('--------------------程序执行完毕!--------------------')

def pre_cal_date(self):

t1 = datetime.date.today()

t2 = t1 - datetime.timedelta(days=90)

start_date = t2.strftime('%Y%m%d')

end_date = t1.strftime('%Y%m%d')

data = self.pro.trade_cal(exchange='SSE', start_date=start_date, end_date=end_date, fields='exchange,cal_date,is_open,pretrade_date')

print(data.head(5))

data.to_sql('trade_cal_date', con=self.engine, if_exists='append', index=False)

print('--------------------程序执行完毕!--------------------')

###################################################################################################################################

# 主运行程序

class Main:
    """Entry point wrapper for the Eastmoney scraping pipeline."""

    def main_Eastmoney(self):
        """Refresh the trade calendar, then scrape HSGT holdings detail."""
        spider = Eastmoney()
        spider.pre_cal_date()
        spider.hsgtMoneySpider()

###################################################################################################################################

if __name__ == "__main__":
    # Run the Eastmoney pipeline and report wall-clock duration.
    start = time.time()
    Main().main_Eastmoney()
    elapsed = time.time() - start
    print('本次程序运行时间为%s秒' % (elapsed))

import requests

from jqdatasdk import *

import tushare as ts

from bs4 import BeautifulSoup

from lxml import etree

from multiprocessing import Pool

import multiprocessing

import time

import datetime

import random

import pandas as pd

import numpy as np

from sqlalchemy import create_engine

import pymysql

import json

import re

import warnings

warnings.filterwarnings("ignore")

###################################################################################################################################

# Mysql常规操作

def DeleteHandler(sql):
    """Execute a data-modifying SQL statement (e.g. DELETE) against quant_db.

    Fixes vs. original: the original never called ``commit()`` — pymysql's
    autocommit is off by default, so the delete was silently rolled back —
    and it leaked the connection (the sibling ``MysqlHandler.delete`` both
    commits and closes). Keyword arguments are used because positional
    ``connect()`` args were removed in PyMySQL 1.0.

    Args:
        sql: the SQL statement to execute.
    """
    con = pymysql.connect(host="127.0.0.1", user="root", password="test",
                          database="quant_db", charset='utf8')
    try:
        cursor = con.cursor()
        try:
            cursor.execute(sql)
            con.commit()  # persist the change; autocommit is off by default
        finally:
            cursor.close()
    finally:
        con.close()

###################################################################################################################################

# Tushare所属概念

def TuShareConcept():
    """Fetch every Tushare concept board and its member stocks.

    Relies on the module-level ``pro`` Tushare API handle (set in the
    ``__main__`` guard).

    Fixes vs. original:
    - the rate-limit branch previously *skipped* the concept it paused on
      (``if sleep else fetch``), silently dropping that board's data; it
      now sleeps and still fetches;
    - rows are collected in a list (``DataFrame.append`` was removed in
      pandas 2.0) and an empty result is handled.

    Returns:
        DataFrame with columns ['concept_name', 'ts_code'].
    """
    concept_ids = (pro.concept(fields='code,name'))['code'].tolist()
    frames = []
    for visit_count, cid in enumerate(concept_ids):
        # Throttle roughly every 199 requests to respect the API rate limit.
        if (visit_count + 1) % 199 == 0:
            time.sleep(10)
        frames.append(pro.concept_detail(id=cid, fields='concept_name,ts_code'))
    if frames:
        data = pd.concat(frames, ignore_index=True)[['concept_name', 'ts_code']]
    else:
        data = pd.DataFrame(columns=['concept_name', 'ts_code'])
    print('--------------------程序执行完毕!--------------------')
    return data

###################################################################################################################################

# 开盘啦所属概念

def KplConcept():
    """Scrape per-stock concept memberships from kaipanla (开盘啦).

    Relies on the module-level ``pro`` Tushare API handle for the stock list.

    Fixes vs. original:
    - the throttle branch previously *skipped* the stock it paused on,
      losing its concepts; it now sleeps and still fetches;
    - ``data1`` could be unbound at ``return`` if the exception fired before
      its assignment; rows are now collected in a pre-declared list so a
      partial scrape is still returned on failure;
    - removed unused locals (``engine``, the unused 'trend' code parse) and
      the removed ``DataFrame.append``.

    Returns:
        DataFrame ['concept_name', 'ts_code'] with funds/bonds/indices
        (codes starting 1/2/5/9) filtered out.
    """
    rows = []
    try:
        date = time.strftime("%Y-%m-%d", time.localtime())
        code_list = pro.stock_basic(list_status='L', fields='ts_code,name')['ts_code'].tolist()
        url = 'https://pchq.kaipanla.com/w1/api/index.php'
        ua_list = [
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'
        ]
        for visit_count, code in enumerate(code_list):
            # Pause every ~400 requests to avoid being throttled by the site.
            if (visit_count + 1) % 400 == 0:
                time.sleep(60)
            payload = {'c': 'PCArrangeData',
                       'a': 'GetHQPlate',
                       'StockID': code[:6],
                       'Day': date,
                       'time': '15:00',
                       'SelType': '1, 2, 3, 8, 9, 5, 6, 7',
                       'UserID': 399083,
                       'Token': '71aef0e806e61ad3169ddc9473e37886'
                       }
            html = requests.post(url=url, data=payload,
                                 headers={'User-Agent': random.choice(ua_list)}).text
            # 'stockplate' is a list of [concept_name, ...] entries.
            for cp in json.loads(html)['stockplate']:
                rows.append({'concept_name': cp[0], 'ts_code': code})
    except Exception as error:
        # Best effort: keep whatever was scraped before the failure.
        print(error)
    data1 = pd.DataFrame(rows, columns=['concept_name', 'ts_code'])
    # Drop funds/bonds/indices (codes starting with 1/2/5/9); keep A-shares.
    data1 = data1[~data1['ts_code'].str.startswith(('1', '2', '5', '9'))]
    print('--------------------程序执行完毕!--------------------')
    return data1

###################################################################################################################################

# 问财所属概念

def IwencaiConcept(csv_path='C:\\Users\\Red-tea-1919\\Desktop\\Stock\\行业概念.csv'):
    """Parse the iwencai (问财) export CSV into (concept, stock) pairs.

    Each stock row carries a ';'-separated concept list in the '所属概念'
    column; one output row is emitted per (concept, stock) pair.

    Args:
        csv_path: path of the gbk-encoded export CSV. Defaults to the
            original hard-coded location, so existing callers are unchanged.

    Returns:
        DataFrame with columns ['concept_name', 'ts_code'].
    """
    stock_csv = pd.read_csv(csv_path, encoding='gbk')[['股票代码', '股票简称', '所属概念', '所属同花顺行业']]
    rows = []
    for _, row in stock_csv.iterrows():
        ts_code = row['股票代码']
        # Fan out the semicolon-separated concept list into one row each.
        for cp in row['所属概念'].split(';'):
            rows.append({'concept_name': cp, 'ts_code': ts_code})
    data = pd.DataFrame(rows, columns=['concept_name', 'ts_code'])
    print('--------------------程序执行完毕!--------------------')
    return data

###################################################################################################################################

# 聚宽所属概念

def JoinQuantConcept():
    """Fetch JoinQuant (聚宽) concept boards and map members to Tushare codes.

    Fixes vs. original: rows are accumulated in a list of dicts instead of
    the removed ``DataFrame.append``; the ``dict2`` builtin-style accumulator
    is gone.

    Returns:
        DataFrame ['concept_name', 'ts_code'] with JoinQuant's XSHG/XSHE
        suffixes converted to Tushare's .SH/.SZ.
    """
    auth('18829345691', '345691')
    concepts = get_concepts().reset_index(inplace=False)
    concepts = concepts.rename(columns={'index': 'concept_code', 'name': 'concept_name'})
    rows = []
    for concept_code in concepts['concept_code'].tolist():
        for ts_code in get_concept_stocks(concept_code, date=None):
            rows.append({'concept_code': concept_code, 'ts_code': ts_code})
    members = pd.DataFrame(rows, columns=['concept_code', 'ts_code'])
    data = pd.merge(concepts, members)
    data = data[['concept_name', 'ts_code']]
    # JoinQuant suffix XSHG = Shanghai; everything else is treated as Shenzhen.
    data['ts_code'] = data['ts_code'].apply(lambda x: x[:6] + ('.SH' if x.endswith('XSHG') else '.SZ'))
    print('--------------------程序执行完毕!--------------------')
    return data

###################################################################################################################################

# 所属概念去重

def drop_duplicates():
    """Rebuild the ``concept_detail`` table from the kaipanla and iwencai
    sources, de-duplicated across both."""
    engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')
    kpl_data = KplConcept()
    iwencai_data = IwencaiConcept()
    # TuShare / JoinQuant sources are currently disabled:
    #merged = pd.concat([kpl_data, iwencai_data, TuShareConcept(), JoinQuantConcept()], ignore_index=True).drop_duplicates()
    merged = pd.concat([kpl_data, iwencai_data], ignore_index=True).drop_duplicates(inplace=False)
    # Wipe the table first, then bulk-insert the fresh de-duplicated rows.
    DeleteHandler('delete from concept_detail')
    merged.to_sql('concept_detail', con=engine, if_exists='append', index=False)
    print('--------------------程序执行完毕!--------------------')

###################################################################################################################################

# 问财流通股东

def IwencaiHodler():
    """Load the iwencai circulating-shareholder export, drop small holders,
    aggregate per stock, and rewrite the ``flow_holder`` table.

    NOTE(review): the original comment said "持股比例低于5%" (below 5%), but the
    code drops rows with cir_rate < 5, i.e. it KEEPS holders at >= 5% — the
    filtering direction should be confirmed against the intended analysis.
    """
    engine = create_engine('mysql+pymysql://root:test@127.0.0.1:3306/quant_db?charset=utf8')
    holders = pd.read_csv('C:\\Users\\Red-tea-1919\\Desktop\\Stock\\流通股东明细.csv', encoding='gbk')[['股票代码', '股票简称', '流通股东持股数量(股)', '流通股东持股比例(%)']]
    holders = holders.rename(columns={'股票代码': 'stk_code', '股票简称': 'stk_name', '流通股东持股数量(股)': 'cir_num', '流通股东持股比例(%)': 'cir_rate'})
    # Same row set as the original drop(...index): removes only rows whose
    # cir_rate compares < 5 (NaN rows are retained, as before).
    holders = holders[~(holders.cir_rate < 5)]
    totals = holders.groupby(['stk_code', 'stk_name']).sum().reset_index()
    DeleteHandler('delete from flow_holder')
    totals.to_sql('flow_holder', con=engine, if_exists='append', index=False)
    print('--------------------程序执行完毕!--------------------')

###################################################################################################################################

# 开盘啦:精选概念

if __name__ == '__main__':
    # Run the concept/holder refresh and report wall-clock duration.
    start = time.time()
    # `pro` is read as a module-level global by TuShareConcept / KplConcept.
    pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')
    IwencaiHodler()
    drop_duplicates()
    elapsed = time.time() - start
    print('本次程序运行时间为%s秒' % (elapsed))

标签:tushare,code,index,self,date,import,data

来源: https://www.cnblogs.com/Iceredtea/p/13437521.html

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值