def judge_buy_point(stock_code, stock_name):
    """Print a buy signal for one stock based on its recent closing prices.

    Requests the last ``days`` (30) rows through ``get_stock_data`` and builds
    a 15-row trailing average of the ``close`` column; each average covers the
    15 closes *before* the row it is compared with. A buy is printed when the
    close at position ``days - 1`` is below its trailing average while that
    average rose by more than 0.4% relative to the previous average.

    Args:
        stock_code: Security identifier forwarded to ``get_stock_data``.
        stock_name: Display name, used only in the printed message.

    Returns:
        None. The signal is reported with ``print``.
    """
    days = 30
    average_k = 15
    min_rise = 0.004

    data = get_stock_data(stock_code, days)
    closes = np.asarray(data['close'])

    # The checks below read fixed positions (days - 1 and days - 1 - average_k).
    # With fewer rows than requested those positions do not exist, so there is
    # nothing to judge and we skip the stock instead of raising IndexError.
    if len(closes) < days:
        return

    # averages[j] is the mean of closes[j:j + average_k], i.e. the window that
    # ends just before row j + average_k.
    averages = [
        sum(closes[end - average_k:end]) / average_k
        for end in range(average_k, len(closes))
    ]

    last = days - 1
    slot = last - average_k
    current_avg = averages[slot]
    # The very first average has no predecessor; its change ratio is 0.
    if slot == 0:
        rise = 0
    else:
        rise = (current_avg - averages[slot - 1]) / current_avg

    if closes[last] < current_avg and rise > min_rise:
        buy_price = closes[last]
        print(stock_name, stock_code, "买入", buy_price)
# Fetch the stock listing as of 2022-01-01 and run the buy-point check on
# every entry. The frame's index supplies the code and its 'display_name'
# column supplies the name.
securities = jq.get_all_securities(["stock"], "20220101")
print(len(securities))
for stock_code in securities.index:
    stock_name = securities.loc[stock_code, 'display_name']
    judge_buy_point(stock_code, stock_name)
save_dict = {}


def job(symbol, name):
    """Download the daily history for one symbol.

    The frame is stored in the module-level ``save_dict`` under ``name``
    (wrapped in a one-element list) and is also returned, so a caller that
    runs this function in a worker process can receive the data: a plain
    module-level dict written in a worker is not visible to the parent.

    Args:
        symbol: Symbol passed to ``ak.stock_zh_a_daily``.
        name: Key under which the result is stored.

    Returns:
        The frame returned by ``ak.stock_zh_a_daily``.
    """
    stock_daily_df = ak.stock_zh_a_daily(symbol)
    save_dict[name] = [stock_daily_df]
    return stock_daily_df


def multi_save_all_stock_data(limit=100, processes=8):
    """Download daily histories in parallel and pickle them to ``stock.pkl``.

    Args:
        limit: Maximum number of symbols (taken from the top of the spot
            listing) to download. ``None`` downloads every listed symbol.
        processes: Number of worker processes in the pool.

    Returns:
        None. Results are merged into the module-level ``save_dict`` and
        written to ``stock.pkl``; the elapsed time is printed.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed intervals.
    start = time.perf_counter()
    stock_zh_a_spot_df = ak.stock_zh_a_spot()
    print(stock_zh_a_spot_df.tail(10))
    symbols = stock_zh_a_spot_df["代码"]
    names = stock_zh_a_spot_df["名称"]
    total = len(symbols)

    pending = []
    with multiprocessing.Pool(processes) as pool:
        # Slicing (rather than indexing 0..99) keeps this safe when the
        # listing is shorter than ``limit``.
        pairs = zip(symbols.iloc[:limit], names.iloc[:limit])
        for i, (symbol, name) in enumerate(pairs):
            print(i, total, name)
            handle = pool.apply_async(func=job, args=(symbol, name))
            pending.append((name, handle))

        # Results must be collected before leaving the ``with`` block,
        # because exiting it terminates the pool. Each worker filled its own
        # copy of save_dict, so the parent copies the returned frames here.
        for name, handle in pending:
            try:
                save_dict[name] = [handle.get()]
            except Exception as exc:
                # Best effort: one failed download should not discard the rest.
                print(name, "download failed:", exc)

    df = pd.DataFrame(save_dict)
    df.to_pickle("stock.pkl")
    end = time.perf_counter()
    print(end - start)
数据通信
# Cross-process data sharing: every worker receives a manager-backed dict and
# lock, and the parent waits until the dict holds one entry per submitted job.
# time.clock() was removed in Python 3.8; perf_counter() measures elapsed time.
start = time.perf_counter()
multiprocessing.freeze_support()
pool = multiprocessing.Pool()
manager = multiprocessing.Manager()
save_dict = manager.dict()
lock = manager.Lock()
for i in range(100):
    pool.apply_async(func=tt_job, args=(i, i, save_dict, lock))
# NOTE(review): this polls until exactly 100 entries exist; presumably each
# tt_job call adds one key. If a job fails or reuses a key, this never ends.
# Confirm against tt_job.
while len(save_dict) != 100:
    time.sleep(2)
end = time.perf_counter()
print(end - start)
def cal_consume_time(fn):
    """Decorator that prints how long each call to ``fn`` took.

    The wrapper forwards positional and keyword arguments to ``fn``, returns
    whatever ``fn`` returns, and keeps ``fn``'s name and docstring.

    Args:
        fn: The callable to time.

    Returns:
        A wrapper around ``fn`` that prints the elapsed wall-clock seconds
        after every call.
    """
    # Imported locally so this block does not depend on the file's imports.
    import functools

    @functools.wraps(fn)
    def cal_time(*args, **kwargs):
        start = time.time()
        result = fn(*args, **kwargs)
        # NOTE(review): this one-second pause sits inside the timed region,
        # so it is included in the printed duration. Confirm it is intended.
        time.sleep(1)
        end = time.time()
        print(fn.__name__, "func consume time is: ", end - start)
        return result

    return cal_time
计算当前收益率:
def get_stock_data_by_datetime(stock_code, start_date, end_date, days = 0, fields=['close']):
    """Fetch daily price data for ``stock_code`` through ``jq.get_price``.

    Args:
        stock_code: Security identifier forwarded to ``jq.get_price``.
        start_date: Start of the range; only used when ``days`` is 0.
        end_date: End of the range; always forwarded.
        days: When non-zero, it is sent as ``count`` and ``start_date`` is
            not sent at all.
        fields: Field names forwarded unchanged.

    Returns:
        Whatever ``jq.get_price`` returns for the chosen query.
    """
    if days != 0:
        # Count-based query: a fixed number of rows up to end_date.
        return jq.get_price(
            stock_code,
            end_date=end_date,
            count=days,
            frequency='daily',
            fields=fields,
        )
    # Range-based query between the two dates.
    return jq.get_price(
        stock_code,
        start_date=start_date,
        end_date=end_date,
        frequency='daily',
        fields=fields,
    )
def get_today_profit(stock_code):
    """Return the relative change between the two rows from ``get_stock_data``.

    Reads the first column of rows 0 and 1 of ``get_stock_data(stock_code, 2)``,
    prints both values with the resulting ratio, and returns the ratio
    ``(row1 - row0) / row0``.
    """
    quotes = get_stock_data(stock_code, 2)
    pre_price, cur_price = quotes.iloc[0, 0], quotes.iloc[1, 0]
    profit = (cur_price - pre_price) / pre_price
    print(pre_price, cur_price, profit)
    return profit
基金数据分析
# Module-level fund listing, reduced to the fund-code and fund-name columns.
df = ak.fund_value_estimation_em("股票型")[["基金代码", "基金名称"]]
def get_fund_analysis_data(fund_code):
    """Estimate where a fund's latest net value sits in its fitted distribution.

    Loads the fund's history between 2020-03-17 and 2023-03-29, fits a
    log-normal distribution to the "单位净值" column and returns the fitted CDF
    evaluated at the latest value. It also prints the latest value, the value
    22 rows earlier, the relative change between the two, and the latest
    value's position within the observed min-max range.

    Args:
        fund_code: Fund identifier forwarded to ``ak.fund_etf_fund_info_em``.

    Returns:
        The fitted CDF value for the latest price, or the sentinel ``1`` when
        fewer than 700 rows are available.
    """
    df = ak.fund_etf_fund_info_em(fund_code, "20200317", "20230329")
    if len(df) < 700:
        # Too little history to fit; 1 never passes a "low percentile" filter.
        return 1

    prices = np.asarray(df["单位净值"])
    cur_price = prices[-1]
    # NOTE(review): 22 rows back is presumably about one trading month. Confirm.
    pre_price = prices[-22]

    # Scan for the extremes once instead of recomputing min() in the formula.
    lowest = min(prices)
    highest = max(prices)
    print(
        cur_price,
        pre_price,
        (cur_price - pre_price) / pre_price,
        (cur_price - lowest) / (highest - lowest),
    )

    s, loc, scale = st.lognorm.fit(prices)
    return st.lognorm.cdf(cur_price, s=s, loc=loc, scale=scale)
def travel_fund():
    """Print every fund whose fitted CDF value is below 0.05.

    Walks the module-level ``df`` row by row, taking the first column as the
    fund code and the second as the fund name, and prints code, name and value
    for each fund where ``get_fund_analysis_data`` returns less than 0.05.
    """
    fund_codes = df.iloc[:, 0]
    fund_names = df.iloc[:, 1]
    for fund_code, fund_name in zip(fund_codes, fund_names):
        p = get_fund_analysis_data(fund_code)
        if p < 0.05:
            print(fund_code, fund_name, p)
数据回测
def mode_pre():
    """Score the latest date's stocks with a saved model and print a one-day P&L.

    Steps:
      1. Load the factor data, forward-fill ``profit`` and drop rows holding
         infinities or missing values.
      2. Load the model selected by the module-level ``model_type``
         ("lasso", "lstm", or anything else for a pickled model).
      3. For the most recent date, winsorize and standardize the factor
         columns, predict a score per stock, keep stocks priced below
         ``price_thres`` and take the ``top_k`` highest scores.
      4. Fetch the 2023-05-15 closes for those stocks and print per-stock and
         total profit after costs.

    Relies on names defined elsewhere in the file: ``read_factor_data``,
    ``model_type``, ``model_name``, ``root_path``, ``winsorize``,
    ``standardize``, ``Lasso``, ``load_model``, ``sm`` and ``qs``.

    Returns:
        None. Results are printed.
    """
    all_data = read_factor_data()
    dates = sorted(all_data.index.drop_duplicates(), reverse=True)
    # fillna(method="ffill") is deprecated in recent pandas; ffill() is the
    # direct equivalent.
    all_data["profit"] = all_data["profit"].ffill()
    # Masking with the boolean frame turns +/-inf cells into NaN, so the
    # following dropna removes those rows together with missing ones.
    all_data = all_data[~all_data.isin([np.inf, -np.inf])].dropna(axis=0)

    if model_type == "lasso":
        # Rebuild the fitted estimator from stored coefficients.
        model = Lasso(alpha=0.001)
        coefs = pd.read_csv(root_path + "lasso_coef.csv")
        model.coef_ = coefs["coefficients"].values
        model.intercept_ = coefs["intercept"].values[0]
    elif model_type == "lstm":
        model = load_model(root_path + 'callback_factor_lstm.h5')
    else:
        model = sm.load_pickle(root_path + model_name)

    price_thres = 30
    top_k = 10
    init_money = 50000

    # Only the most recent date is evaluated.
    for date in dates[0:1]:
        df = all_data.loc[date, :]
        # Copy so the columns added below go onto an independent frame rather
        # than onto a slice of all_data (avoids chained-assignment warnings).
        merge_df = df[["name", "code", "profit", "close"]].copy()
        df = df.drop(["name", "code", "close", "profit"], axis=1)

        for col in df.columns:
            df[col] = winsorize(df[col])
        df = standardize(df)
        if model_type == "lstm":
            # Shape (samples, 1, features); the feature count comes from the
            # data instead of being hard-coded to 17.
            df = np.asarray(df).reshape((len(df), 1, df.shape[1]))

        result = model.predict(df)
        merge_df[0] = result  # column 0 holds the model score
        merge_df = merge_df.sort_values(by=0, ascending=False)
        merge_df = merge_df[merge_df["close"] < price_thres].copy()

        # Share count in lots of 100, proportional to the score, minimum 100.
        # NOTE(review): the score sum runs over every stock below the price
        # threshold, not just the top_k kept afterwards. Confirm this
        # allocation is intended.
        merge_df["num"] = np.floor(
            merge_df[0] / merge_df[0].sum() * init_money / merge_df["close"] / 100
        ) * 100
        merge_df["num"] = merge_df["num"].apply(lambda x: 100 if x < 100 else x)

        merge_df = merge_df.head(top_k)
        merge_df = merge_df.rename(columns={"close": "pre_close", "name": "pre_name"})
        merge_df = merge_df.sort_values(by="pre_name", ascending=False)

        stock_codes = merge_df["code"]
        new_data = qs.get_data(
            stock_codes, start="2023-05-15", end="2023-05-15", freq='d', fqt=1
        )[["close", "name"]]
        # Both frames are sorted by name in the same direction, then the new
        # quotes adopt merge_df's index so the assignment below lines up row
        # by row. This assumes both sides hold the same stocks under the same
        # names. TODO confirm.
        new_data = new_data.sort_values(by="name", ascending=False)
        new_data.index = merge_df.index
        merge_df[["close", "name"]] = new_data[["close", "name"]]

        merge_df["new_profit"] = merge_df["close"] / merge_df["pre_close"] - 1
        merge_df["earn"] = merge_df["pre_close"] * merge_df["num"] * merge_df["new_profit"]
        # Cost model: 0.1% of the purchase value plus a flat 10 per position.
        merge_df["cost"] = merge_df["pre_close"] * merge_df["num"] * 0.001 + 10
        merge_df["retained_profit"] = merge_df["earn"] - merge_df["cost"]
        merge_df = merge_df.drop(["profit", "code"], axis=1)
        print(merge_df)
        print(merge_df["retained_profit"].sum())