交易策略:
-- Buy & Hold
运行:
python main.py --companies="^TNX, AAPL,MSFT, BTC-USD" --ratios="0.1,0.4, 0.2, 0.3"
结果:
sharpe sortino max_drawdown cagr ratio
company
^TNX 0.384590 0.600018 -84.5% 10.7% 10.0%
AAPL 0.815257 1.185914 -38.5% 30.6% 40.0%
MSFT 0.778694 1.134071 -31.6% 26.9% 20.0%
BTC-USD 0.626151 0.914680 -83.3% 21.3% 30.0%
Portfolio 0.926480 1.334484 -33.2% 36.0% 100.0%
注:资产组合明显降低了风险——组合(Portfolio)的 Sharpe Ratio 和 Sortino Ratio 均高于任何单一资产,体现了这一点。
– main.py
import requests
import pandas as pd
import json,os
import numpy as np
import datetime
import matplotlib.pyplot as plt
# Yahoo Finance chart URL template for daily (interval=1d) candlesticks.
# ``symbol``, ``startts`` and ``endts`` are substituted with ``str.format``.
endpoint = (
    'https://query1.finance.yahoo.com/v8/finance/chart/{symbol}'
    '?interval=1d&period1={startts}&period2={endts}'
)
def fetch(symbol, startts: int, endts: int):
    """Return the daily simple returns of ``symbol`` as a one-column DataFrame.

    The result is cached in the working directory as
    ``<symbol>_<startts>_<endts>.csv``; when that file already exists it is
    read back and no network request is made.

    Parameters
    ----------
    symbol : str
        Ticker substituted into the chart URL (for example ``AAPL``).
    startts, endts : int
        Start and end of the requested window as epoch seconds.

    Returns
    -------
    pd.DataFrame
        Single ``Returns`` column (percentage change of the adjusted close,
        first row dropped). The frame is always loaded from the CSV cache, so
        the index holds whatever ``pd.read_csv`` yields for the ``Date``
        column.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the response carries no chart result for ``symbol``.
    """
    fn = f'{symbol}_{startts}_{endts}.csv'
    if os.path.exists(fn):
        return pd.read_csv(fn, index_col=0)

    url = endpoint.format(symbol=symbol, startts=startts, endts=endts)
    print(url)
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    # Without a timeout a stalled connection would block the run forever.
    resp = requests.get(url, headers=headers, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    chart = data.get("chart") or {}
    results = chart.get("result")
    if not results:
        # Error payloads carry "result": null plus an "error" object.
        raise ValueError(
            f"No chart data returned for {symbol!r}: {chart.get('error')}"
        )
    result = results[0]
    currency = result["meta"]['currency']
    adjclose = result["indicators"]["adjclose"][0]["adjclose"]

    # Keep only the (local-time) calendar date of each bar and stamp it with a
    # fixed 21:30:00 so that rows from different symbols share index values.
    dates = []
    for ts in result["timestamp"]:
        day = datetime.datetime.fromtimestamp(ts)
        dates.append(pd.Timestamp(
            datetime.datetime(year=day.year, month=day.month, day=day.day,
                              hour=21, minute=30, second=0)))

    rows = list(zip(dates, adjclose))
    df = pd.DataFrame(rows, columns=['Date', 'AdjClose'])
    df.set_index('Date', inplace=True)
    df['Returns'] = df.AdjClose.pct_change()
    df = df[1:]  # the first row has no previous close to compare against

    print('-- ', symbol)
    print('-- currency: ', currency)
    df.Returns.to_csv(fn)
    # Read the cache back so fresh and cached calls return identical frames.
    return pd.read_csv(fn, index_col=0)
def _load(symbol):
    """Read ``<symbol>.csv`` from the working directory.

    The first CSV column becomes the index of the returned DataFrame.
    """
    return pd.read_csv(f'{symbol}.csv', index_col=0)
def _as_percent(value):
    """Format a fraction as a percentage truncated to one decimal place."""
    if pd.isna(value):
        return "nan%"
    return f"{int(value * 1000) / 10}%"


def calc_metrics(companies, ratios, startts: int, endts: int):
    """Compute risk/return statistics for each symbol and the weighted basket.

    Parameters
    ----------
    companies : list of str
        Symbols passed to ``fetch``.
    ratios : sequence of float
        Portfolio weight per symbol, in the same order as ``companies``.
        Weights that do not sum to 1 are rescaled so that they do.
    startts, endts : int
        Window boundaries (epoch seconds) forwarded to ``fetch``.

    Returns
    -------
    dict
        ``sharpe``, ``sortino``, ``max_down`` and ``returns`` are lists with
        one entry per symbol followed by the portfolio; ``names`` holds the
        matching labels. ``mismatch_nan`` is the number of rows without any
        missing value minus the total row count (so it is zero or negative),
        and ``total_count`` is the total row count.

    Raises
    ------
    ValueError
        If no symbol is given, if ``companies`` and ``ratios`` differ in
        length, or if the weights sum to zero.
    """
    from portfolio_stats import sharpe, sortino, max_drawdowns, annual_returns

    print('*'*50)
    print('-- calc_metrics ')

    companies = list(companies)
    if not companies:
        raise ValueError("At least one symbol is required.")
    if len(companies) != len(ratios):
        raise ValueError(
            f"Got {len(companies)} symbols but {len(ratios)} ratios."
        )

    s = np.sum(ratios)
    print('-- ratios: ', ratios)
    print('-- Total (ratios): ', s)
    if s == 0:
        raise ValueError("Ratios sum to zero and cannot be normalised.")
    if not np.isclose(s, 1):
        ratios = np.array(ratios)/s
        print('-- ratios(modified): ', ratios)

    # Name every return series after its symbol *before* joining: merging
    # several frames that all carry a 'Returns' column yields clashing
    # suffixed names once a fourth symbol is added.
    columns = [
        fetch(symbol, startts, endts).iloc[:, 0].rename(symbol)
        for symbol in companies
    ]
    df = pd.concat(columns, axis=1, join='outer').sort_index()

    # Count incomplete rows before the gaps are replaced by zero returns.
    n0 = df.shape[0]
    n1 = df.dropna().shape[0]
    df = df.fillna(0).astype(float)

    df['Total'] = df.dot(np.asarray(ratios, dtype=float))
    print(df)

    sp, so, md, rtns = [], [], [], []
    for col in df.columns:
        series = df[col]
        sp.append(sharpe(series))
        so.append(sortino(series))
        md.append(max_drawdowns(series))
        rtns.append(annual_returns(series, 252))

    names = companies + ['Portfolio']
    stat = pd.DataFrame(
        list(zip(names, sp, so, md, rtns, list(ratios) + [np.sum(ratios)])),
        columns=['company', 'sharpe', 'sortino', 'max_drawdown', 'cagr', 'ratio'])
    stat.set_index('company', inplace=True)
    stat.cagr = stat.cagr.apply(_as_percent)
    stat.max_drawdown = stat.max_drawdown.apply(_as_percent)
    stat.ratio = stat.ratio.apply(_as_percent)
    print(stat)

    return {'sharpe': sp,
            'sortino': so,
            'max_down': md,
            'returns': rtns,
            'names': names,
            'mismatch_nan': n1 - n0,
            'total_count': df.shape[0]}
import click
@click.command()
@click.option('--companies', required=True, help="Comma separated list of symbols")
@click.option('--ratios', required=True, help="Comma separated float numbers, should be sum to 1")
def main(companies, ratios):
    """Print buy-and-hold statistics for a weighted basket of symbols.

    The analysis window is the 5 * 365 days ending at today's midnight (UTC).
    ``companies`` and ``ratios`` arrive as comma separated strings and are
    split here before being handed to ``calc_metrics``.
    """
    utc = datetime.timezone.utc
    today = datetime.datetime.now(utc).date()
    # Build an aware datetime so the epoch value does not depend on the
    # machine's local time zone.
    end = datetime.datetime(year=today.year, month=today.month, day=today.day, tzinfo=utc)
    start = end - datetime.timedelta(days=365*5)
    endts = int(end.timestamp())
    startts = int(start.timestamp())

    companies = [e.strip() for e in companies.split(",")]
    ratios = [float(v) for v in ratios.split(",")]
    calc_metrics(companies, ratios, startts=startts, endts=endts)


if __name__ == '__main__':
    main()
– portfolio_stats.py
import pandas as pd
import numpy as np
import datetime
# Period labels accepted as keys of ANNUALIZATION_FACTORS.
MINUTELY = 'minutely'
DAILY = 'daily'
WEEKLY = 'weekly'
MONTHLY = 'monthly'
QUARTERLY = 'quarterly'
YEARLY = 'yearly'

# Approximate number of periods per calendar unit.
APPROX_DAILY_TRADING_HOURS = 24
APPROX_BDAYS_PER_MONTH = 21
APPROX_BDAYS_PER_YEAR = 252
MONTHS_PER_YEAR = 12
WEEKS_PER_YEAR = 52
QTRS_PER_YEAR = 4
# NOTE(review): the factor 12 yields twelve periods per trading hour, which
# looks like 5-minute bars rather than minutes -- confirm before relying on it.
APPROX_MINUTES_PER_YEAR = (
    APPROX_BDAYS_PER_YEAR * APPROX_DAILY_TRADING_HOURS * 12
)

# Number of periods per year for each supported return frequency.
ANNUALIZATION_FACTORS = {
    MINUTELY: APPROX_MINUTES_PER_YEAR,
    DAILY: APPROX_BDAYS_PER_YEAR,
    WEEKLY: WEEKS_PER_YEAR,
    MONTHLY: MONTHS_PER_YEAR,
    QUARTERLY: QTRS_PER_YEAR,
    YEARLY: 1,
}
def _perf(returns: pd.Series, metric):
    """Return the annualized Sharpe or Sortino ratio of ``returns``.

    The sampling frequency is inferred from the smallest positive gap between
    consecutive index timestamps, and a year is taken to be 252 days of such
    periods. No risk-free rate or target return is subtracted.

    Parameters
    ----------
    returns : pd.Series
        Periodic simple returns. The index must be convertible with
        ``pd.to_datetime`` (for example ``'%Y-%m-%d %H:%M:%S'`` strings or a
        DatetimeIndex).
    metric : str
        Either ``'sharpe'`` or ``'sortino'``.

    Returns
    -------
    float
        The annualized ratio, or NaN when it is undefined: fewer than two
        distinct timestamps, or a zero/NaN denominator.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported names.
    """
    if metric not in ('sharpe', 'sortino'):
        raise ValueError(f"Unknown metric {metric!r}; expected 'sharpe' or 'sortino'.")

    # Difference the timestamps as wall-clock datetimes; going through local
    # epoch seconds would turn a DST change into a 23 hour "day".
    stamps = pd.to_datetime(returns.index)
    gaps = np.diff(stamps.values) / np.timedelta64(1, 's')
    gaps = gaps[gaps > 0]  # ignore duplicate, unsorted or missing timestamps
    if gaps.size == 0:
        return np.nan

    smallest_gap_days = gaps.min() / (24*3600)
    print('-- smallest time gap: ', smallest_gap_days, ' (days)')
    periods_per_year = 252 / smallest_gap_days

    if metric == 'sortino':
        # Root mean square of the negative returns over *all* observations.
        risk = np.sqrt((returns[returns < 0] ** 2).sum() / len(returns))
    else:
        risk = returns.std(ddof=1)
    if not risk > 0:  # zero or NaN denominator
        return np.nan

    return returns.mean() / risk * np.sqrt(periods_per_year)  # Convert to annual


def sharpe(returns: pd.Series) -> float:
    """Return the annualized Sharpe ratio of ``returns`` (see ``_perf``)."""
    return _perf(returns, 'sharpe')


def sortino(returns: pd.Series) -> float:
    """Return the annualized Sortino ratio of ``returns`` (see ``_perf``)."""
    return _perf(returns, 'sortino')
def max_drawdowns(returns: pd.Series) -> float:
    """Return the largest peak-to-trough decline implied by ``returns``.

    The returns are compounded into a value curve that starts at 100; the
    result is the minimum of ``(value - running peak) / running peak``, so it
    is zero or negative. One-dimensional input gives a plain float, otherwise
    one value per column is returned. Empty input gives NaN.
    """
    is_1d = returns.ndim == 1
    out = np.empty(returns.shape[1:])

    if len(returns) >= 1:
        values = np.asanyarray(returns)
        curve_shape = (returns.shape[0] + 1,) + returns.shape[1:]
        curve = np.empty(curve_shape, dtype='float64')
        # Row 0 holds the starting value; the compounded path fills the rest.
        curve[0] = base = 100
        cum_returns(values, starting_value=base, out=curve[1:])
        running_peak = np.fmax.accumulate(curve, axis=0)
        np.nanmin((curve - running_peak) / running_peak, axis=0, out=out)
    else:
        out[()] = np.nan

    return out.item() if is_1d else out
def cum_returns(returns, starting_value=0, out=None):
    """
    Compound simple periodic returns into cumulative returns.

    Parameters
    ----------
    returns : pd.Series, np.ndarray, or pd.DataFrame
        Noncumulative returns expressed as decimals, e.g.::

            2015-07-16   -0.012143
            2015-07-17    0.045350
            2015-07-20    0.030957
            2015-07-21    0.004902

        Two dimensional data is compounded along axis 0, i.e. column by
        column. NaN entries are treated as a return of zero.
    starting_value : float, optional
        With the default of 0 the result is the cumulative return itself
        (growth minus one); any other value scales the growth curve by it.
    out : array-like, optional
        Buffer that receives the result. When omitted a new array is
        allocated.

    Returns
    -------
    cumulative_returns : array-like
        ``out`` itself when a buffer was supplied. Otherwise a Series or
        DataFrame mirroring a pandas input, or a plain array. Empty input is
        returned as a copy.
    """
    if len(returns) < 1:
        return returns.copy()

    missing = np.isnan(returns)
    if np.any(missing):
        # Work on a copy so the caller's data is left untouched.
        returns = returns.copy()
        returns[missing] = 0

    caller_buffer = out is not None
    if not caller_buffer:
        out = np.empty_like(returns)

    # Compound in place: (1 + r_1) * (1 + r_2) * ...
    np.add(returns, 1, out=out)
    out.cumprod(axis=0, out=out)

    if starting_value == 0:
        np.subtract(out, 1, out=out)
    else:
        np.multiply(out, starting_value, out=out)

    if caller_buffer:
        return out
    if returns.ndim == 1 and isinstance(returns, pd.Series):
        return pd.Series(out, index=returns.index)
    if isinstance(returns, pd.DataFrame):
        return pd.DataFrame(
            out, index=returns.index, columns=returns.columns,
        )
    return out
def annual_returns(returns, annualization=None):
    """
    Determine the mean annual growth rate of returns. This is equivalent
    to the compound annual growth rate (CAGR).

    Parameters
    ----------
    returns : pd.Series or np.ndarray
        Periodic returns of the strategy, noncumulative.
    annualization : int, optional
        Number of ``returns`` periods per year. When omitted, the returns are
        first aggregated (summed) into calendar days and the daily factor
        from ``annualization_factor`` is used; that path needs an index that
        supports ``resample``.

    Returns
    -------
    annual_return : float
        Annual return as CAGR, or NaN when there are no observations.
    """
    if len(returns) < 1:
        return np.nan

    if annualization is None:  # By default, returns are assumed to be Daily
        # min_count=1 leaves days without any observation as NaN so they can
        # be dropped; a plain sum() would invent zero-return rows for them and
        # inflate the number of periods.
        returns = returns.dropna().resample('1D').sum(min_count=1).dropna()
        if len(returns) < 1:
            return np.nan

    ann_factor = annualization_factor(DAILY, annualization)
    num_years = len(returns) / ann_factor
    ending_value = cum_returns_final(returns)
    return ending_value ** (1 / num_years) - 1
def annualization_factor(period, annualization) -> float:
    """Return the number of periods per year used for annualizing.

    An explicit ``annualization`` is returned unchanged. Otherwise ``period``
    is looked up in ``ANNUALIZATION_FACTORS`` and a ``ValueError`` listing
    the valid labels is raised when it is unknown.
    """
    if annualization is not None:
        return annualization

    try:
        return ANNUALIZATION_FACTORS[period]
    except KeyError:
        valid_periods = "', '".join(ANNUALIZATION_FACTORS.keys())
        raise ValueError(
            "Period cannot be '{}'. "
            "Can be '{}'.".format(period, valid_periods)
        )
def cum_returns_final(returns) -> float:
    """Return the total growth factor obtained by compounding ``returns``.

    This is the product of ``1 + r`` over axis 0 with NaN entries skipped
    (so 1.0 means "no change"). Empty input yields NaN.
    """
    if not len(returns):
        return np.nan
    growth = returns + 1
    return np.nanprod(growth, axis=0)