def generate_features(df):
    """Generate features for a stock/index from historical price and volume.

    Every feature except ``open`` is shifted by one row, so a row only
    contains values taken from earlier rows of ``df``. The unshifted
    ``close`` column is appended last as the prediction target.

    The ``_5`` / ``_30`` / ``_365`` suffixes correspond to rolling windows of
    5, 21 and 252 rows respectively (presumably the trading-day equivalents
    of a week, a month and a year -- confirm against the data's frequency).

    Args:
        df: DataFrame with columns "Open", "Close", "High", "Low" and
            "Volume". Other columns (e.g. "Adjusted Close") are ignored.

    Returns:
        A new DataFrame sharing ``df``'s index, holding 37 feature columns
        plus the ``close`` target. Rows containing any NaN (i.e. the leading
        rows without enough history for the longest window) are dropped.
    """
    close = df['Close']
    volume = df['Volume']

    df_new = pd.DataFrame()

    # 6 raw features: the current open plus the previous row's OHLCV values.
    df_new['open'] = df['Open']
    df_new['open_1'] = df['Open'].shift(1)
    df_new['close_1'] = close.shift(1)
    df_new['high_1'] = df['High'].shift(1)
    df_new['low_1'] = df['Low'].shift(1)
    df_new['volume_1'] = volume.shift(1)

    # 31 generated features.
    # NOTE: ``Series.rolling(...)`` replaces ``pd.rolling_mean`` /
    # ``pd.rolling_std``, which were removed from pandas; the defaults
    # (min_periods == window, ddof=1 for std) are the same.

    # Average price.
    df_new['avg_price_5'] = close.rolling(window=5).mean().shift(1)
    df_new['avg_price_30'] = close.rolling(window=21).mean().shift(1)
    df_new['avg_price_365'] = close.rolling(window=252).mean().shift(1)
    df_new['ratio_avg_price_5_30'] = df_new['avg_price_5'] / df_new['avg_price_30']
    df_new['ratio_avg_price_5_365'] = df_new['avg_price_5'] / df_new['avg_price_365']
    df_new['ratio_avg_price_30_365'] = df_new['avg_price_30'] / df_new['avg_price_365']

    # Average volume.
    df_new['avg_volume_5'] = volume.rolling(window=5).mean().shift(1)
    df_new['avg_volume_30'] = volume.rolling(window=21).mean().shift(1)
    df_new['avg_volume_365'] = volume.rolling(window=252).mean().shift(1)
    df_new['ratio_avg_volume_5_30'] = df_new['avg_volume_5'] / df_new['avg_volume_30']
    df_new['ratio_avg_volume_5_365'] = df_new['avg_volume_5'] / df_new['avg_volume_365']
    df_new['ratio_avg_volume_30_365'] = df_new['avg_volume_30'] / df_new['avg_volume_365']

    # Standard deviation of prices.
    df_new['std_price_5'] = close.rolling(window=5).std().shift(1)
    df_new['std_price_30'] = close.rolling(window=21).std().shift(1)
    df_new['std_price_365'] = close.rolling(window=252).std().shift(1)
    df_new['ratio_std_price_5_30'] = df_new['std_price_5'] / df_new['std_price_30']
    df_new['ratio_std_price_5_365'] = df_new['std_price_5'] / df_new['std_price_365']
    df_new['ratio_std_price_30_365'] = df_new['std_price_30'] / df_new['std_price_365']

    # Standard deviation of volumes.
    df_new['std_volume_5'] = volume.rolling(window=5).std().shift(1)
    df_new['std_volume_30'] = volume.rolling(window=21).std().shift(1)
    df_new['std_volume_365'] = volume.rolling(window=252).std().shift(1)
    df_new['ratio_std_volume_5_30'] = df_new['std_volume_5'] / df_new['std_volume_30']
    df_new['ratio_std_volume_5_365'] = df_new['std_volume_5'] / df_new['std_volume_365']
    df_new['ratio_std_volume_30_365'] = df_new['std_volume_30'] / df_new['std_volume_365']

    # Relative change of the close over 1, 5, 21 and 252 rows, shifted by one
    # row so that the current close is never part of a feature.
    df_new['return_1'] = ((close - close.shift(1)) / close.shift(1)).shift(1)
    df_new['return_5'] = ((close - close.shift(5)) / close.shift(5)).shift(1)
    df_new['return_30'] = ((close - close.shift(21)) / close.shift(21)).shift(1)
    df_new['return_365'] = ((close - close.shift(252)) / close.shift(252)).shift(1)

    # Rolling means of the (already shifted) one-row return.
    df_new['moving_avg_5'] = df_new['return_1'].rolling(window=5).mean()
    df_new['moving_avg_30'] = df_new['return_1'].rolling(window=21).mean()
    df_new['moving_avg_365'] = df_new['return_1'].rolling(window=252).mean()

    # The target.
    df_new['close'] = close

    # Drop the leading rows that lack enough history for every window.
    df_new = df_new.dropna(axis=0)
    return df_new