我正在尝试自定义一个以夏普比率为目标的损失函数;夏普比率是金融资产回报的均值除以其标准差。这是我用 LSTM 做资产配置项目的一部分。由于 LSTM 的输入是 3D 数组,我在这里遇到了一些麻烦。废话不多说,代码如下:
def sharpe_loss(y_true, y_pred):
    """Negative-Sharpe loss for Keras.

    Keras always invokes a loss as ``loss(y_true, y_pred)``: the first
    argument is the batch of targets, never the training inputs, so the
    global ``data`` DataFrame must not be referenced here.

    Parameters
    ----------
    y_true : tensor, shape (batch, n_assets)
        Per-asset returns for the period (the targets passed to fit()).
    y_pred : tensor, shape (batch, n_assets)
        Portfolio weights produced by the softmax output layer.

    Returns
    -------
    Scalar tensor ``exp(-Sharpe)``; minimizing it maximizes the Sharpe
    ratio of the predicted portfolio.
    """
    # y_pred is already a graph tensor -- wrapping it in tf.Variable
    # breaks gradient flow and fails in graph mode, so it is used as-is.
    y_true = tf.cast(y_true, y_pred.dtype)
    # Portfolio return per sample: weighted sum across the asset axis.
    port_ret = tf.reduce_sum(y_true * y_pred, axis=-1)
    # K.epsilon() guards against division by zero on a constant batch.
    s_ratio = K.mean(port_ret) / (K.std(port_ret) + K.epsilon())
    # exp(-x) is monotonically decreasing, so minimizing the loss
    # maximizes the Sharpe ratio ("exp" was previously an undefined name).
    return K.exp(-s_ratio)
我对夏普比率取了负号(返回 exp(-s_ratio)),因为优化器会最小化损失,而我想要最大化夏普比率。下面是代码的完整上下文。谢谢你的帮助。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import keras.backend as K
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM, Flatten, Dense, BatchNormalization
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error as mae
# Daily adjusted closes for a 5-ETF universe (gold, dividend equity,
# conservative allocation, international bonds, clean energy).
symbols=['GLD','VYM','AOK','BNDX','QCLN']
data = pd.DataFrame()
for sym in symbols:
    # One price column per ticker, full available history.
    data[sym] = yf.download(sym,period='MAX')['Adj Close']
# Daily log returns.
log_ret = np.log(data/data.shift()).dropna()
# NOTE(review): this rebinding REPLACES the daily series with one row per
# business-month start ('BMS'), so the per-month covariances computed
# later are taken over a single observation (all-NaN). Keep the daily
# frame under a separate name if a real monthly covariance is intended
# -- confirm against the covariance loop below.
log_ret = log_ret.resample('BMS').first().dropna()
# One covariance matrix per timestamp, computed from the rows of
# log_ret that fall inside that timestamp's month/year.
covariances = {}
for i in log_ret.index:
    # Boolean mask selecting this month-and-year's rows.
    mask = ((log_ret.index.month == i.month) & (log_ret.index.year == i.year))
    # NOTE(review): log_ret was resampled to one row per month, so each
    # mask matches exactly one row and .cov() is over a single sample
    # (yields NaN) -- verify this is intentional.
    covariances[i] = log_ret.loc[mask].cov()
# Monte-Carlo portfolio sampling: for every date draw 500 random
# long-only weight vectors and record each one's return, volatility,
# and weights, keyed by date.
port_ret = {}
port_vol = {}
port_weights = {}
for date, cov in covariances.items():
    sampled_rets, sampled_vols, sampled_ws = [], [], []
    for _ in range(500):
        # Random weights normalized to sum to 1 (long-only portfolio).
        w = np.random.random(5)
        w = w / np.sum(w)
        sampled_rets.append(np.dot(w, log_ret.loc[date]))
        # Portfolio volatility: sqrt(w' C w).
        sampled_vols.append(np.sqrt(np.dot(w, np.dot(cov, w))))
        sampled_ws.append(w)
    port_ret[date] = sampled_rets
    port_vol[date] = sampled_vols
    port_weights[date] = sampled_ws
# Sharpe ratio of every sampled portfolio per date (risk-free rate
# assumed zero), plus the index of the best portfolio for each date.
sharpe_ratio = {}
max_sharpe_idxs = {}
for date in port_ret:
    rets = port_ret[date]
    vols = port_vol[date]
    sharpe_ratio[date] = [r / v for r, v in zip(rets, vols)]
    max_sharpe_idxs[date] = np.argmax(sharpe_ratio[date])
# Supervised pairs: each feature row is that date's asset returns, the
# matching target is the weight vector of the best (max-Sharpe) random
# portfolio for the same date.
target_rows = []
feature_rows = []
for date, month_ret in log_ret.iterrows():
    winner = max_sharpe_idxs[date]
    target_rows.append(port_weights[date][winner])
    feature_rows.append(month_ret)
targets = np.array(target_rows)
features = np.array(feature_rows)

# LSTM window length: months of history per training sample.
look_back = 12

# Scale inputs and outputs to [0, 1] with independent scalers so each
# can be inverted separately later.
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
features = scaler_x.fit_transform(features)
targets = scaler_y.fit_transform(targets)
def create_dataset(x, y, look_back=12):
    """Build rolling LSTM windows from aligned feature/target arrays.

    Parameters
    ----------
    x : ndarray, shape (n_samples, n_features)
        Scaled feature matrix.
    y : ndarray, shape (n_samples, n_targets)
        Scaled target matrix, row-aligned with ``x``.
    look_back : int, default 12
        Window length: each sample uses ``look_back`` consecutive rows
        of ``x`` to predict the target row just after the window.

    Returns
    -------
    (data_x, data_y) : ndarray pair with shapes
        ``(n_samples - look_back, look_back, n_features)`` and
        ``(n_samples - look_back, n_targets)``.
    """
    # Bug fix: the loop previously ranged over the *global* `features`
    # array and the global `look_back`, so the function silently broke
    # for any other input; it now depends only on its arguments.
    data_x = []
    data_y = []
    for i in range(x.shape[0] - look_back):
        data_x.append(x[i:i + look_back, :])
        data_y.append(y[i + look_back, :])
    return np.array(data_x), np.array(data_y)
# Windowed samples: X is (n, look_back, 5); y is the weight vector one
# step past each window.
X, y = create_dataset(features,targets)
# Chronological 80/20 split -- shuffle=False keeps time ordering intact.
X_train,X_test,y_train,y_test = train_test_split(X, y,
                                                 test_size=0.2,shuffle=False)
# NOTE(review): depending on the Keras version the logged metric may be
# named 'val_mean_absolute_error' instead of 'val_mae', in which case
# this callback never fires -- confirm against history.history keys.
callback_list=[EarlyStopping(monitor='val_mae',patience=7)]
def sharpe_loss(y_true, y_pred):
    """Negative-Sharpe loss for Keras.

    Keras always invokes a loss as ``loss(y_true, y_pred)``: the first
    argument is the batch of targets, never the training inputs, so the
    global ``data`` DataFrame must not be referenced here.

    Parameters
    ----------
    y_true : tensor, shape (batch, n_assets)
        Per-asset returns for the period (the targets passed to fit()).
    y_pred : tensor, shape (batch, n_assets)
        Portfolio weights produced by the softmax output layer.

    Returns
    -------
    Scalar tensor ``exp(-Sharpe)``; minimizing it maximizes the Sharpe
    ratio of the predicted portfolio.
    """
    # y_pred is already a graph tensor -- wrapping it in tf.Variable
    # breaks gradient flow and fails in graph mode, so it is used as-is.
    y_true = tf.cast(y_true, y_pred.dtype)
    # Portfolio return per sample: weighted sum across the asset axis.
    port_ret = tf.reduce_sum(y_true * y_pred, axis=-1)
    # K.epsilon() guards against division by zero on a constant batch.
    s_ratio = K.mean(port_ret) / (K.std(port_ret) + K.epsilon())
    # exp(-x) is monotonically decreasing, so minimizing the loss
    # maximizes the Sharpe ratio ("exp" was previously an undefined name).
    return K.exp(-s_ratio)
# Stacked-LSTM allocator: three recurrent layers funnel into a softmax
# over the 5 assets, so the output is a valid weight vector.
model = Sequential([
    LSTM(64, activation='relu', return_sequences=True),
    BatchNormalization(),
    LSTM(32, return_sequences=True),
    LSTM(16, return_sequences=False),
    Dense(5, activation='softmax'),
])
model.compile(loss=sharpe_loss, optimizer='adam', metrics=['mae'])
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=7,
    validation_data=(X_test, y_test),
    callbacks=callback_list,
)