Portfolio example
This example is based on the eficient portfolio theory by Harry Markowitz [14].
[1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pymoo.util.remote import Remote
from pymoo.core.problem import Problem
from pymoo.core.repair import Repair
from pymoo.optimize import minimize
from pymoode.algorithms import GDE3
from pymoode.survival import RankAndCrowding
[2]:
# Read file from pymoo
file = Remote.get_instance().load("examples", "portfolio_allocation.csv", to=None)
dataset = pd.read_csv(file, parse_dates=True, index_col="date")
[3]:
dataset.head()
[3]:
| GOOG | AAPL | FB | BABA | AMZN | GE | AMD | WMT | BAC | GM | T | UAA | SHLD | XOM | RRC | BBY | MA | PFE | JPM | SBUX | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| date | ||||||||||||||||||||
| 1989-12-29 | NaN | 0.117203 | NaN | NaN | NaN | 0.352438 | 3.9375 | 3.486070 | 1.752478 | NaN | 2.365775 | NaN | NaN | 1.766756 | NaN | 0.166287 | NaN | 0.110818 | 1.827968 | NaN |
| 1990-01-02 | NaN | 0.123853 | NaN | NaN | NaN | 0.364733 | 4.1250 | 3.660858 | 1.766686 | NaN | 2.398184 | NaN | NaN | 1.766756 | NaN | 0.173216 | NaN | 0.113209 | 1.835617 | NaN |
| 1990-01-03 | NaN | 0.124684 | NaN | NaN | NaN | 0.364050 | 4.0000 | 3.660858 | 1.780897 | NaN | 2.356516 | NaN | NaN | 1.749088 | NaN | 0.194001 | NaN | 0.113608 | 1.896803 | NaN |
| 1990-01-04 | NaN | 0.125100 | NaN | NaN | NaN | 0.362001 | 3.9375 | 3.641439 | 1.743005 | NaN | 2.403821 | NaN | NaN | 1.731422 | NaN | 0.190537 | NaN | 0.115402 | 1.904452 | NaN |
| 1990-01-05 | NaN | 0.125516 | NaN | NaN | NaN | 0.358586 | 3.8125 | 3.602595 | 1.705114 | NaN | 2.287973 | NaN | NaN | 1.722587 | NaN | 0.190537 | NaN | 0.114405 | 1.912100 | NaN |
[4]:
dataset.tail()
[4]:
| GOOG | AAPL | FB | BABA | AMZN | GE | AMD | WMT | BAC | GM | T | UAA | SHLD | XOM | RRC | BBY | MA | PFE | JPM | SBUX | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| date | ||||||||||||||||||||
| 2018-04-05 | 1027.810059 | 172.800003 | 159.339996 | 172.570007 | 1451.750000 | 13.43 | 10.02 | 87.809998 | 30.320000 | 38.000000 | 35.632843 | 17.469999 | 2.97 | 76.019997 | 14.52 | 72.120003 | 175.550003 | 35.730000 | 111.879997 | 59.139999 |
| 2018-04-06 | 1007.039978 | 168.380005 | 157.199997 | 167.520004 | 1405.229980 | 13.06 | 9.61 | 86.690002 | 29.629999 | 37.680000 | 35.130001 | 16.980000 | 2.88 | 74.870003 | 13.97 | 70.489998 | 169.699997 | 35.169998 | 109.089996 | 58.340000 |
| 2018-04-09 | 1015.450012 | 170.050003 | 157.929993 | 169.869995 | 1406.079956 | 12.83 | 9.53 | 86.279999 | 29.870001 | 37.830002 | 35.169998 | 16.639999 | 2.82 | 74.870003 | 13.93 | 69.820000 | 170.339996 | 35.459999 | 110.400002 | 58.700001 |
| 2018-04-10 | 1031.640015 | 173.250000 | 165.039993 | 177.100006 | 1436.219971 | 13.05 | 9.98 | 86.449997 | 30.480000 | 39.070000 | 35.810001 | 16.820000 | 3.07 | 77.070000 | 14.78 | 71.720001 | 174.720001 | 35.950001 | 112.510002 | 59.410000 |
| 2018-04-11 | 1019.969971 | 172.440002 | 166.320007 | 175.360001 | 1427.050049 | 12.97 | 9.82 | 85.910004 | 29.900000 | 39.000000 | 35.250000 | 16.740000 | 3.30 | 77.430000 | 14.99 | 70.910004 | 172.360001 | 35.790001 | 110.620003 | 59.419998 |
The original dataset goes back to 1990, of which the returns probably have small or no impact in expected returns. Therefore I will select only adjusted prices after 2008.
[5]:
dataset = dataset.loc[dataset.index > '2008-01-01', :]
Estimating returns based on past performance
[6]:
# Number of trading days
N = 252
# Simple returns
simple_returns = (dataset / dataset.shift(1) - 1.0).dropna()
mean_simple_returns = simple_returns.mean(axis=0)
cov_simple_matrix = simple_returns.cov()
# Obtain variance of individual assets
individual_var = pd.Series({key: cov_simple_matrix.loc[key, key] for key in cov_simple_matrix.index})
individual_sigma = individual_var.apply(np.sqrt)
[7]:
fig, ax = plt.subplots(figsize=[6, 5], dpi=100)
ax.scatter(100 * individual_sigma, 100 * mean_simple_returns, color="navy", label="Individual Assets")
ax.set_xlabel(r"Mean daily $\sigma$ [%]")
ax.set_ylabel("Mean daily returns [%]")
fig.tight_layout()
plt.show()
Multi-objective problem
\[\begin{split}\begin{align}
\text{min} \; \; & -\mu_{P}=-\sum_{i=1}^N w_i \mu_{i}\\
& \sigma_{P} = \sqrt{\sum_{i=1}^{N} \sum_{j=1}^{N} w_{i} w_{j} \sigma_{i, j}}\\
\text{s.t.} \; \; & \sum_{i} w_i = 1\\
& 0 \leq w_i \leq 1 \; \forall \; i \in 1, 2, ..., N
\end{align}\end{split}\]
[8]:
# Declare mu and sigma
mu = mean_simple_returns.values
sigma = cov_simple_matrix.values
[9]:
class PortfolioProblem(Problem):
def __init__(self, mu, sigma, risk_free_rate=0.02/252, **kwargs):
super().__init__(n_var=mu.shape[0], n_obj=2, xl=0.0, xu=1.0, n_ieq_constr=1, **kwargs)
self.mu = mu
self.sigma = sigma
self.risk_free_rate = risk_free_rate
def _evaluate(self, X, out, *args, **kwargs):
# Remember each line in X corresponds to an individual, with w in the columns
exp_return = X.dot(self.mu).reshape([-1, 1])
exp_sigma = np.sqrt(np.sum(X * X.dot(self.sigma), axis=1, keepdims=True))
sharpe = (exp_return - self.risk_free_rate) / exp_sigma
out["F"] = np.column_stack([exp_sigma, -exp_return])
out["G"] = np.sum(X, axis=1, keepdims=True) - 1
out["sharpe"] = sharpe
Sharpe’s ratio:
\[\begin{equation}
\displaystyle \frac{\mu_{P} - \mu_{f}}{\sigma_{P}}
\end{equation}\]
[10]:
class PortfolioRepair(Repair):
def _do(self, problem, X, **kwargs):
X[X < 1e-6] = 0
return X / X.sum(axis=1, keepdims=True)
[11]:
problem = PortfolioProblem(mu, sigma, 0.02/252)
gde3 = GDE3(
80, CR=0.9,
survival=RankAndCrowding(crowding_func="pcd"),
repair=PortfolioRepair(),
)
res = minimize(
problem,
gde3,
("n_gen", 250),
seed=12,
)
[12]:
def evaluate_as_function(X, mu, sigma):
risk_free_allocation = 1 - X.sum(axis=1)
exp_return = X.dot(mu).reshape([-1, 1])
exp_sigma = np.sqrt(np.sum(X * X.dot(sigma), axis=1, keepdims=True))
return np.column_stack([exp_sigma, exp_return])
[13]:
X_mc = np.random.rand(100000, len(mu))
X_mc = X_mc / np.sum(X_mc, axis=1, keepdims=True)
F_mc = evaluate_as_function(X_mc, mu, sigma)
[14]:
fig, ax = plt.subplots(figsize=[6, 5], dpi=100)
fig.patch.set_facecolor('white')
ax.scatter(res.F[:, 0] * 100, -res.F[:, 1] * 100, color="navy", label="GDE3")
ax.scatter(100 * individual_sigma, 100 * mean_simple_returns, color="firebrick", label="Individual Assets", zorder=2)
ax.scatter(F_mc[:, 0] * 100, F_mc[:, 1] * 100, color="grey", alpha=0.2, label="Monte Carlo", zorder=0)
argmax_sharpe = res.opt.get("sharpe").argmax()
best_sharpe = res.opt.get("sharpe").max()
ax.scatter(res.F[argmax_sharpe, 0] * 100, -res.F[argmax_sharpe, 1] * 100, color="darkgoldenrod", label="Best Sharpe", zorder=4)
ax.plot([0, 100 * res.F[:, 0].max()],
[100 * problem.risk_free_rate, 100 * problem.risk_free_rate + 100 * best_sharpe * res.F[:, 0].max()],
color="black", linestyle="--", zorder=3, label="Risk-Free Tangency Line")
ax.set_xlim([0, 2.0])
ax.set_ylim([-0.02, None])
ax.legend()
ax.set_xlabel(r"Mean daily $\sigma$ [%]")
ax.set_ylabel("Mean daily returns [%]")
fig.tight_layout()
plt.show()
[15]:
tickers = np.array(dataset.columns)
allocation = res.X[argmax_sharpe, :]
order = np.flip(np.argsort(allocation))
print("Best sharpe:")
for j in order:
_t = tickers[j]
_a = allocation[j] * 100
print(f"{_t}: {_a:.2f}%")
Best sharpe:
AMZN: 48.14%
MA: 19.58%
BBY: 13.95%
JPM: 13.05%
AMD: 4.68%
AAPL: 0.30%
WMT: 0.28%
FB: 0.01%
RRC: 0.00%
GM: 0.00%
PFE: 0.00%
T: 0.00%
GE: 0.00%
UAA: 0.00%
BAC: 0.00%
BABA: 0.00%
SBUX: 0.00%
SHLD: 0.00%
XOM: 0.00%
GOOG: 0.00%
[16]:
lowest_variance = np.argmin(res.F[:, 0])
allocation = res.X[lowest_variance, :]
order = np.flip(np.argsort(allocation))
print("Lowest variance:")
for j in order:
_t = tickers[j]
_a = allocation[j] * 100
print(f"{_t}: {_a:.2f}%")
Lowest variance:
T: 28.71%
PFE: 21.28%
WMT: 15.50%
SBUX: 12.76%
XOM: 8.00%
AAPL: 3.57%
BABA: 3.39%
MA: 1.87%
FB: 1.69%
BBY: 1.01%
GM: 0.82%
AMZN: 0.66%
BAC: 0.26%
GOOG: 0.25%
SHLD: 0.10%
GE: 0.07%
AMD: 0.03%
RRC: 0.03%
JPM: 0.01%
UAA: 0.01%
[17]:
best_returns = np.argmin(res.F[:, 1])
allocation = res.X[best_returns, :]
order = np.flip(np.argsort(allocation))
print("Best returns:")
for j in order:
_t = tickers[j]
_a = allocation[j] * 100
print(f"{_t}: {_a:.2f}%")
Best returns:
AMZN: 70.85%
AMD: 29.09%
BBY: 0.02%
FB: 0.02%
UAA: 0.01%
RRC: 0.01%
BABA: 0.00%
GOOG: 0.00%
JPM: 0.00%
MA: 0.00%
BAC: 0.00%
AAPL: 0.00%
GE: 0.00%
SBUX: 0.00%
WMT: 0.00%
T: 0.00%
SHLD: 0.00%
XOM: 0.00%
PFE: 0.00%
GM: 0.00%