Christian Elsasser (che@physik.uzh.ch)
The content of this lecture may be reused, in whole or in part, under the CC BY-SA 4.0 licence.
Disclaimer: The approaches presented here are for illustrative purposes and do not represent state-of-the-art methods.
Topics discussed:
probability distributions with scipy.stats
maximum-likelihood fitting with scipy.optimize.minimize
constrained optimisation (portfolio example)
from scipy import stats
from scipy.optimize import minimize
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Loading the data
df = pd.read_csv("stocks.csv")
df = df.ffill().set_index(["Date"]) # forward-fill missing quotes (fillna(method="ffill") is deprecated)
df_pct = df.pct_change().fillna(0) # relative day-to-day returns
df_pct.head()
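As a small illustration of what pct_change computes (the prices below are made up): the return on day t is (P_t - P_(t-1)) / P_(t-1).
toy = pd.Series([100.0,102.0,99.96]) # hypothetical prices
print(toy.pct_change())              # NaN (no previous day), 0.02, -0.02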
df.plot()
plt.show()
# Convert the relative returns to per cent
smi_pct = np.array(df_pct["SMI"])*100
dji_pct = np.array(df_pct["DJI"])*100
# define the probability density function
# Gaussian with p[0] as mu and p[1] as sigma
f = lambda x,p: stats.norm(p[0],p[1]).pdf(x)
gaussian = stats.norm(5,2) # a frozen distribution with mu=5 and sigma=2
gaussians = stats.norm(np.array([0,1,2]),5) # frozen distributions broadcast over three values of mu
gaussians.cdf(1) # evaluates the cdf of all three distributions at once
# the log-likelihood function to be minimised
# here the data is passed as an additional argument
ll = lambda p,data: -np.log(f(data,p)).sum()
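A quick plausibility check of ll (the parameter values here are just illustrative guesses): the negative log-likelihood should be smaller for parameters close to the data than for parameters far off.
print(ll([0.0,1.0],smi_pct) < ll([5.0,1.0],smi_pct)) # expect True for near-zero mean returns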
minimize?
smi_result = minimize(ll,[0.1,0.5],args=(smi_pct,),method="TNC")
smi_result
# Be aware: the behaviour depends on the chosen method
print("BFGS")
print(minimize(ll,[0.0,0.5],args=(smi_pct,),method="BFGS"))
print("CG")
print(minimize(ll,[0.0,0.5],args=(smi_pct,),method="CG"))
# Different methods require different numbers of iterations and function evaluations until termination
# ... and also lead to different numerical precision
print("Nelder-Mead")
print(minimize(ll,[0.0,0.5],args=(smi_pct,),method="Nelder-Mead"))
# "True" values
print([smi_pct.mean(),smi_pct.std()])
# Let's go with BFGS since it also gives us an approximation of the inverse Hessian
result_smi = minimize(ll,[0.0,0.5],args=(smi_pct,),method="BFGS")
result_dji = minimize(ll,[0.0,0.5],args=(dji_pct,),method="BFGS")
print(result_smi.success,result_dji.success)
result_smi.x
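Since ll is the negative log-likelihood, the inverse Hessian returned by BFGS approximates the covariance matrix of the estimates; its diagonal gives rough parameter uncertainties (rough, because BFGS only builds up an approximation of the Hessian).
print(np.sqrt(np.diag(result_smi.hess_inv))) # approximate standard errors of mu and sigma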
# Create the data-set-specific pdfs with the estimated parameters
f_smi = lambda x: f(x,result_smi.x)
f_dji = lambda x: f(x,result_dji.x)
bins = np.linspace(-4,4,81)
h_smi,bin_smi = np.histogram(smi_pct,bins)
h_dji,bin_dji = np.histogram(dji_pct,bins)
fig = plt.figure("smi",(6,4))
fig,ax = plt.subplots()
ax.bar(bin_smi[:-1],h_smi,width=bin_smi[1:]-bin_smi[:-1],align="edge",color="b")
# scale the pdf by the total count times the mean bin width to overlay it on the histogram
ax.plot(bin_smi,f_smi(bin_smi)*h_smi.sum()*(bin_smi[1:]-bin_smi[:-1]).mean(),'k--',linewidth=2)
ax.set_xlabel("Daily returns [%]")
ax.set_ylabel("Number of days")
ax.set_ylim([0,30])
plt.show()
fig = plt.figure("dji",(6,4))
fig,ax = plt.subplots()
ax.bar(bin_dji[:-1],h_dji,width=bin_dji[1:]-bin_dji[:-1],color="r")
ax.plot(bin_dji,f_dji(bin_dji)*h_dji.sum()*(bin_dji[1:]-bin_dji[:-1]).mean(),'k--',linewidth=2)
ax.set_xlabel("Daily returns [%]")
ax.set_ylabel("Number of days")
ax.set_ylim([0,30])
plt.show()
# Create QQ (Quantile-quantile) plots to compare quantiles
# of the data vs. the quantiles of the resulting distribution
x = np.linspace(-3,3,3) # support points for the reference line y = x
fig = plt.figure("QQ",figsize=(6,6))
fig,ax = plt.subplots()
ax.plot(x,x,'k--')
# use interior plotting positions: ppf(0) and ppf(1) would give -inf/+inf
q = (np.arange(1,len(smi_pct)+1)-0.5)/len(smi_pct)
ax.scatter(sorted(smi_pct),stats.norm(*result_smi.x).ppf(q),color='b')
q = (np.arange(1,len(dji_pct)+1)-0.5)/len(dji_pct)
ax.scatter(sorted(dji_pct),stats.norm(*result_dji.x).ppf(q),color='r')
ax.set_xlabel("Quantile data")
ax.set_ylabel("Quantile distribution")
ax.set_xlim([-3,3])
ax.set_ylim([-3,3])
plt.show()
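scipy also ships a ready-made QQ plot, stats.probplot, which can serve as a cross-check of the manual plot above.
fig,ax = plt.subplots(figsize=(6,6))
stats.probplot(smi_pct,sparams=tuple(result_smi.x),dist="norm",plot=ax) # sparams = (loc, scale)
plt.show()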
# Scale the daily estimates to the full period covered by the data:
# the mean grows linearly with the number of days, the volatility with its square root (assuming i.i.d. returns)
mu_smi = result_smi.x[0]*len(smi_pct)
mu_dji = result_dji.x[0]*len(dji_pct)
sig_smi = result_smi.x[1]*len(smi_pct)**0.5
sig_dji = result_dji.x[1]*len(dji_pct)**0.5
mu_rf = 1.0 # assumed risk-free return over the same period [%]
mu_dji
rho = np.corrcoef(smi_pct,dji_pct)[0,1]
print(rho)
# Define the function to be minimised (the portfolio variance) and the function representing the return constraint
mu_min = 14.0 # minimal required portfolio return [%]
# p: fractions invested in SMI and DJI; x = (sig_smi, sig_dji, rho)
opt_f = lambda p,x : (p[0]*x[0])**2+(p[1]*x[1])**2+2*p[1]*p[0]*x[0]*x[1]*x[2]
# 'ineq' constraint: the expected portfolio return minus mu_min must be non-negative
con_f = lambda p,mu : mu[0]*p[0]+mu[1]*p[1]+mu[2]*(1-p[0]-p[1])-mu[3]
result = minimize(opt_f,[0.33,0.33],args=((sig_smi,sig_dji,rho),),
                  bounds=[(0,1),(0,1)], # only allow positive fractions (i.e. no short selling)
                  constraints=({'type':'ineq','fun':con_f,'args':((mu_smi,mu_dji,mu_rf,mu_min),)}),
                  method="SLSQP")
print(result)
p = result.x
# And get the final results
mu_pf = np.sum(np.array([mu_smi,mu_dji])*p)+mu_rf*(1-p.sum()) # expected portfolio return
print("#mu = {0:4.2f}%".format(mu_pf))
print("#sig = {0:4.2f}%".format(result.fun**0.5)) # result.fun is the minimised variance
Topics discussed:
np.matrix and eigenvalue decomposition with numpy.linalg
sparse matrices with scipy.sparse
# Create a diagonal matrix (np.matrix is used here for illustration; it is deprecated in favour of plain ndarrays)
d = np.matrix(np.diag([1,2,3,4]))
print(d)
# Create a 4x4 matrix with random entries and its inverse
s = np.matrix(np.random.rand(4,4))
s_inv = s.I
# De-diagonalise it with a similarity transformation (m has the same eigenvalues as d)
m = s_inv*d*s
print(m)
# Get back the eigenvalues
# ... and the eigenvectors
values,transf = np.linalg.eig(m)
print(values)
print(transf)
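A quick check that eig returned what we expect: each column of transf is an eigenvector, i.e. m*v = lambda*v.
v = transf[:,0]
print(np.allclose(m*v, values[0]*v)) # expect True up to numerical precision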
routes = pd.read_csv("routes.txt",sep=",")
routes.head()
# Number of flights per airport: arrivals plus departures
n_flights = (routes["des"].value_counts()+routes["dep"].value_counts()).sort_values(ascending=False)
n_flights.head(10)
abrev = list(n_flights[:1000].index.sort_values()) # the 1000 busiest airports, alphabetically sorted
keep_route = routes["dep"].isin(abrev)&routes["des"].isin(abrev)
routes_kept = routes[keep_route].get(["dep","des"])
routes_kept = routes_kept.drop_duplicates()
mat = np.matrix(np.zeros((len(abrev),len(abrev)),dtype=bool))
for i,r in routes_kept.iterrows():
    i_dep = abrev.index(r["dep"])
    i_des = abrev.index(r["des"])
    mat[i_dep,i_des] = True
mat.shape
eig,trans = np.linalg.eig(mat)
mat
bins = np.linspace(-100,100,51)
h,bins = np.histogram(eig.real,bins) # the eigenvalues are in general complex; histogram the real parts
fig = plt.figure("graph_spectrum",(6,4))
fig,ax = plt.subplots()
ax.bar(bins[:-1],h,width=bins[1:]-bins[:-1])
plt.show()
from scipy.sparse import dok_matrix,csr_matrix
import scipy.sparse.linalg
routes_kept = routes.get(["dep","des"]) # this time keep the routes between all airports
routes_kept = routes_kept.drop_duplicates()
abrev = list(n_flights.index.sort_values()) # ... and all airports
mat_dok = dok_matrix((len(abrev),len(abrev)),dtype=np.int8)
for i,r in routes_kept.iterrows():
    i_dep = abrev.index(r["dep"])
    i_des = abrev.index(r["des"])
    mat_dok[i_dep,i_des] = 1 # int8 entries, so store 1 rather than True
mat_csr = csr_matrix(mat_dok)
mat_csr.dtype
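To see why the sparse formats pay off here, look at the fill fraction of the adjacency matrix:
n_total = mat_csr.shape[0]*mat_csr.shape[1]
print("{0} non-zero entries out of {1} ({2:.2e} filled)".format(mat_csr.nnz,n_total,mat_csr.nnz/n_total))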
%timeit mat_dok*mat_dok
%timeit mat_csr*mat_csr
mat_dok = mat_dok.asfptype() # eigs requires a floating-point matrix
%timeit scipy.sparse.linalg.eigs(mat_dok)
mat_csr = mat_csr.asfptype()
%timeit scipy.sparse.linalg.eigs(mat_csr)
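Note that eigs, unlike np.linalg.eig, computes only a few eigenvalues (k=6 by default), which is what keeps it tractable for large sparse matrices; for example:
vals = scipy.sparse.linalg.eigs(mat_csr,k=6,return_eigenvectors=False)
print(vals) # the six eigenvalues of largest magnitude (possibly complex)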
Topics discussed:
fast Fourier transforms with scipy.fftpack
numerical (Fourier) integration with scipy.integrate.quad
from scipy.fftpack import fft, fftfreq, fftshift, ifft
import numpy as np
N = 1000 # Number of sample points
T = 0.02*2*np.pi # Sample spacing (time between two samples)
t = np.linspace(0.0,N*T,N) # The sampled time axis
y = (t%(2*np.pi))/(2*np.pi) # Sawtooth function rising from 0 to 1 with period 2*pi
plt.figure("tooth_saw_function",[8,12])
fig,ax = plt.subplots()
ax.plot(t,y)
ax.set_xlim([t.min(),t.max()])
ax.set_ylim([0,1])
plt.show()
yf = fft(y) # Actual transformation
xf = fftfreq(N, T) # Generation of frequency values
xf = fftshift(xf) # Shift it so that zero is centered
yplot = fftshift(yf) # Corresponding shift for frequency amplitudes
plt.figure("freq_spectrum_tooth_saw",[8,12])
fig,ax = plt.subplots()
ax.plot(xf, 1/N*np.abs(yplot),'r') # yplot values are complex --> abs
ax.set_xlim([-2,2])
ax.set_ylim([0,0.6])
plt.show()
# Let's check the inverse transformation
plt.figure("inverse_transform",[8,12])
fig,ax = plt.subplots()
ax.plot(t, y,'b') # Original
ax.plot(t, ifft(yf),'r') # inverse back transformed
ax.set_xlim([t.min(),t.max()])
ax.set_ylim([0,1.0])
plt.show()
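A numerical round-trip check: up to floating-point noise, fft followed by ifft reproduces the original signal.
print(np.allclose(y, ifft(yf).real)) # expect True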
from scipy.integrate import quad
# Generating two signals
f_gauss = lambda x: stats.norm(0,0.5).pdf(x) # Gaussian signal
f_g_osc = lambda x: f_gauss(x)*np.cos(2*np.pi*x*0.5) # ... with oscillation
# time range considered
t = np.linspace(-5,5,101)
plt.figure("signals",[8,12])
fig,ax = plt.subplots()
ax.plot(t, f_gauss(t),'b')
ax.plot(t, f_g_osc(t),'r')
ax.set_xlim([t.min(),t.max()])
ax.set_ylim([-1.0,1.0])
ax.set_xlabel(r"$t$")
ax.set_ylabel(r"$A(t)$")
plt.show()
# Build the Fourier integrand as a function; only the real (cosine) part is considered
def get_fourier_integrand(f):
    return lambda t,w: np.cos(w*t)*f(t)
# Define Fourier integrands for a given omega
f_i_gauss = lambda w : quad(get_fourier_integrand(f_gauss),-10,10,args=(w,))
f_i_g_osc = lambda w : quad(get_fourier_integrand(f_g_osc),-10,10,args=(w,))
f_i_gauss(1.0) # first entry: integration result, second entry: estimate of the absolute error
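Sanity check: at w=0 the cosine weight is 1, so the integral is just the normalisation of the Gaussian pdf over [-10,10], i.e. approximately 1.
print(f_i_gauss(0.0)[0]) # expect ~1.0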
# Vectorise the integrands: this lets us evaluate the integral for a whole array of omega values
# (internally np.vectorize is a loop, not parallel execution)
int_f_gauss = np.vectorize(f_i_gauss)
int_f_g_osc = np.vectorize(f_i_g_osc)
# Spectrum we are considering
w = np.linspace(0,10,51)
# Actually perform the integration for the whole spectrum
yw_gauss = int_f_gauss(w)
yw_g_osc = int_f_g_osc(w)
yw_gauss
plt.figure("Signal_spectra",[8,12])
fig,ax = plt.subplots()
ax.plot(w, yw_gauss[0],'b')
ax.plot(w, yw_g_osc[0],'r')
ax.set_xlim([w.min(),w.max()])
ax.set_ylim([0,1.5])
ax.set_xlabel(r"Spectrum $\omega$")
plt.show()
# quad can actually perform a Fourier transform directly via its weight option
f_i_gauss_alt = lambda w : quad(f_gauss,-10,10,weight="cos",wvar=w) # wvar is the (scalar) frequency
int_f_gauss_alt = np.vectorize(f_i_gauss_alt) # vectorise the alternative integrand
yw_gauss_alt = int_f_gauss_alt(w)
# Comparing the original calculation with the weighting option calculation
plt.figure("Signal_spectrum_comparison",[8,12])
fig,ax = plt.subplots()
ax.plot(w, yw_gauss[0],'b')
ax.plot(w, yw_gauss_alt[0],'r')
ax.set_xlim([w.min(),w.max()])
ax.set_ylim([0,1.5])
ax.set_xlabel(r"Spectrum $\omega$")
plt.show()
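For the plain Gaussian the transform is also known in closed form (from the characteristic function of a centred normal distribution): the integral of cos(w*t)*f_gauss(t) equals exp(-sigma^2*w^2/2), here with sigma = 0.5. Comparing against this analytic curve validates both numerical approaches.
analytic = np.exp(-(0.5**2)*w**2/2)
print(np.allclose(yw_gauss[0], analytic, atol=1e-5)) # expect True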