Christian Elsasser (che@physik.uzh.ch)
The content of this lecture may be reused, in full or in part, under the CC BY-SA 4.0 licence
import scipy
print("%s -- Version: %s"%(scipy.__name__,scipy.__version__))
import numpy as np
print("%s -- Version: %s"%(np.__name__,np.__version__))
import matplotlib.pyplot as plt
from scipy import optimize
Let's define the non-linear function $f(x) = x^3+8$, which has its root at $-2$.
And let's also define its first and second derivatives.
def f_1_0(x):
    return x**3+8
def f_1_0_p(x):
    return 3*x**2
def f_1_0_pp(x):
    return 6*x
optimize.bisect(f_1_0,a=-10,b=10) # a,b are the interval boundaries for the bisection method
Now we want to get more information about the result
optimize.bisect(f_1_0,a=-10,b=10,full_output=True) # full_output=True additionally returns a RootResults object with convergence information
The first item is the root as a float, the second item a RootResults object.
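The RootResults object can be unpacked and inspected directly; a minimal sketch (attribute names as documented for scipy.optimize.RootResults):
root, info = optimize.bisect(f_1_0,a=-10,b=10,full_output=True)
print(info.converged, info.iterations, info.function_calls) # convergence flag, number of iterations, number of function calls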
optimize.newton(f_1_0,x0=1) # Unfortunately newton in Scipy 1.1.0 does not allow a full output, but versions 1.2.1+ do
If the fprime argument is not specified, the secant method is used; with fprime (the first derivative) the Newton-Raphson method is used; and with fprime2 (the second derivative) in addition Halley's method is used.
print(optimize.newton(f_1_0,x0=100,fprime=None, fprime2=None, ))
print(optimize.newton(f_1_0,x0=100,fprime=f_1_0_p,fprime2=None, ))
print(optimize.newton(f_1_0,x0=100,fprime=f_1_0_p,fprime2=f_1_0_pp))
%timeit optimize.newton(f_1_0,x0=100,fprime=None, fprime2=None, )
%timeit optimize.newton(f_1_0,x0=100,fprime=f_1_0_p,fprime2=None, )
%timeit optimize.newton(f_1_0,x0=100,fprime=f_1_0_p,fprime2=f_1_0_pp)
But this result can vary depending on the initial guess: starting closer to the solution would lead to better performance of the secant method (first case).
Let's be naughty to the algorithm ...
optimize.newton(f_1_0,x0=0)
... which is obviously a wrong result despite the convergence. (In Scipy 1.2.1+ the newton method also returns a RootResults object when full output is specified.)
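For illustration, on Scipy 1.2.1+ the full output of newton can be requested as sketched below (on 1.1.0 this raises a TypeError):
root, info = optimize.newton(f_1_0,x0=1,full_output=True) # assumes Scipy 1.2.1+
print(info)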
What happens when specifying the first derivative?
optimize.newton(f_1_0,x0=0,fprime=f_1_0_p)
The warnings module allows warnings to be "promoted" to errors.
import warnings
warnings.filterwarnings('error')
optimize.newton(f_1_0,x0=0,fprime=f_1_0_p)
Resetting the filter afterwards (here to 'ignore') is helpful, since certain modules will otherwise throw errors on import.
warnings.filterwarnings('ignore')
optimize.brentq(f_1_0,-100,100,full_output=True) # brenth also exists as an alternative implementation
We can also specify parameters of the function in the root-finding call.
def f_1_1(x,a):
    return x**3+a
optimize.brentq(f_1_1,-100,100,full_output=True,args=(27,))
from scipy import stats
Let's load some data (imagine a sample of measurements and you want to estimate the underlying distribution)
data = np.loadtxt("data_ml.csv")
data
Let's quickly inspect the data as a histogram
bins = np.linspace(-10,10,51)
h,bins = np.histogram(data,bins)
fig = plt.figure("data",(6,4))
ax = plt.subplot()
ax.bar((bins[:-1]+bins[1:])*0.5,h,width=bins[1:]-bins[:-1],color="grey")
ax.set_xlabel("Measurement variable")
ax.set_ylabel("Number of observations")
ax.set_xlim([-10,10])
plt.show()
First approach: Gaussian distribution - we define the function describing the probability density with p[0] as $\mu$ and p[1] as $\sigma$.
f_gauss = lambda x,p: stats.norm(p[0],p[1]).pdf(x)
# Non-lambda definition:
# def f_gauss(x,p):
#     return stats.norm(p[0],p[1]).pdf(x)
!!! The order of the parameters in Scipy is not always obvious and still not well documented !!!
For many distributions it follows a location-scale approach (1st parameter: location, 2nd: scale), even for distributions like the exponential distribution that do not naturally belong to this type of distribution.
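As a quick illustration of the convention (a check of my own, using the exponential distribution with loc=0 and scale=2, i.e. rate 1/2):
# stats.expon(loc,scale) has pdf (1/scale)*exp(-(x-loc)/scale) for x >= loc
x_check = 1.5
print(stats.expon(0,2).pdf(x_check), 0.5*np.exp(-x_check/2)) # the two values should agree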
Let's define the negative log-likelihood function that we want to minimize in a general fashion
def log_likelihood(p,data,f):
    return -np.log(f(data,p)).sum()
# here the data & the distribution f act as parameters
Let's minimise the log-likelihood function with the data and the starting parameters $\mu=0$, $\sigma=2.5$.
As a first minimisation method we use the Conjugate Gradient method.
res_g = optimize.minimize(log_likelihood,[0.0,2.5],args=(data,f_gauss),method="CG")
print(res_g)
For completeness let's check the analytical results which are the mean and the standard deviation of the sample
print("mu = {0:10.7f}".format(np.mean(data)))
print("sigma = {0:10.7f}".format(np.std(data)))
res_g = optimize.minimize(log_likelihood,[0.0,2.5], args=(data,f_gauss),method="TNC")
print(res_g)
res_g["x"]
We create the fitted distribution
f_gauss_fit = stats.norm(*(res_g.x))
optimize.minimize?
Let's try a t-distribution to accommodate the longer tails, with p[0] as the number of degrees of freedom, p[1] as the centre, and p[2] as the width parameter.
f_t = lambda x,p: stats.t(p[0],p[1],p[2]).pdf(x)
res_t = optimize.minimize(log_likelihood,[11,0.0,2.6], args=(data,f_t),method="Nelder-Mead")
print(res_t)
Let's do some testing for model selection with the Akaike Information Criterion (AIC) and the Bayesian Information Criterion (BIC):
AIC_g = 2*len(res_g.x)+2*res_g.fun
AIC_t = 2*len(res_t.x)+2*res_t.fun
BIC_g = np.log(len(data))*len(res_g.x)+2*res_g.fun
BIC_t = np.log(len(data))*len(res_t.x)+2*res_t.fun
print("AIC Gauss: {0:8.1f}".format(AIC_g))
print("AIC t-dist: {0:8.1f}".format(AIC_t))
print("BIC Gauss: {0:8.1f}".format(BIC_g))
print("BIC t-dist: {0:8.1f}".format(BIC_t))
Let's create the fitted t-distribution
f_t_fit = stats.t(*(res_t.x))
x_vec = np.linspace(-10,10,201)
fig = plt.figure("data",(6,4))
ax = plt.subplot()
ax.bar((bins[:-1]+bins[1:])*0.5,h,width=bins[1:]-bins[:-1],color="grey")
ax.plot(x_vec,f_t_fit.pdf(x_vec)*data.size*(bins[1:]-bins[:-1]).mean(),color="#a00014")
ax.plot(x_vec,f_gauss_fit.pdf(x_vec)*data.size*(bins[1:]-bins[:-1]).mean(),color="#0014a0")
ax.set_xlabel("Measurement variable")
ax.set_ylabel("Number of observations")
ax.legend(["t-dist","Gauss","Data"])
ax.set_xlim([-10,10])
plt.show()
Sampling 100'000 (= 500 × 200) random measurements according to the estimated t-distribution
mc_data = f_t_fit.rvs((500,200))
Histogram the measurements and plot them
bins = np.linspace(-10,10,51)
h,bins = np.histogram(mc_data,bins)
fig = plt.figure("mc_data",(6,4))
ax = plt.subplot()
ax.bar((bins[:-1]+bins[1:])*0.5,h,width=bins[1:]-bins[:-1],color="grey")
ax.plot(x_vec,f_t_fit.pdf(x_vec)*mc_data.size*(bins[1:]-bins[:-1]).mean(),color="#a00014")
ax.set_xlabel("Measurement variable")
ax.set_ylabel("Number of observations")
ax.set_xlim([-10,10])
plt.show()
Get the 5% and the 95% quantile
print(" 5% quantile: {0:6.3f}".format(f_t_fit.ppf(0.05)))
print("95% quantile: {0:6.3f}".format(f_t_fit.ppf(0.95)))
Certain minimisation methods require you to provide the first derivative (Jacobian) or even the second derivative (Hessian) as an analytical function.
Let's define a simple 2D-function: $f(x_0,x_1) = (x_0-5)^2+(x_1-6)^4$
def f_2_0(x):
    return (x[0]-5)**2+(x[1]-6)**4
def f_2_0_p(x):
    return np.array([
        2*(x[0]-5),
        4*(x[1]-6)**3,
    ])
def f_2_0_pp(x):
    return np.array([
        [2,               0],
        [0, 4*3*(x[1]-6)**2]
    ])
Setting the initial guess
p_init = np.array([5,2])
# Please note that this is a special case where one of the coordinates is already at the optimal value
The usual Conjugate Gradient method will fail ...
optimize.minimize(f_2_0,p_init,method="CG")
But switching to the Newton-CG method (providing the Jacobian, and optionally also the Hessian) works:
optimize.minimize(f_2_0,p_init,jac=f_2_0_p,method="Newton-CG")
optimize.minimize(f_2_0,p_init,jac=f_2_0_p,hess=f_2_0_pp,method="Newton-CG")
Let's see what adding the hessian can do in terms of performance
%timeit optimize.minimize(f_2_0,np.array([100,100]),jac=f_2_0_p,method="Newton-CG")
%timeit optimize.minimize(f_2_0,np.array([100,100]),jac=f_2_0_p,hess=f_2_0_pp,method="Newton-CG")
Depending on the situation (number of dimensions, starting position) the speed can double!
Previously Numpy provided the matrix class, which is now deprecated in favour of the ndarray class, i.e. the standard numpy array. Matrix multiplication works via the @ operator; matrix inversion and the calculation of the Hermitian (conjugate transpose) are done via the corresponding functions.
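A minimal sketch of these operations on a plain ndarray (using np.linalg.inv for the inverse and .conj().T for the Hermitian conjugate; the test matrix is just an illustration):
a = np.array([[1,1j],[0,2]]) # small complex test matrix
print(a@np.linalg.inv(a)) # matrix product with the inverse -> identity
print(a.conj().T)         # Hermitian (conjugate transpose)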
warnings.filterwarnings("default")
# Create a diagonal matrix
# d = np.matrix(np.diag([1,2,3,4]))  # old way using the deprecated np.matrix class
d = np.diag([1,2,3,4])
print(d)
# Create a 4x4 matrix with random entries and its inverse
s = np.array(np.random.rand(4,4))
s_inv = np.linalg.inv(s)
# De-diagonalise it
m = s_inv@d@s # matrix multiplication
print(m)
s_inv*d*s # Note: with ndarrays, * is element-wise multiplication, not matrix multiplication
# Get back the eigenvalues
# ... and the eigenvectors
values,transf = np.linalg.eig(m)
print(values)
print(transf)
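Since m was built as s_inv@d@s, the eigenvalues should be 1, 2, 3, 4, and m can be reconstructed from its eigendecomposition (a quick sanity check):
# m = V diag(lambda) V^-1 with V the matrix of eigenvectors
print(np.allclose(transf@np.diag(values)@np.linalg.inv(transf),m))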
For solving linear equations there are also dedicated algorithms available in the Scipy package: \begin{align} 5 & = 6x_0+4x_1-2x_2 \\ 3 & = 3x_0+8x_1+2x_2 \\ 7 & = 8x_0+3x_1+x_2 \end{align}
import scipy.linalg # ensure the linalg submodule is loaded
A = np.array([[6,4,-2],[3,8,2],[8,3,1]])
b = np.array([5,3,7])
scipy.linalg.inv(A).dot(b)
scipy.linalg.solve(A,b)
%timeit scipy.linalg.inv(A).dot(b)
%timeit scipy.linalg.solve(A,b)
from scipy.misc import derivative
def f_4_1(x):
    return np.sin(x)
derivative(f_4_1,0,dx=1e-1,n=4,order=15)
# `order' is a confusing name: it is the number of points used in the finite-difference stencil, not the order of the derivative (that is `n')
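A short check that n is the order of the derivative: the first derivative of sin at 0 should be cos(0) = 1, while order only controls the number of stencil points:
print(derivative(f_4_1,0,dx=1e-1,n=1,order=3))  # ~1.0
print(derivative(f_4_1,0,dx=1e-1,n=1,order=15)) # ~1.0, with a smaller truncation error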
from scipy import integrate
Let's integrate $f(x) = 1/x^2$ from $10^{-2}$ to $1$ (analytical value: 99)
def f_4_2(x):
    return 1/x**2
Scipy has methods to calculate the integral from a sample of $x_i$ and corresponding $y_i = f(x_i)$ based on specific rules (Newton-Cotes methods)
x = np.linspace(0.01,1.00,100)
y = f_4_2(x)
Let's first go for the Trapezoidal rule
integrate.trapz(y,x)
The integral shows a significant error due to the diverging behaviour towards 0.
Let's try Simpson's rule.
integrate.simps(y,x)
Better due to the higher order, but still not good. So let's go for an adaptive algorithm.
integrate.quad(f_4_2,a=0.01,b=1) # First return value is the integral, second the estimated error
%timeit integrate.trapz(y,x)
%timeit integrate.simps(y,x)
%timeit integrate.quad(f_4_2,a=0.01,b=1)
Let's see what happens when we have a function with a discontinuity, the indicator function $f(x) = \mathbf{1}_{x<0}$
def f_4_3(x):
    return (x<0)*1.0
We integrate from $-1$ to an upper limit $x>0$, so the integral should always be one!
print(integrate.quad(f_4_3,-1,1))
print(integrate.quad(f_4_3,-1,10))
print(integrate.quad(f_4_3,-1,1000))
The points argument allows you to specify particular points (such as discontinuities) that need to be taken into account in the subdivision of the integration interval.
print(integrate.quad(f_4_3,-1,1000,points=[0]))
Scipy also provides nquad for multidimensional integration.
def f_4_4(x0,x1,x2):
    return 1.0/(x0**2*x1**2*x2**2)
integrate.nquad(f_4_4,ranges=((0.1,1),(0.1,1),(0.1,1))) # It should be 9^3 = 729
%timeit integrate.nquad(f_4_4,ranges=((0.1,1),(0.1,1),(0.1,1)))
from scipy.fftpack import fft, fftfreq, fftshift, ifft
import numpy as np
Let's get an array representing 366 days.
t = np.arange(0,365*1+1,1)
And let's generate a time series with a yearly oscillation, a 21-day oscillation, a 3-day oscillation, a linear trend, and Gaussian noise:
y = 5*np.sin(2*np.pi*t/365)+4*np.sin(2*np.pi*(t-5)/21)+3*np.sin(2*np.pi*t/3)+0.02*t+5*np.random.randn(*t.shape)
plt.figure("freq_spectrum_tooth_saw",[8,12])
ax = plt.subplot()
ax.plot(t,y,color="#a00014")
ax.set_xlim([0,365])
ax.set_xlabel("time")
ax.set_ylabel("Amplitude")
plt.show()
yf = fft(y) # Actual transformation
xf = fftfreq(len(y), 1) # Generation of frequency values
xf = fftshift(xf) # Shift it so that zero is centered
yplot = fftshift(yf) # Corresponding shift for frequency amplitudes
plt.figure("freq_spectrum_tooth_saw",[8,12])
ax = plt.subplot()
ax.plot(xf, np.abs(yplot),color="#0014a0") # yplot values are complex --> abs
ax.set_xlim([0,0.5])
ax.set_xlabel("frequency")
ax.set_ylabel("Amplitude")
plt.show()
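As a quick cross-check (a sketch based on the signal definition above), the strongest positive-frequency peaks should sit close to the generating frequencies 1/365, 1/21 and 1/3:
pos = np.arange(1,len(y)//2)                 # positive-frequency bins, DC excluded
top3 = pos[np.argsort(np.abs(yf[pos]))[-3:]] # indices of the three largest amplitudes
print(np.sort(fftfreq(len(y),1)[top3]))      # expected: roughly 0.003, 0.05 and 0.33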
Let's check the inverse transformation with ifft
plt.figure("inverse_transform",[8,12])
ax = plt.subplot()
ax.plot(t, y,color="#a00014") # Original
ax.plot(t, ifft(yf).real,'black',linestyle="dotted") # inverse transform (real part; the imaginary part is numerically zero)
ax.set_xlim([t.min(),t.max()])
plt.show()
Let's apply a simple filter and only keep the frequencies in the spectrum that have an amplitude of more than 20% of the maximum value in the spectrum
y_filter = yf*(np.abs(yf)>(0.2*np.abs(yf).max()))
yplot_filter = fftshift(y_filter)
plt.figure("freq_spectrum_tooth_saw",[8,12])
ax = plt.subplot()
ax.plot(xf, np.abs(yplot_filter),color="#0014a0") # yplot values are complex --> abs
ax.set_xlim([0,0.5])
ax.set_xlabel("filtered frequency")
ax.set_ylabel("Filtered Amplitude")
plt.show()
# Let's check the inverse transformation
plt.figure("inverse_transform",[8,12])
ax = plt.subplot()
ax.plot(t, y,color="#a00014") # Original
ax.plot(t, ifft(y_filter).real,'black') # inverse transform of the filtered spectrum (real part)
ax.set_xlim([t.min(),t.max()])
plt.show()
from scipy.integrate import quad
Generating two signals
# Generating two signals
f_gauss = lambda x: stats.norm(0,0.5).pdf(x) # Gaussian signal
f_g_osc = lambda x: f_gauss(x)*np.cos(2*np.pi*x) # ... with oscillation
# time range considered
t = np.linspace(-5,5,101)
plt.figure("signals",[8,12])
ax = plt.subplot()
ax.plot(t, f_gauss(t),'b')
ax.plot(t, f_g_osc(t),'r')
ax.set_xlim([t.min(),t.max()])
ax.set_ylim([-1.0,1.0])
ax.set_xlabel(r"$t$")
ax.set_ylabel(r"$A(t)$")
plt.show()
# Function that returns the fourier integrand as function, only considering the real part
def get_fourier_integrand(f):
    return lambda t,w: np.cos(w*t)*f(t)
# Define Fourier integrands for a given omega
f_i_gauss = lambda w : quad(get_fourier_integrand(f_gauss),-10,10,args=(w,))
f_i_g_osc = lambda w : quad(get_fourier_integrand(f_g_osc),-10,10,args=(w,))
f_i_gauss(1.0) # first entry: integration result, second entry: estimate of the absolute error
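For the Gaussian this can be compared with the analytical cosine transform of the pdf, $\exp(-\sigma^2\omega^2/2)$ with $\sigma = 0.5$ (a quick check added here, not part of the original calculation):
print(f_i_gauss(1.0)[0], np.exp(-0.5*0.25*1.0**2)) # numerical integral vs. analytical value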
# Vectorise the integrands (allows the integral to be evaluated for a whole array of omega values in one call)
int_f_gauss = np.vectorize(f_i_gauss)
int_f_g_osc = np.vectorize(f_i_g_osc)
# Spectrum we are considering
w = np.linspace(0,10,51)
# Actually perform the integration
yw_gauss = int_f_gauss(w)
yw_g_osc = int_f_g_osc(w)
yw_gauss
plt.figure("Signal_spectra",[8,12])
ax = plt.subplot()
ax.plot(w, yw_gauss[0],'b')
ax.plot(w, yw_g_osc[0],'r')
ax.set_xlim([w.min(),w.max()])
ax.set_ylim([0,1.5])
ax.set_xlabel(r"Spectrum $\omega$")
plt.show()
# But quad can already perform a Fourier-type integral directly via the weight option
f_i_gauss_alt = lambda w : quad(f_gauss,-10,10,weight="cos",wvar=w) # wvar is the oscillation frequency
int_f_gauss_alt = np.vectorize(f_i_gauss_alt)
yw_gauss_alt = int_f_gauss_alt(w)
# Comparing the original calculation with the weighting option calculation
plt.figure("Signal_spectrum_comparison",[8,12])
ax = plt.subplot()
ax.plot(w, yw_gauss[0],'b')
ax.plot(w, yw_gauss_alt[0],'r')
ax.set_xlim([w.min(),w.max()])
ax.set_ylim([0,1.5])
ax.set_xlabel(r"Spectrum $\omega$")
plt.show()