import numpy as np
import matplotlib.pyplot as plt
# Grid size: one mu value per row and one sigma value per column.
n_rows = 100
n_columns = 1000
mu = 1e-2 * np.arange(n_rows)
sigma = 1e-3 * np.arange(n_columns)
print(mu.shape, sigma.shape, sep="\n")
# Seed the global generator so the standard-normal draws are reproducible.
np.random.seed(10)
a = np.random.randn(n_rows, n_columns)
print(a.shape)
# Turn mu into a column vector so broadcasting adds mu[i] to every entry of row i.
b = mu[:, np.newaxis] + a
print(b.shape)
# Scale column j of the noise by sigma[j] (sigma as a row vector),
# then apply the same per-row shift by mu[i].
x = a * sigma[np.newaxis, :] + mu[:, np.newaxis]
print(x.shape)
print(x)
# Average over axis 0 (the rows), giving one mean for each column of x.
mean_per_column = np.mean(x, axis=0)
print(mean_per_column.shape)
print(mean_per_column[:10])
# Histogram of the column means on a fresh figure.
fig_mean, ax_mean = plt.subplots()
ax_mean.hist(mean_per_column, bins=30)
ax_mean.set_xlabel('mean per column')
ax_mean.set_ylabel('frequency')
fig_mean.tight_layout()
# The distribution of column means peaks at about 0.5 (the mean of mu is 0.495).
# Standard deviation over axis 1 (the columns), giving one value for each row of x.
std_per_row = np.std(x, axis=1)
print(std_per_row.shape)
print(std_per_row[:10])
# Histogram of the row standard deviations on a fresh figure.
fig_std, ax_std = plt.subplots()
ax_std.hist(std_per_row, bins=30)
ax_std.set_xlabel('std per row')
ax_std.set_ylabel('frequency')
fig_std.tight_layout()
# Cut-off shared by the index lookup and the per-row means below.
threshold = 0.7
# Indices where the condition is true. For a 2-D condition np.where returns
# a (row_indices, column_indices) pair of equally long arrays.
w = np.where(x > threshold)
print(w)
print(w[0])  # Indices for rows.
print(w[0].shape)
print(w[1])  # Indices for columns.
print(w[1].shape)
# For each row, find all elements of the row that are above the threshold
# and calculate their mean. Masking the row directly selects the same
# elements (in the same column order) as looking them up through w, without
# rescanning the full list of hits once per row.
for r in range(n_rows):
    row = x[r]
    above = row[row > threshold]
    # A row with no hits has no mean: print nan explicitly instead of
    # calling .mean() on an empty array, which would emit a RuntimeWarning.
    print(above.mean() if above.size else np.nan)