## Demonstrate calculating PCA from the covariance matrix for a very
## small data set: say time series of four stocks.  Eli Tziperman,
## APM120, 201709
import numpy as np
from numpy import linalg
import scipy as scipy
import matplotlib.pyplot as plt
import matplotlib

## make data:
N = 100000   # number of time points
M = 4        # number of data points (stocks) at a given time
Np = 200     # portion of data to plot

# Create data: two fixed 4-component spatial patterns (V1, V2), each
# modulated in time by a cosine of a different period, so the data are
# exactly rank 2 and the first two PCs should recover them.
V1 = np.array([0, 1.6, 0.0, -1])
V2 = np.array([0.5, 0, 1.5, 0])
V1.shape = (4, 1)   # column vectors so V@row-of-times gives a 4xN field
V2.shape = (4, 1)
t = np.arange(0, N)
tp = np.arange(0, Np)
t.shape = (1, N)    # row vectors of time indices
tp.shape = (1, Np)
F = 3*V1@np.cos(t/3) + V2@np.cos(t/5)   # F is 4xN: rows=stocks, cols=times

## display a small sample of the data:
print('sample data:')
np.set_printoptions(linewidth=132, precision=5)
print(F[:, 0:8])
wait = input("Enter to continue...")

## plot data:
matplotlib.rcParams.update({'font.size': 18})  # default font size
fig1 = plt.figure(1)
plt.clf()
hl1, = plt.plot(tp.T, F[0, tp].T, 'r-', linewidth=2)
hl2, = plt.plot(tp.T, F[1, tp].T, 'g-', linewidth=2)
hl3, = plt.plot(tp.T, F[2, tp].T, 'b-', linewidth=2)
hl4, = plt.plot(tp.T, F[3, tp].T, 'c-', linewidth=2)
plt.legend([hl1, hl2, hl3, hl4], ['F1', 'F2', 'F3', 'F4'])
plt.xlabel('day')
plt.ylabel('price')
plt.title('Stock prices')
plt.pause(0.05)
print('time series of stock prices plotted.')
wait = input("Enter to continue...")

## calculate covariance matrix (data have zero mean by construction, so
## no mean removal is needed here):
# NOTE: the original printed "C=F*F''/N=" intending MATLAB transpose F',
# but adjacent Python string literals concatenate, so the apostrophe was
# silently lost; print the actual Python formula instead.
print('Covariance matrix: C=F@F.T/N=')
print('         red               green              blue               cyan')
C = F@F.T/N
mycolor = ['red  ', 'green', 'blue ', 'cyan ']
for i in range(0, 4):
    print(repr(mycolor[i]), repr(C[i, :]))
wait = input("Enter to continue...")

## Calculate PCs: eigenvectors (columns of U) and eigenvalues (D) of C:
D, U = np.linalg.eig(C)
## sort eigenvalues and vectors by size of abs(eigenvalue) from large to small:
idx = np.abs(D).argsort()
idx = idx[::-1]   # argsort is ascending; reverse for descending order
D = D[idx]
U = U[:, idx]
print('\n\nPCs (columns of U):')
print(U)
wait = input("Enter to continue...")
print('eigen values (D):')
print(D)
wait = input("Enter to continue...")

## project the data onto the PCs to get the time expansion coefficients:
print('dimensional time series of PC coefficients (columns of V):')
print('T=U.T@F;')
T = U.T@F
print(T[:, 0:7])
## plot time expansion coefficients:
fig2 = plt.figure(2)
plt.clf()
hl1, = plt.plot(tp.T, T[0, tp].T, 'm-', linewidth=2)
hl2, = plt.plot(tp.T, T[1, tp].T, 'c-', linewidth=2)
hl3, = plt.plot(tp.T, T[2, tp].T, 'b-', linewidth=2)
hl4, = plt.plot(tp.T, T[3, tp].T, 'k--', linewidth=2)
plt.legend([hl1, hl2, hl3, hl4], ['v1', 'v2', 'v3', 'v4'])
plt.xlabel('time')
plt.ylabel('$v_i(t)$')
plt.title('expansion coefficients $v_{ni}$:')
plt.pause(0.05)
wait = input("Enter to continue...")

## demonstrate reconstruction of data using some of the PCs,
## i.e. dimension reduction:
print('reconstruct data using only two of the PCs (dimension reduction):')
# BUGFIX: the original echoed 'U[:,0:1]@T[0:1,:]' (only one PC), which
# contradicted both the announcement above and the actual computation
# below; the printed formula now matches the code.
print('F_reconstructed=U[:,0:2]@T[0:2,:];')
F_reconstructed = U[:, 0:2]@T[0:2, :]
print("F=")
print(F[:, 0:5])
print("F_reconstructed=")
print(F_reconstructed[:, 0:5])
# Max absolute error; should be ~machine precision since F is rank 2.
print('reconstruction error=np.max(np.abs(F-F_reconstructed)):')
F_reconstruction_error = np.max(np.abs(F - F_reconstructed))
print(F_reconstruction_error)

## variance explained by each PC:
print('Explained variance is given by eigenvalues of covariance matrix,\n '
      + 'variance_explained_by_each_mode=D/sum(D)=')
variance_explained_by_each_mode = D/sum(D)
print(variance_explained_by_each_mode)