#!/usr/bin/env python3
#
'''
.. module:: PCA
:synopsis: Principal Component Analysis (PCA)
.. moduleauthor:: Cor de Vries <c.p.de.vries@sron.nl>
'''
import sys,os
import numpy
import matplotlib
matplotlib.use('GTK3Agg')
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from numpy import linalg as LA
[docs]def cov(m, y=None, rowvar=1, bias=0, ddof=None):
"""
Estimate a covariance matrix, given data.
Covariance indicates the level to which two variables vary together.
If we examine N-dimensional samples, :math:`X = [x_1, x_2, ... x_N]^T`,
then the covariance matrix element :math:`C_{ij}` is the covariance of
:math:`x_i` and :math:`x_j`. The element :math:`C_{ii}` is the variance
of :math:`x_i`.
Args:
* `m` = array_like
A 1-D or 2-D array containing multiple variables and observations.
Each row of `m` represents a variable, and each column a single
observation of all those variables. Also see `rowvar` below.
* `y` = array_like, optional
An additional set of variables and observations. `y` has the same
form as that of `m`.
* `rowvar` = int, optional
If `rowvar` is non-zero (default), then each row represents a
variable, with observations in the columns. Otherwise, the relationship
is transposed: each column represents a variable, while the rows
contain observations.
* `bias` = int, optional
Default normalization is by ``(N - 1)``, where ``N`` is the number of
observations given (unbiased estimate). If `bias` is 1, then
normalization is by ``N``. These values can be overridden by using
the keyword ``ddof`` in numpy versions >= 1.5.
* `ddof` = int, optional
.. versionadded:: 1.5
If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is
the number of observations; this overrides the value implied by
``bias``. The default value is ``None``.
Returns:
* `out` = ndarray
The covariance matrix of the variables.
See Also
--------
corrcoef : Normalized covariance matrix
Examples
--------
Consider two variables, :math:`x_0` and :math:`x_1`, which
correlate perfectly, but in opposite directions:
>>> x = np.array([[0, 2], [1, 1], [2, 0]]).T
>>> x
array([[0, 1, 2],
[2, 1, 0]])
Note how :math:`x_0` increases while :math:`x_1` decreases. The covariance
matrix shows this clearly:
>>> np.cov(x)
array([[ 1., -1.],
[-1., 1.]])
Note that element :math:`C_{0,1}`, which shows the correlation between
:math:`x_0` and :math:`x_1`, is negative.
Further, note how `x` and `y` are combined:
>>> x = [-2.1, -1, 4.3]
>>> y = [3, 1.1, 0.12]
>>> X = np.vstack((x,y))
>>> print np.cov(X)
[[ 11.71 -4.286 ]
[ -4.286 2.14413333]]
>>> print np.cov(x, y)
[[ 11.71 -4.286 ]
[ -4.286 2.14413333]]
>>> print np.cov(x)
11.71
"""
# Check inputs
if ddof is not None and ddof != int(ddof):
raise ValueError("ddof must be integer")
X = numpy.array(m, ndmin=2)
if X.size == 0:
# handle empty arrays
return numpy.array(m)
if X.shape[0] == 1:
rowvar = 1
if rowvar:
axis = 0
tup = (slice(None), numpy.newaxis)
else:
axis = 1
tup = (numpy.newaxis, slice(None))
if y is not None:
y = numpy.array(y, copy=False, ndmin=2)
X = concatenate((X, y), axis)
X -= X.mean(axis=1-axis)[tup]
if rowvar:
N = X.shape[1]
else:
N = X.shape[0]
if ddof is None:
if bias == 0:
ddof = 1
else:
ddof = 0
fact = float(N - ddof)
if not rowvar:
return (numpy.dot(X.T, X.conj()) / fact).squeeze()
else:
return (numpy.dot(X, X.T.conj()) / fact).squeeze()
[docs]class PCA:
'''
Class for PCA analysis
'''
def __init__( self, n_components=10 ):
self.components=n_components