Source code for copulae.core.misc

from typing import Tuple, TypeVar

import numpy as np
import pandas as pd
from scipy import stats

try:
    from typing import Literal
except ImportError:
    from typing_extensions import Literal

__all__ = ['create_cov_matrix', 'EPS', 'pseudo_obs', 'rank_data', 'tri_indices']

EPS = np.finfo('float').eps
"""Machine Epsilon"""

ArrayVar = TypeVar('ArrayVar', np.ndarray, pd.DataFrame)


def create_cov_matrix(params: np.ndarray):
    """
    Creates a matrix from a given vector of parameters.

    Useful for elliptical copulae where we translate the rhos to the covariance matrix

    Parameters
    ----------
    params: array like
        (1, N) vector of parameters

    Returns
    -------
    numpy array
        (N x N) matrix where the upper and lower triangles are the parameters and the diagonal is a vector of 1
    """
    c = len(params)
    d = int(1 + (1 + 4 * 2 * c) ** 0.5) // 2  # dimension of matrix, determine this from the length of params

    sigma = np.identity(d)
    sigma[tri_indices(d, 1)] = np.tile(params, 2)
    return sigma


[docs]def pseudo_obs(data: ArrayVar, ties='average') -> ArrayVar: """ Compute the pseudo-observations for the given data matrix Parameters ---------- data: (N, D) ndarray Random variates to be converted to pseudo-observations ties: str, optional The method used to assign ranks to tied elements. The options are 'average', 'min', 'max', 'dense' and 'ordinal'. **average** The average of the ranks that would have been assigned to all the tied values is assigned to each value. **min** The minimum of the ranks that would have been assigned to all the tied values is assigned to each value. (This is also referred to as "competition" ranking.) **max** The maximum of the ranks that would have been assigned to all the tied values is assigned to each value. **dense** Like *min*, but the rank of the next highest element is assigned the rank immediately after those assigned to the tied elements. 'ordinal': All values are given a distinct rank, corresponding to the order that the values occur in `a`. Returns ------- numpy.array or pandas.DataFrame matrix or vector of the same dimension as `data` containing the pseudo observations Examples -------- >>> from copulae import pseudo_obs >>> from copulae.datasets import load_marginal_data >>> import numpy as np >>> data = load_marginal_data() >>> data.head(3) STUDENT NORM EXP 0 -0.485878 2.646041 0.393322 1 -1.088878 2.906977 0.253731 2 -0.462133 3.166951 0.480696 >>> pseudo_obs(data).head(3) # pseudo-obs is a DataFrame because input is a DataFrame STUDENT NORM EXP 0 0.325225 0.188604 0.557814 1 0.151616 0.399533 0.409530 2 0.336221 0.656115 0.626458 >>> np.random.seed(1) >>> rand = np.random.normal(size=(100, 3)) >>> rand[:3].round(3) array([[ 1.624, -0.612, -0.528], [-1.073, 0.865, -2.302], [ 1.745, -0.761, 0.319]]) >>> pseudo_obs(rand)[:3].round(3) # otherwise returns numpy arrays array([[0.921, 0.208, 0.248], [0.168, 0.792, 0.01 ], [0.941, 0.178, 0.584]]) >>> pseudo_obs(rand.tolist())[:3].round(3) array([[0.921, 0.208, 0.248], [0.168, 0.792, 0.01 ], [0.941, 0.178, 0.584]]) """ u = rank_data(data, 1, ties) / (len(data) + 1) if isinstance(data, pd.DataFrame): u = pd.DataFrame(u, index=data.index, columns=data.columns) return u
[docs]def rank_data(obs: np.ndarray, axis=0, ties='average'): """ Assign ranks to data, dealing with ties appropriately. This function works on core as well as vectors Parameters ---------- obs Data to be ranked. Can only be 1 or 2 dimensional. axis: {0, 1}, optional The axis to perform the ranking. 0 means row, 1 means column. ties: { 'average', 'min', 'max', 'dense', 'ordinal' }, default 'average' The method used to assign ranks to tied elements. The options are 'average', 'min', 'max', 'dense' and 'ordinal'. Returns ------- numpy array matrix or vector of the same dimension as X containing the pseudo observations See Also -------- :py:func:`~copulae.core.misc.pseudo_obs` The pseudo-observations function """ obs = np.asarray(obs) ties = ties.lower() assert obs.ndim in (1, 2), "Data can only be 1 or 2 dimensional" if obs.ndim == 1: return stats.rankdata(obs, ties) elif obs.ndim == 2: if axis == 0: return np.array([stats.rankdata(obs[i, :], ties) for i in range(obs.shape[0])]) elif axis == 1: return np.array([stats.rankdata(obs[:, i], ties) for i in range(obs.shape[1])]).T else: raise ValueError(f"No axis named 3 for object type {type(obs)}")
def tri_indices(n: int, m=0, side: Literal['lower', 'upper', 'both'] = 'both') -> Tuple[np.ndarray, np.ndarray]: """ Return the indices for the triangle of an (n, n) array Parameters ---------- n : int dimension of square matrix m : int offset side : { 'lower', 'upper', 'both' } Side of triangle to return. Supported values are 'lower', 'upper', 'both' Returns ------- (numpy array, numpy array) Tuple of row indices and column indices Examples -------- >>> import numpy as np >>> from copulae.core import tri_indices >>> x = np.arange(9).reshape(3, 3) # To get lower indices of matrix >>> x[tri_indices(3, 1, 'lower')] array([3, 6, 7]) # To form covariance matrix >>> c = np.eye(3) >>> c[tri_indices(3, 1)] = np.tile([0.1, 0.2, 0.3], 2) >>> c array([[1. , 0.1, 0.2], [0.1, 1. , 0.3], [0.2, 0.3, 1. ]]) """ side = side.lower() if side not in {'lower', 'upper', 'both'}: raise ValueError("side option must be one of 'lower', 'upper' or 'both'") l_i = [], [] # lower indices if side in {'lower', 'both'}: for i in range(n - m): for j in range(i + m, n): l_i[1].append(i) l_i[0].append(j) if side == 'lower': return np.array(l_i[0]), np.array(l_i[1]) u_i = [[], []] # upper indices if side in {'upper', 'both'}: for i in range(n - m): for j in range(i + m, n): u_i[0].append(i) u_i[1].append(j) if side == 'upper': return np.array(u_i[0]), np.array(u_i[1]) return np.array([*u_i[0], *l_i[0]]), np.array([*u_i[1], *l_i[1]])