Source code for la_forge.utils

import glob

import numpy as np
import scipy.stats as sps
from scipy import interpolate as interp
from scipy.ndimage import filters as filter
import matplotlib.pyplot as plt

try:
    from enterprise.pulsar import Pulsar
    ent_present = True
except ImportError:
    ent_present = False


fyr = 1./31536000.


# from Kristina Islo

[docs]def getMax2d(samples1, samples2, weights=None, smooth=True, bins=[40, 40],
             x_range=None, y_range=None, logx=False, logy=False, logz=False):
    """ Function to return the maximum likelihood values by interpolating over
    a two dimensional histogram made of two sets of samples.

    Parameters
    ----------

    samples1, samples2 : array or list
        Arrays or lists from which to find two dimensional maximum likelihood
        values.

    weights : array of floats
        Weights to use in histogram.

    bins : list of ints
        List of 2 integers which dictates number of bins for samples1 and
        samples2.

    x_range : tuple, optional
        Range of samples1

    y_range : tuple, optional
        Range of samples2

    logx : bool, optional
        A value of True use log10 scale for samples1.

    logy : bool, optional
        A value of True use log10 scale for samples2.

    logz : bool, optional
        A value of True indicates that the z axis is in log10.

    """

    if x_range is None:
        xmin = np.amin(samples1)
        xmax = np.amax(samples1)
    else:
        xmin = x_range[0]
        xmax = x_range[1]

    if y_range is None:
        ymin = np.amin(samples2)
        ymax = np.amax(samples2)
    else:
        ymin = y_range[0]
        ymax = y_range[1]

    if logx:
        bins[0] = np.logspace(np.log10(xmin), np.log10(xmax), bins[0])

    if logy:
        bins[1] = np.logspace(np.log10(ymin), np.log10(ymax), bins[1])

    hist2d, xedges, yedges = np.histogram2d(samples1, samples2, weights=weights,
                                            bins=bins,
                                            range=[[xmin, xmax], [ymin, ymax]])
    # extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]

    if logz:
        hist2d = np.where(hist2d >= 0, hist2d, 1)

    xedges = np.delete(xedges, -1) + 0.5*(xedges[1] - xedges[0])
    yedges = np.delete(yedges, -1) + 0.5*(yedges[1] - yedges[0])

    # gaussian smoothing
    if smooth:
        hist2d = filter.gaussian_filter(hist2d, sigma=0.75)

    # interpolation
    f = interp.interp2d(xedges, yedges, hist2d, kind='cubic')
    xedges = np.linspace(xedges.min(), xedges.max(), 2000)
    yedges = np.linspace(yedges.min(), yedges.max(), 2000)
    hist2d = f(xedges, yedges)

    # return xedges[np.argmax(hist2d)]
    ind = np.unravel_index(np.argmax(hist2d), hist2d.shape)
    return xedges[ind[0]], yedges[ind[1]]


[docs]def get_params_2d_mlv(core, par1, par2):
    """Convenience function for finding two dimensional maximum likelihood
    value for any two parameters.
    """

    samps1 = core.get_param(par1, to_burn=True)
    samps2 = core.get_param(par2, to_burn=True)

    return getMax2d(samps1, samps2)


[docs]def get_rn_noise_params_2d_mlv(core, pulsar):
    """Convenience function to find 2d rednoise maximum likelihood values.
    """
    rn_amp = pulsar + '_log10_A'
    rn_si = pulsar + '_gamma'

    return get_params_2d_mlv(core, rn_amp, rn_si)


[docs]def get_Tspan(pulsar, datadir):
    """Returns timespan of a pulsars dataset by loading the pulsar as an
    `enterprise.Pulsar()` object.

    Parameters
    ----------
    pulsar : str

    datadir : str
        Directory where `par` and `tim` files are found.
    """
    if not ent_present:
        raise ImportError('enterprise is not available for import. '
                          'Please provide time span of data in another form.')
    parfile = glob.glob(datadir + '/{0}*.par'.format(pulsar))[0]
    timfile = glob.glob(datadir + '/{0}*.tim'.format(pulsar))[0]

    psr = Pulsar(parfile, timfile, ephem='{0}'.format('DE436'))

    T = psr.toas.max() - psr.toas.min()

    return T


[docs]def compute_rho(log10_A, gamma, f, T):
    """
    Converts from power to residual RMS.
    """

    return np.sqrt((10**log10_A)**2 / (12.0*np.pi**2)
                   * fyr**(gamma-3) * f**(-gamma) / T)


[docs]def bayes_fac(samples, ntol=200, logAmin=-18, logAmax=-12,
              nsamples=100, smallest_dA=0.01, largest_dA=0.1):
    """
    Computes the Savage Dickey Bayes Factor and uncertainty. Based on code in
    enterprise_extensions. Expanded to include more options for when there are
    very few samples at lower amplitudes.

    :param samples: MCMC samples of GWB (or common red noise) amplitude
    :param ntol: Tolerance on number of samples in bin
    :param logAmin: Minimum log amplitude being considered.
    :param logAmax: Maximum log amplitude being considered.
    :returns: (bayes factor, 1-sigma bayes factor uncertainty)
    """

    prior = 1 / (logAmax - logAmin)
    dA = np.linspace(smallest_dA, largest_dA, nsamples)
    bf = []
    bf_err = []
    mask = []  # selecting bins with more than ntol samples
    N = len(samples)

    for ii, delta in enumerate(dA):
        n = np.sum(samples <= (logAmin + delta))
        post = n / N / delta

        bf.append(prior/post)
        bf_err.append(bf[ii]/np.sqrt(n))

        if n >= ntol:
            mask.append(ii)
    # Parse various answers depending on how well
    # we can calculate the SD BF
    # WARNING
    if all([val!=np.inf for val in bf]):
        return (np.mean(np.array(bf)[mask]),
                np.std(np.array(bf)[mask]))
    elif all([val==np.inf for val in bf]):
        post = 1 / N / smallest_dA
        print('Not enough samples at low amplitudes.\n'
              'Can only set lower limit on Savage-Dickey'
              'Bayes Factor!!')
        return prior/post, np.nan
    else:
        print('Not enough samples in all bins.'
              'Calculating mean by ignoring np.nan.')
        return (np.nanmean(np.array(bf)[mask]),
                np.nanstd(np.array(bf)[mask]))


fyr = 1/(365.25*24*3600)


[docs]def rn_power(amp, gamma=None, freqs=None, T=None, sum_freqs=True):
    """Calculate the power in a red noise signal assuming the
    P=A^2(f/f_yr)^-gamma form. """
    if gamma is None and freqs is None and amp.ndim>1:
        if T is None:
            raise ValueError('Must provide timespan for power calculation.')
        power = (10**amp)**2 * T
    else:
        power = (10**amp[:, np.newaxis])**2 \
            * (np.array(freqs)/fyr)**-gamma[:, np.newaxis] \
            * (1/fyr)**3 /(12*np.pi**2)
    if sum_freqs:
        return np.sum(power, axis=1)
    else:
        return power


[docs]def powerlaw(freqs, log10_A=-16, gamma=5):
    df = np.diff(np.concatenate((np.array([0]), freqs)))
    return ((10**log10_A)**2 / 12.0 / np.pi**2
            * fyr**(gamma-3) * freqs**(-gamma) * np.repeat(df, 2))


[docs]def weighted_quantile(values, quantiles, sample_weight=None,
                      values_sorted=False, old_style=False):
    """
    Very close to numpy.percentile, but supports weights.
    NOTE: quantiles should be in [0, 1]!
    [From Max Ghenis via Stack Overflow: https://stackoverflow.com/questions/21844024/weighted-percentile-using-numpy]

    Parameters
    ----------

    values : numpy.array
        The data.

    quantiles : array-like
        Many quantiles needed.

    sample_weight : array-like
        Samples weights. The same length as `array`.

    values_sorted : bool
        If True, then will avoid sorting of initial array.

    old_style: bool
        If True, will correct output to be consistent with numpy.percentile.

    Returns
    -------
    computed quantiles : numpy.array

    """
    values = np.array(values)
    quantiles = np.array(quantiles)
    if sample_weight is None:
        sample_weight = np.ones(len(values))
    sample_weight = np.array(sample_weight)
    assert np.all(quantiles >= 0) and np.all(quantiles <= 1), \
        'quantiles should be in [0, 1]'
    if not values_sorted:
        sorter = np.argsort(values)
        values = values[sorter]
        sample_weight = sample_weight[sorter]
    weighted_quantiles = np.cumsum(sample_weight) - 0.5 * sample_weight
    if old_style:
        # To be convenient with numpy.percentile
        weighted_quantiles -= weighted_quantiles[0]
        weighted_quantiles /= weighted_quantiles[-1]
    else:
        weighted_quantiles /= np.sum(sample_weight)
    return np.interp(quantiles, weighted_quantiles, values)


[docs]def quantize_fast(toas, residuals, toaerrs, dt=0.1):  # flags=None,
    r"""
    Function to quantize and average TOAs by observation epoch. Used especially
    for NANOGrav multiband data.

    Based on `[3]`_.

    .. _[3]: https://github.com/vallis/libstempo/blob/master/libstempo/toasim.py

    Parameters
    ----------

    toas : array

    residuals : array

    toaerrs : array

    dt : float
        Coarse graining time [sec].
    """
    isort = np.argsort(toas)

    bucket_ref = [toas[isort[0]]]
    bucket_ind = [[isort[0]]]
    for i in isort[1:]:
        if toas[i] - bucket_ref[-1] < dt:
            bucket_ind[-1].append(i)
        else:
            bucket_ref.append(toas[i])
            bucket_ind.append([i])

    averesids = np.array([np.average(residuals[l],
                                     weights=np.power(toaerrs[l], -2))
                          for l in bucket_ind], 'd')
    residRMS = np.array([np.sqrt(np.mean(np.square(residuals[l])))
                         for l in bucket_ind], 'd')
    avetoas = np.array([np.mean(toas[l]) for l in bucket_ind], 'd')
    avetoaerrs = np.array([sps.hmean(toaerrs[l]) for l in bucket_ind], 'd')
    output = np.array([avetoas, averesids, avetoaerrs, residRMS]).T
    return output


[docs]def epoch_ave_resid(psr, correction=None, dt=10):
    """
    Epoch averaged residuals organized by receiver.

    Parameters
    ----------
    psr :  `enterprise.pulsar.Pulsar`

    correction : array, optional
        Numpy array which gives a correction to the residuals. Used for adding
        various Gaussian process realizations or timing model perturbations.

    dt : float
        Coarse graining time [sec]. Sets filter size for TOAs.

    Returns
    -------
    fe_resids : dict of arrays
        Dictionary where each entry is an array of epoch averaged TOAS,
        residuals and TOA errors. Keys are the various receivers.

    fe_mask : dict of arrays
        Dictionary where each entry is an array that asks as a mask for the
        receiver used as a key.
    """
    ng_frontends=['327', '430', 'Rcvr_800', 'Rcvr1_2', 'L-wide', 'S-wide']
    fe_masks = {}
    fe_resids = {}
    psr_fe = np.unique(psr.flags['fe'])
    if correction is None:
        resids = psr.residuals
    else:
        resids = psr.residuals - correction

    for fe in ng_frontends:
        if fe in psr_fe:
            fe_masks[fe] = np.array(psr.flags['fe']==fe)
            mk = fe_masks[fe]
            fe_resids[fe] = quantize_fast(psr.toas[mk], resids[mk],
                                          psr.toaerrs[mk], dt=dt)
    return fe_resids, fe_masks

################## Plot Parameters ############################


[docs]def figsize(scale):
    fig_width_pt = 513.17  # 469.755    # Get this from LaTeX using \the\textwidth
    inches_per_pt = 1.0 / 72.27         # Convert pt to inch
    golden_mean = (np.sqrt(5.0)-1.0)/2.0    # Aesthetic ratio
    fig_width = fig_width_pt * inches_per_pt * scale  # width in inches
    fig_height = fig_width*golden_mean             # height in inches
    fig_size = [fig_width, fig_height]
    return fig_size


[docs]def set_publication_params(param_dict=None, scale=0.5):
    plt.rcParams.update(plt.rcParamsDefault)
    params = {'backend': 'pdf',
              'axes.labelsize': 10,
              'lines.markersize': 4,
              'font.size': 10,
              'xtick.major.size': 6,
              'xtick.minor.size': 3,
              'ytick.major.size': 6,
              'ytick.minor.size': 3,
              'xtick.major.width': 0.5,
              'ytick.major.width': 0.5,
              'xtick.minor.width': 0.5,
              'ytick.minor.width': 0.5,
              'lines.markeredgewidth': 1,
              'axes.linewidth': 1.2,
              'legend.fontsize': 7,
              'xtick.labelsize': 10,
              'ytick.labelsize': 10,
              'savefig.dpi': 200,
              'path.simplify': True,
              'font.family': 'serif',
              # 'font.serif':'Times New Roman',
              #'text.latex.preamble': [r'\usepackage{amsmath}'],
              'text.usetex': True,
              'figure.figsize': figsize(scale)}

    if param_dict is not None:
        params.update(param_dict)

    plt.rcParams.update(params)