Source code for pydivide.resample

# Copyright 2017 Regents of the University of Colorado. All Rights Reserved.
# Released under the MIT license.
# This software was developed at the University of Colorado's Laboratory for Atmospheric and Space Physics.
# Verify current version before use at: https://github.com/MAVENSDC/Pydivide

import datetime
from scipy import interpolate
import numpy as np
import pandas as pd

import pytplot as tplot


def _to_unix_seconds(value):
    '''Return ``value`` as a number of seconds since the epoch.

    Strings are converted with ``pytplot.tplot_utilities.str_to_int``; anything else is assumed to
    already be a numeric time and is simply cast to ``float``.
    '''
    if isinstance(value, str):
        return tplot.tplot_utilities.str_to_int(value)
    return float(value)


def _to_time_string(value):
    '''Return ``value`` as a ``YYYY-MM-DDTHH:MM:SS`` label; strings are passed through unchanged.'''
    if isinstance(value, str):
        return value
    # Timezone-aware replacement for the deprecated datetime.utcfromtimestamp(); same UTC output.
    stamp = datetime.datetime.fromtimestamp(float(value), datetime.timezone.utc)
    return stamp.strftime('%Y-%m-%dT%H:%M:%S')


def _is_string_column(column):
    '''Return True when ``column`` holds strings and therefore cannot be interpolated.

    The first element that is either a ``str`` or a finite ``float`` decides the answer. Columns
    with a numeric dtype cannot contain strings, so they skip the element-by-element scan.
    '''
    if pd.api.types.is_numeric_dtype(column):
        return False
    for value in column.values:
        if isinstance(value, str):
            return True
        if isinstance(value, float) and np.isfinite(value):
            return False
    return False


def _resample_values(column, old_times, new_times, nearest_idx):
    '''Resample one column: nearest neighbor for string data, linear interpolation otherwise.'''
    if _is_string_column(column):
        return column.iloc[nearest_idx].values
    # interp1d is used with its default kind (linear) and raises ValueError outside old_times.
    return interpolate.interp1d(old_times, column.values)(new_times)


def resample(kp, time, sc_only=False):
    '''
    Resample a KP insitu data structure onto user specified times via interpolation.

    Numeric observations are linearly interpolated to the requested times. String observations
    (and the inbound/outbound flag) cannot be interpolated, so the value at the nearest original
    time is used instead. The input structure is not modified; a new structure is returned.

    Parameters:
        kp: struct
            KP insitu data structure read from file(s). Must provide the 'Time', 'TimeString', 'Orbit' and
            'IOflag' series plus one entry (a dataframe or None) for each of 'LPW', 'EUV', 'SWEA', 'SWIA',
            'STATIC', 'SEP', 'MAG', 'NGIMS', 'APP' and 'SPACECRAFT'.
        time: list
            Times to resample to, expected in ascending order. Each entry is either a number of seconds since
            the epoch (int, float or numpy scalar) or a time string such as 'YYYY-MM-DD HH:MM:SS' that
            pytplot.tplot_utilities.str_to_int can convert. Any sequence (list, tuple, numpy array) is accepted.
        sc_only: bool
            Currently unused; kept so that existing callers keep working.

    Returns:
        dict
            New KP structure with the keys 'TimeString', 'Time', 'Orbit', 'IOflag', 'LPW', 'EUV', 'SWEA',
            'SWIA', 'STATIC', 'SEP', 'MAG', 'NGIMS', 'APP' and 'SPACECRAFT'. Every series and dataframe is
            indexed by the new time strings; instruments that were None in the input stay None.
            None is returned (after printing a message) when the first requested time is before, or the last
            requested time is after, the time range covered by kp, because this routine does not extrapolate.

    Raises:
        IndexError: if time is empty.
        ValueError: from scipy if any other requested time lies outside the range covered by kp.

    Examples:
        >>> # Resample insitu time to 2016-06-20 coarse survey 3D file time.
        >>> swi_cdf = cdflib.CDF('<dir_path>/mvn_swi_l2_coarsesvy3d_20160620_v01_r00.cdf')
        >>> newtime = swi_cdf.varget('time_unix')
        >>> insitu_resampled = pydivide.resample(insitu, newtime)

        >>> # Resamples an entire day of data to just 3 points
        >>> import pytplot
        >>> insitu, iuvs = pydivide.read(input_time=['2016-02-18', '2016-02-19'])
        >>> x = pydivide.resample(insitu, [pytplot.tplot_utilities.str_to_int('2016-02-18T05:00:00'),
        >>>                      pytplot.tplot_utilities.str_to_int('2016-02-18T10:00:00'),
        >>>                      pytplot.tplot_utilities.str_to_int('2016-02-18T15:00:00')])
    '''
    # Convert the requested times to numbers once, up front. Everything below works on this array,
    # so str, int, float and numpy inputs are all handled the same way.
    new_times = np.array([_to_unix_seconds(t) for t in time], dtype=float)
    old_times = kp['Time'].values

    if new_times[0] < old_times[0]:
        print('The requested start time is before the earliest data point in the input data structure.')
        print('This routine DOES NOT extrapolate. Please read in more KP data that covers the requested time span.')
        return None

    if new_times[-1] > old_times[-1]:
        print('The requested end time is after the latest data point in the input data structure.')
        print('This routine DOES NOT extrapolate. Please read in more KP data that covers the requested time span.')
        return None

    # Position of the closest original sample for every new time (used for nearest neighbor lookups).
    # Positions are kept instead of time values so no floating point equality test is needed later.
    nearest_idx = [int(np.absolute(old_times - t).argmin()) for t in new_times]

    # Every output series/dataframe is indexed by the new time strings.
    new_time_strings = [_to_time_string(t) for t in time]
    new_index = pd.Index(new_time_strings, name='Time Index')

    kp_new = {
        'TimeString': pd.Series(new_time_strings, index=new_index, name='Time'),
        'Time': pd.Series(new_times, index=new_index, name='Time'),
        'Orbit': pd.Series(interpolate.interp1d(old_times, kp['Orbit'].values)(new_times),
                           index=new_index, name='Orbit'),
        'IOflag': pd.Series(kp['IOflag'].iloc[nearest_idx].values, index=new_index, name='IOflag'),
    }

    # Instrument dataframes, in the order they appear in the returned structure.
    inst_names = ['LPW', 'EUV', 'SWEA', 'SWIA', 'STATIC', 'SEP', 'MAG', 'NGIMS', 'APP', 'SPACECRAFT']
    for name in inst_names:
        frame = kp[name]
        if frame is None:
            kp_new[name] = None
            continue
        columns = {obs: _resample_values(frame[obs], old_times, new_times, nearest_idx)
                   for obs in frame.columns}
        # Passing columns= keeps the original column order (and yields an empty, correctly indexed
        # frame when the instrument has no columns).
        kp_new[name] = pd.DataFrame(columns, index=new_index, columns=frame.columns)

    return kp_new