Source code for pydivide.bin

# Copyright 2017 Regents of the University of Colorado. All Rights Reserved.
# Released under the MIT license.
# This software was developed at the University of Colorado's Laboratory for Atmospheric and Space Physics.
# Verify current version before use at: https://github.com/MAVENSDC/Pydivide

from .utilities import get_inst_obs_labels
from .utilities import initialize_list
from .utilities import place_values_in_list
from .utilities import get_values_from_list
import math
import numpy


def _as_sequence(value):
    """Return sized containers unchanged and wrap anything else in a list."""
    return value if hasattr(value, "__len__") else [value]


def _bin_index(row, coords, mins, maxs, binsize, total_bins):
    """Return the bin index tuple for one row, or None if the row is unusable.

    A row is unusable when any of its bin-by values is NaN or lies outside
    the closed interval [mins[dim], maxs[dim]].
    """
    index = []
    for dim, column in enumerate(coords):
        coord = column[row]
        if math.isnan(coord) or coord < mins[dim] or coord > maxs[dim]:
            return None
        slot = int(math.floor((coord - mins[dim]) / binsize[dim]))
        # A value sitting exactly on the upper limit would land one past the
        # last bin when the range is an exact multiple of the bin size, so
        # the last bin is treated as closed on its upper edge.
        index.append(min(slot, total_bins[dim] - 1))
    return tuple(index)


def bin(kp, parameter=None, bin_by=None, mins=None, maxs=None, binsize=None,
        std=False, avg=False, density=False, median=False, unittest=False):
    """
    Bins insitu Key Parameters by up to 8 different parameters, specified
    within the data structure. Necessary that at least one of avg, std,
    median, or density be specified.

    Parameters:
        kp: struct
            KP insitu data structure read from file(s). It is accessed as
            kp[instrument][observation].
        parameter: int, str
            Key Parameter to be binned. Only one may be binned at a time;
            if a list is given, only its first entry is binned.
        bin_by: int, str, list
            Parameters (index or name) by which to bin the specified
            Key Parameter.
        binsize: int, list
            Bin size for each binning dimension. Must be positive and have
            at least as many elements as bin_by.
        mins: int, list
            Minimum value(s) for each binning scheme. Must have at least as
            many elements as bin_by. Defaults to the minimum of each bin-by
            column.
        maxs: int, list
            Maximum value(s) for each binning scheme. Must have at least as
            many elements as bin_by. Defaults to the maximum of each bin-by
            column.
        avg: bool
            Calculate average per bin.
        std: bool
            Calculate standard deviation per bin (population form, i.e.
            divided by the number of items in the bin).
        density: bool
            Returns number of items in each bin.
        median: bool
            Calculate median per bin.
        unittest: bool
            Accepted for backward compatibility; it has no effect.

    Returns:
        A list holding the requested arrays, always in the order
        median, avg, std, density (entries that were not requested are
        omitted). Bins that received no data hold NaN, except in the
        density array where they hold 0. Returns None (after printing a
        message) when the arguments are invalid or a bin-by column is
        entirely NaN.

    Examples:
        >>> # Bin STATIC O+ characteristic energy by spacecraft latitude (2° resolution) and longitude (1° resolution).
        >>> result = pydivide.bin(insitu, parameter='static.oplus_char_energy', bin_by=['spacecraft.geo_latitude', 'spacecraft.geo_longitude'], avg=True, binsize=[2, 1])
        >>> output_avg = result[0]

        >>> # Bin SWIA H+ density by spacecraft altitude (10km resolution), return average value and standard deviation for each bin.
        >>> output_avg, output_std = pydivide.bin(insitu, parameter='swia.hplus_density', bin_by='spacecraft.altitude', binsize=10, avg=True, std=True)
    """
    # ERROR CHECKING
    # The None checks must run before bin_by is wrapped in a list, otherwise
    # a missing bin_by turns into [None] and slips through.
    if parameter is None:
        print("Must provide an index (or name) for param to be plotted.")
        return None
    if bin_by is None:
        print("Must provide parameters to be binned by.")
        return None
    if not avg and not std and not median and not density:
        print("Must select array(s) to return (avg, std, median, density).")
        return None
    if binsize is None:
        print("Must provide a bin size for each bin-by parameter.")
        return None

    if not isinstance(bin_by, (list, tuple)):
        bin_by = [bin_by]
    binsize = _as_sequence(binsize)
    if mins is not None:
        mins = _as_sequence(mins)
    if maxs is not None:
        maxs = _as_sequence(maxs)

    total_fields = len(bin_by)
    for label, supplied in (("binsize", binsize), ("mins", mins), ("maxs", maxs)):
        if supplied is not None and len(supplied) < total_fields:
            print("ERROR: " + label + " needs one value per bin-by parameter.")
            return None
    # "not size > 0" also rejects NaN bin sizes.
    if any(not size > 0 for size in binsize[:total_fields]):
        print("ERROR: Every bin size must be greater than zero.")
        return None

    # Resolve the (instrument, observation) labels of the binned parameter
    # and of every bin-by parameter.
    if isinstance(parameter, (int, str)):
        parameter = [parameter]
    parameter_inst_obs = [tuple(get_inst_obs_labels(kp, name)) for name in parameter]
    bin_by_inst_obs = [tuple(get_inst_obs_labels(kp, name)) for name in bin_by]

    # Fall back to the data's own extent where limits were not supplied.
    if mins is None:
        mins = []
        for inst, obs in bin_by_inst_obs:
            lowest = kp[inst][obs].min(skipna=True)
            if math.isnan(lowest):
                print("All " + obs + " data is NaN. Cannot bin by this parameter.")
                return None
            mins.append(lowest)
    if maxs is None:
        maxs = []
        for inst, obs in bin_by_inst_obs:
            highest = kp[inst][obs].max(skipna=True)
            if math.isnan(highest):
                print("All " + obs + " data is NaN. Cannot bin by this parameter.")
                return None
            maxs.append(highest)

    # Calculate the dimensions of the binned array
    # using the min/max values and the bin sizes.
    total_bins = []
    for i in range(total_fields):
        span = maxs[i] - mins[i]
        if span < 0:
            print("ERROR: Minimum value of " + str(mins[i])
                  + " is greater than the maximum value of " + str(maxs[i]))
            print("for bin-by parameter " + bin_by_inst_obs[i][1] + ". Returning...")
            return None
        # Always keep at least one bin so a zero-width range still has a
        # place for values equal to the (shared) limit.
        total_bins.append(max(1, int(math.ceil(span / binsize[i]))))

    # Pull the columns out as plain arrays once, so rows are addressed by
    # position regardless of how the underlying container is indexed.
    param_inst, param_obs = parameter_inst_obs[0]
    values = numpy.asarray(kp[param_inst][param_obs])
    coords = [numpy.asarray(kp[inst][obs]) for inst, obs in bin_by_inst_obs]

    # Single pass over the data: collect every value per bin and count them.
    density_array = numpy.zeros(total_bins)
    binned = {}
    for row, value in enumerate(values):
        # Cannot do anything with NaNs. Ignore them and continue.
        if math.isnan(value):
            continue
        index = _bin_index(row, coords, mins, maxs, binsize, total_bins)
        if index is None:
            continue
        binned.setdefault(index, []).append(value)
        density_array[index] += 1

    # Create arrays based on keywords; untouched bins stay NaN.
    median_array = numpy.full(total_bins, numpy.nan) if median else None
    average_array = numpy.full(total_bins, numpy.nan) if avg else None
    std_array = numpy.full(total_bins, numpy.nan) if std else None

    # Statistics are computed once per occupied bin, after all of its values
    # are known (the median cannot be accumulated incrementally).
    for index, samples in binned.items():
        count = density_array[index]
        if median:
            median_array[index] = numpy.nanmedian(samples)
        if avg or std:
            # The mean is kept in a local so std works without avg.
            mean = numpy.nansum(samples) / count
            if avg:
                average_array[index] = mean
            if std:
                deviations = numpy.asarray(samples) - mean
                std_array[index] = numpy.sqrt(numpy.sum(deviations * deviations) / count)

    # RETURN MEDIAN/AVERAGE/STANDARD DEVIATION
    return_list = []
    if median:
        return_list.append(median_array)
        print('Returning binned Medians')
    if avg:
        return_list.append(average_array)
        print('Returning binned Averages')
    if std:
        return_list.append(std_array)
        print('Returning binned standard deviations')
    if density:
        return_list.append(density_array)
        print('Returning binned densities')

    # Print out a little cheat sheet so people know what is in the array they're getting
    print('Now returning binned data')
    for dimension, (_, bin_by_obs) in enumerate(bin_by_inst_obs):
        low = mins[dimension]
        step = binsize[dimension]
        count = total_bins[dimension]
        print('Dimension ' + str(dimension) + ' is ' + bin_by_obs)
        print(' Range: [' + str(low) + ', ' + str(low + step) + ', ... '
              + str(low + (step * (count - 2))) + ', '
              + str(low + (step * (count - 1))) + ']')

    return return_list