Source code for ocbpy.boundaries.dmsp_ssj_files

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# DOI: 10.5281/zenodo.1179230
# Full license can be found in License.md
#
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# -----------------------------------------------------------------------------
"""Download and format DMSP SSJ boundary files.

References
----------
.. [2] Angeline Burrell, Christer van der Meeren, & Karl M. Laundal. (2020).
   aburrell/aacgmv2 (All Versions). Zenodo. doi:10.5281/zenodo.1212694.

.. [5] Kilcommons, L.M., et al. (2017), A new DMSP magnetometer and auroral
   boundary data set and estimates of field-aligned currents in dynamic auroral
   boundary coordinates, J. Geophys. Res.: Space Phys., 122, pp 9068-9079,
   doi:10.1002/2016ja023342.

.. [7] Kilcommons, L., Redmon, R., & Knipp, D. (2019). Defense Meteorology
   Satellite Program (DMSP) Electron Precipitation (SSJ) Auroral Boundaries,
   2010-2014 (1.0.0) [Data set]. Zenodo. doi:10.5281/zenodo.3373812

"""

import datetime as dt
import hashlib
import numpy as np
import os
import zipfile

import aacgmv2

import ocbpy

try:
    import zenodo_get
except ImportError as ierr:
    raise ImportError(''.join(['unable to load `zenodo_get` module; avalable ',
                               'from PyPi.\n', str(ierr)]))



[docs]
def fetch_ssj_boundary_files(stime=None, etime=None, out_dir=None,
                             sat_nums=None, doi='10.5281/zenodo.3373811',
                             rm_temp=True):
    """Download DMSP SSJ files and place them in a specified directory.

    Parameters
    ----------
    stime : dt.datetime or NoneType
        Start time or None to get all boundaries (default=None)
    etime : dt.datetime or NoneType
        End time or None to get all boundaries (default=None)
    out_dir : str or NoneType
        Output directory or None to download to ocbpy boundary directory
        (default=None)
    sat_nums : list or NoneType
        Satellite numbers or None for all satellites (default=None)
    doi : str
        DOI for the DMSP SSJ boundary file Zenodo archive [7]_
        (default='10.5281/zenodo.3373811')
    rm_temp : bool
        Remove all files that are not the final boundary files (default=True)

    Returns
    -------
    out_files : list
        List of filenames corresponding to downloaded boundary files

    Raises
    ------
    ValueError
        If an unknown satellite ID, DOI, or output directory is provided.
    IOError
        If unable to donwload the target archive and identify the zip file
    ImportError
        If called and zenodo_get is not available

    Notes
    -----
    If a file already exists, the routine will add the file to the output list
    without downloading it again.

    """
    # Test the requested satellite outputs. SSJ5 was carried on F16 onwards.
    # F19 was short lived, F20 was not launched. Ref:
    # https://space.skyrocket.de/doc_sdat/dmsp-5d3.htm
    known_sats = [16, 17, 18]

    # Ensure the input parameters are appropriate
    get_all = False
    if sat_nums is None:
        sat_nums = known_sats

        if stime is None and etime is None:
            get_all = True

    if not np.all([snum in known_sats for snum in sat_nums]):
        raise ValueError("".join(["unknown satellite ID in ",
                                  "{:} use {:}".format(sat_nums, known_sats)]))

    # Get and test the output directory
    if out_dir is None:
        out_dir = ocbpy.boundaries.files.get_boundary_directory()

    if not os.path.isdir(out_dir):
        raise ValueError("can't find the output directory")

    # Download the zenodo archive
    zenodo_get.download(doi=doi, output_dir=out_dir)

    # Get the checksum file (does not download)
    zenodo_get.download(doi=doi, output_dir=out_dir, md5=True)
    zenodo_checksum = os.path.join(out_dir, "md5sums.txt")

    # Verify the checksum and retrieve the zip archive name
    if os.path.isfile(zenodo_checksum):
        with open(zenodo_checksum, 'r') as zcheck:
            csum, zip_name = zcheck.read().split()

            # Set the archive name
            archive_name = os.path.join(out_dir, zip_name)

            if not os.path.isfile(archive_name):
                raise IOError(
                    'error downloading archive to output dir: {:}'.format(
                        archive_name))

            # Get the archive checkshum
            with open(os.path.join(out_dir, archive_name), 'rb') as afile:
                asum = hashlib.md5(afile.read()).hexdigest()

        zen_msg = "Checksum is correct" if asum == csum else "Corrupted data"
    else:
        zen_msg = "Checksum file not created"

    # Remove the checksum file
    if zenodo_checksum is not None:
        os.remove(zenodo_checksum)

    # Evaluate checksum output
    if zen_msg.find('Checksum is correct') < 0 and zen_msg.find(
            'already downloaded correctly') < 0:
        raise IOError('Bad checksum: {:s}'.format(zen_msg))

    # Access the zip archive
    with zipfile.ZipFile(archive_name, 'r') as zref:
        namelist = zref.namelist()

        if get_all:
            # If all boundaries are desired, extract all
            zref.extractall(out_dir)
            out_files = [os.path.join(out_dir, fname) for fname in namelist]
        else:
            # Otherwise, extract files for the desired satellites and times
            out_files = list()

            for fname in namelist:
                if evaluate_dmsp_boundary_file(fname, stime, etime, sat_nums):
                    out_files.append(zref.extract(fname, path=out_dir))

    # Remove the zip archive, if desired
    if rm_temp:
        os.remove(archive_name)

    # Return list of available files for these satellites and times
    return out_files




[docs]
def evaluate_dmsp_boundary_file(fname, stime, etime, sat_nums):
    """Evaluate a boundary file name for time range and satellite ID.

    Parameters
    ----------
    fname : str
        Filename with the format:
        dmsp-fII_ssj_precipitating-electrons-ions_YYYYMMDD_vXXX_boundaries.csv
        where II is the satellite ID, YYYY is the year, MM is the month, DD
        is the day of month, and XXX is the version number.
    stime : dt.datetime or NoneType
        Start time or None to get all boundaries
    etime : dt.datetime or NoneType
        End time or None to get all boundaries
    sat_nums : list
        Desired satellite ID numbers

    Returns
    -------
    good_file : bool
        True if the satellite has the desired ID and time, False otherwise

    """
    # Initialize the output
    good_file = False

    # Ensure the time bounds will result in the desired range with an evaluation
    if stime is None:
        stime = dt.datetime(1900, 1, 1)  # A time before DMSP launched

    if etime is None:
        etime = dt.datetime.utcnow()  # A time beyond data uploads

    # Get the file satelite ID
    sat_id = int(fname.split('dmsp-f')[-1].split('_ssj')[0])

    if sat_id in sat_nums:
        # Test the file time
        if stime is None and etime is None:
            good_file = True
        else:
            # Get the file time
            ftime = dt.datetime.strptime(fname.split('_')[3], '%Y%m%d')

            if (stime is None and ftime < etime) or (
                    etime is None and ftime >= stime) or (
                        ftime >= stime and ftime < etime):
                good_file = True

    return good_file




[docs]
def format_ssj_boundary_files(csv_files, ref_alt=830.0,
                              method='GEOCENTRIC|ALLOWTRACE'):
    """Create SSJ boundary files for a list of DMSP SSJ daily satellite files.

    Parameters
    ----------
    csv_files : list
        List of SSJ CSV boundary files with directory structure
    ref_alt : float
        Reference altitude for boundary locations in km (default=830.0)
    method : str
        AACGMV2 method, may use 'TRACE', 'ALLOWTRACE', 'BADIDEA', 'GEOCENTRIC'
        [2]_ (default='GEOCENTRIC|ALLOWTRACE')

    Returns
    -------
    bound_files : list
        List of successfully updated .csv boundary files

    Notes
    -----
    Output format is 'sc date time r x y fom x_1 y_1 x_2 y_2'
    where:

    ===== ==================================================================
     sc    Spacecraft number
     date  YYYY-MM-DD
     time  HH:MM:SS of midpoint between the two measurements for this pass
     r     Half the distance between the two pass boundaries
     x     Distance between the midpoint of the two pass boundaries
           and the AACGMV2 pole in degrees along the dusk-dawn meridian
     y     distance between the midpoint of the two pass boundaries and the
           AACGMV2 pole in degrees along the midnight-noon meridian
     fom   FOM for the boundaries found along this pass
     x_1   x coordinate of the first boundary
     y_1   y coordinate of the first boundary
     x_2   x coordinate of the second boundary
     y_2   y coordinate of the second boundary
    ===== ==================================================================

    Because two points are not enough to define the OCB or EAB across all local
    times, a circle that intersects the two boundary pass points is defined and
    the boundary location saved.  The DMSP SSJ boundary correction function
    will use this information to only return values within a small distance of
    the boundary locations [5]_, [7]_.

    Separate files are created for each boundary and hemisphere, dates and
    spacecraft are combined.

    See Also
    --------
    aacgmv2

    """
    # Error catch for input being a filename
    csv_files = np.asarray(csv_files)
    if len(csv_files.shape) == 0:
        csv_files = np.asarray([csv_files])

    # Remove any bad files
    good_files = list()
    for i, infile in enumerate(csv_files):
        if not os.path.isfile(infile):
            ocbpy.logger.warning("bad input file: {:}".format(infile))
        else:
            good_files.append(i)
    csv_files = csv_files[good_files]

    if len(csv_files) == 0:
        raise ValueError("empty list of input CSV files")

    # Set the hemisphere suffix and boundary prefix
    hemi_suffix = {1: "north", -1: "south"}
    bound_prefix = {'PO': '.ocb', 'EQ': '.eab'}

    # Initialize the file lists
    bad_files = list()

    # Initialize the output header
    out_head = "#sc date time r x y fom x_1 y_1 x_2 y_2\n"

    # Specify the output file information
    outfile_prefix = os.path.commonprefix(list(csv_files)).split('-f')[0]
    filename_sec = os.path.split(
        csv_files[0])[-1].split('dmsp-f')[-1].split('_')
    sdate = filename_sec[3]
    filename_sec = os.path.split(
        csv_files[-1])[-1].split('dmsp-f')[-1].split('_')
    edate = filename_sec[3]

    bound_files = {hh: {bb: "".join([outfile_prefix, "-", filename_sec[1], "_",
                                     hemi_suffix[hh], "_", sdate, "_", edate,
                                     "_", filename_sec[4], bound_prefix[bb]])
                        for bb in bound_prefix.keys()}
                   for hh in hemi_suffix.keys()}
    fpout = {hh: {bb: None for bb in bound_prefix.keys()}
             for hh in hemi_suffix.keys()}

    with open(bound_files[1]['PO'], 'w') as fpout[1]['PO'], \
         open(bound_files[-1]['PO'], 'w') as fpout[-1]['PO'], \
         open(bound_files[1]['EQ'], 'w') as fpout[1]['EQ'], \
         open(bound_files[-1]['EQ'], 'w') as fpout[-1]['EQ']:
        # Output the header in each file
        for hh in hemi_suffix.keys():
            for bb in bound_prefix.keys():
                fpout[hh][bb].write(out_head)

        # Cycle through all the SSJ CSV files, outputing appropriate data into
        # the desired boundary and hemisphere file
        for infile in csv_files:
            # Get spacecraft number and date from filename
            filename_sec = os.path.split(infile)[-1].split(
                'dmsp-f')[-1].split('_')
            sc = int(filename_sec[0])
            file_date = dt.datetime.strptime(filename_sec[3], '%Y%m%d')

            # Get the header line for the data and determine the number of
            # comment lines preceeding the header
            skiprows = 1
            with open(infile, 'r') as fpin:
                head_line = fpin.readline()
                while head_line.find("#") == 0:
                    skiprows += 1
                    head_line = fpin.readline()

            header_list = head_line.split("\n")[0].split(",")

            # Load the file data
            data = np.loadtxt(infile, skiprows=skiprows, delimiter=',')
            if len(data.shape) != 2 or data.shape[1] != len(header_list):
                bad_files.append(infile)
            else:
                # Establish the desired data indices
                time_ind = {bb: [header_list.index('UTSec {:s}{:d}'.format(
                    bb, i)) for i in [1, 2]] for bb in bound_prefix.keys()}
                lat_ind = {bb:
                           [header_list.index(
                               'SC_GEOCENTRIC_LAT {:s}{:d}'.format(bb, i))
                            for i in [1, 2]] for bb in bound_prefix.keys()}
                lon_ind = {bb:
                           [header_list.index(
                               'SC_GEOCENTRIC_LON {:s}{:d}'.format(bb, i))
                            for i in [1, 2]] for bb in bound_prefix.keys()}

                # Calculate the midpoint seconds of day
                mid_utsec = {bb: 0.5 * (data[:, time_ind[bb][1]]
                                        + data[:, time_ind[bb][0]])
                             for bb in time_ind.keys()}

                # Select the hemisphere and FOM
                hemi = data[:, header_list.index('hemisphere')]
                fom = data[:, header_list.index('FOM')]

                # Cycle through each line of data, calculating the
                # necessary information
                for iline, data_line in enumerate(data):
                    hh = hemi[iline]

                    # Get the boundary locations using the midpoint time
                    # (sufficiently accurate at current sec. var.) for each
                    # boundary
                    for bb in bound_prefix.keys():
                        mid_time = file_date + dt.timedelta(
                            seconds=mid_utsec[bb][iline])
                        mloc = aacgmv2.get_aacgm_coord_arr(
                            data_line[lat_ind[bb]], data_line[lon_ind[bb]],
                            ref_alt, mid_time, method=method)

                        # Get the X-Y coordinates of each pass where X is
                        # positive towards dawn and Y is positive towards noon
                        theta = ocbpy.ocb_time.hr2rad(mloc[2] - 6.0)
                        x = (90.0 - abs(mloc[0])) * np.cos(theta)
                        y = (90.0 - abs(mloc[0])) * np.sin(theta)

                        # Get the distance between the Cartesian points
                        rad = np.sqrt((x[0] - x[1])**2 + (y[0] - y[1])**2) / 2.0

                        # The midpoint is the center of this circle
                        mid_x = 0.5 * sum(x)
                        mid_y = 0.5 * sum(y)

                        # Prepare the output line, which has the format:
                        # sc bound hemi date time r x y fom x_1 y_1
                        # x_2 y_2
                        out_line = " ".join(
                            ["{:d}".format(sc),
                             mid_time.strftime('%Y-%m-%d %H:%M:%S'),
                             " ".join(["{:.3f}".format(val)
                                       for val in [rad, mid_x, mid_y,
                                                   fom[iline], x[0], y[0],
                                                   x[1], y[1]]]), "\n"])

                        # Write the output line to the file
                        fpout[hh][bb].write(out_line)

    # If some input files were not processed, inform the user
    if len(bad_files) > 0:
        ocbpy.logger.warning("unable to format {:d} input files: {:}".format(
            len(bad_files), bad_files))

    # Recast the output file dictionary as a flat list
    bound_files = np.array([[fname for fname in ff.values()]
                            for ff in bound_files.values()])

    return list(bound_files.flatten())




[docs]
def fetch_format_ssj_boundary_files(stime, etime, out_dir=None, rm_temp=True,
                                    ref_alt=830.0,
                                    method='GEOCENTRIC|ALLOWTRACE'):
    """Download DMSP SSJ data and create boundary files for each hemisphere.

    Parameters
    ----------
    stime : dt.datetime or NoneType
        Start time or NoneType to fetch all
    etime : dt.datetime
        End time or NoneType to fetch all
    out_dir : str or NoneType
        Output directory or None to download to ocbpy boundary directory
        (default=None)
    rm_temp : bool
        Remove all files that are not the final boundary files (default=True)
    ref_alt : float
        Reference altitude for boundary locations in km (default=830.0)
    method : str
        AACGMV2 method, may use 'TRACE', 'ALLOWTRACE', 'BADIDEA', 'GEOCENTRIC'
        [2]_ (default='GEOCENTRIC|ALLOWTRACE')

    Returns
    -------
    bound_files : list
        List of the boundary file names

    Raises
    ------
    ValueError
        If the DMSP SSJ files could not be downloaded

    See Also
    --------
    aacgmv2, requests.exceptions.ProxyError

    """
    # Fetch the DMSP SSJ boundary files from the Zenodo archive
    csv_files = fetch_ssj_boundary_files(stime, etime, out_dir=out_dir,
                                         rm_temp=rm_temp)

    # Test to see if there are any DMSP processed files
    if len(csv_files) == 0:
        raise ValueError("".join(["unable to download the SSJ files from ",
                                  "{:} to {:}".format(stime, etime)]))

    # Create the boundary files
    bound_files = format_ssj_boundary_files(csv_files, ref_alt=ref_alt,
                                            method=method)

    # Remove the CSV files, as their data has been processed
    if rm_temp:
        for tmp_file in csv_files:
            os.remove(tmp_file)

    return bound_files