Source code for ocbpy.boundaries.dmsp_ssj_files

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# DOI: 10.5281/zenodo.1179230
# Full license can be found in License.md
#
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# -----------------------------------------------------------------------------
"""Download and format DMSP SSJ boundary files.

References
----------
.. [2] Angeline Burrell, Christer van der Meeren, & Karl M. Laundal. (2020).
   aburrell/aacgmv2 (All Versions). Zenodo. doi:10.5281/zenodo.1212694.

.. [5] Kilcommons, L.M., et al. (2017), A new DMSP magnetometer and auroral
   boundary data set and estimates of field-aligned currents in dynamic auroral
   boundary coordinates, J. Geophys. Res.: Space Phys., 122, pp 9068-9079,
   doi:10.1002/2016ja023342.

.. [7] Kilcommons, L., Redmon, R., & Knipp, D. (2019). Defense Meteorology
   Satellite Program (DMSP) Electron Precipitation (SSJ) Auroral Boundaries,
   2010-2014 (1.0.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.3373812

"""

import datetime as dt
from io import StringIO
import numpy as np
import os
import sys
import zipfile

import aacgmv2

import ocbpy

try:
    import zenodo_get
except ImportError as ierr:
    raise ImportError(''.join(['unable to load `zenodo_get` module; avalable ',
                               'from PyPi.\n', str(ierr)]))


[docs] def fetch_ssj_boundary_files(stime=None, etime=None, out_dir=None, sat_nums=None, doi='10.5281/zenodo.3373811', rm_temp=True): """Download DMSP SSJ files and place them in a specified directory. Parameters ---------- stime : dt.datetime or NoneType Start time or None to get all boundaries (default=None) etime : dt.datetime or NoneType End time or None to get all boundaries (default=None) out_dir : str or NoneType Output directory or None to download to ocbpy boundary directory (default=None) sat_nums : list or NoneType Satellite numbers or None for all satellites (default=None) doi : str DOI for the DMSP SSJ boundary file Zenodo archive [7]_ (default='10.5281/zenodo.3373811') rm_temp : bool Remove all files that are not the final boundary files (default=True) Returns ------- out_files : list List of filenames corresponding to downloaded boundary files Raises ------ ValueError If an unknown satellite ID, DOI, or output directory is provided. IOError If unable to donwload the target archive and identify the zip file ImportError If called and zenodo_get is not available Notes ----- If a file already exists, the routine will add the file to the output list without downloading it again. """ # Test the requested satellite outputs. SSJ5 was carried on F16 onwards. # F19 was short lived, F20 was not launched. Ref: # https://space.skyrocket.de/doc_sdat/dmsp-5d3.htm known_sats = [16, 17, 18] # Ensure the input parameters are appropriate get_all = False if sat_nums is None: sat_nums = known_sats if stime is None and etime is None: get_all = True if not np.all([snum in known_sats for snum in sat_nums]): raise ValueError("".join(["unknown satellite ID in ", "{:} use {:}".format(sat_nums, known_sats)])) # Get and test the output directory if out_dir is None: out_dir = ocbpy.boundaries.files.get_boundary_directory() if not os.path.isdir(out_dir): raise ValueError("can't find the output directory") # Download the zenodo archive, capturing the output zenodo_io = StringIO() sys.stdout = zenodo_io sys.stderr = zenodo_io # TODO(#151): remove the old (second) way of calling zenodo_get if hasattr(zenodo_get, "download"): zenodo_get.download(doi=doi, output_dir=out_dir) zenodo_checksum = None else: zenodo_get.zenodo_get([doi, '-o', out_dir]) zenodo_checksum = os.path.join(out_dir, 'md5sums.txt') # Parse the output and retrieve files from the zip archive sys.stdout = sys.__stdout__ sys.stderr = sys.__stderr__ zen_msg = zenodo_io.getvalue() zen_split = zen_msg.split() if zen_msg.find('Checksum is correct') < 0 and zen_msg.find( 'already downloaded correctly') < 0: raise IOError('Bad checksum: {:s}'.format(zen_msg)) # Remove the checksum file if the download problem wasn't found there if zenodo_checksum is not None: os.remove(zenodo_checksum) # Get the archive name from the output try: link_ind = zen_split.index('Link:') + 1 # If the archive is already available, message may differ zip_name = os.path.split(zen_split[link_ind]) while zip_name[-1].find(".zip") <= 0: zip_name = os.path.split(zip_name[0]) # Set the archive name archive_name = os.path.join(out_dir, zip_name[-1]) except (ValueError, IndexError): raise IOError('unable to identify zenodo archive: {:}'.format(zen_msg)) if not os.path.isfile(archive_name): raise IOError('error downloading archive to output dir: {:}'.format( archive_name)) # Access the zip archive with zipfile.ZipFile(archive_name, 'r') as zref: namelist = zref.namelist() if get_all: # If all boundaries are desired, extract all zref.extractall(out_dir) out_files = [os.path.join(out_dir, fname) for fname in namelist] else: # Otherwise, extract files for the desired satellites and times out_files = list() for fname in namelist: if evaluate_dmsp_boundary_file(fname, stime, etime, sat_nums): out_files.append(zref.extract(fname, path=out_dir)) # Remove the zip archive, if desired if rm_temp: os.remove(archive_name) # Return list of available files for these satellites and times return out_files
[docs] def evaluate_dmsp_boundary_file(fname, stime, etime, sat_nums): """Evaluate a boundary file name for time range and satellite ID. Parameters ---------- fname : str Filename with the format: dmsp-fII_ssj_precipitating-electrons-ions_YYYYMMDD_vXXX_boundaries.csv where II is the satellite ID, YYYY is the year, MM is the month, DD is the day of month, and XXX is the version number. stime : dt.datetime or NoneType Start time or None to get all boundaries etime : dt.datetime or NoneType End time or None to get all boundaries sat_nums : list Desired satellite ID numbers Returns ------- good_file : bool True if the satellite has the desired ID and time, False otherwise """ # Initialize the output good_file = False # Ensure the time bounds will result in the desired range with an evaluation if stime is None: stime = dt.datetime(1900, 1, 1) # A time before DMSP launched if etime is None: etime = dt.datetime.utcnow() # A time beyond data uploads # Get the file satelite ID sat_id = int(fname.split('dmsp-f')[-1].split('_ssj')[0]) if sat_id in sat_nums: # Test the file time if stime is None and etime is None: good_file = True else: # Get the file time ftime = dt.datetime.strptime(fname.split('_')[3], '%Y%m%d') if (stime is None and ftime < etime) or ( etime is None and ftime >= stime) or ( ftime >= stime and ftime < etime): good_file = True return good_file
[docs] def format_ssj_boundary_files(csv_files, ref_alt=830.0, method='GEOCENTRIC|ALLOWTRACE'): """Create SSJ boundary files for a list of DMSP SSJ daily satellite files. Parameters ---------- csv_files : list List of SSJ CSV boundary files with directory structure ref_alt : float Reference altitude for boundary locations in km (default=830.0) method : str AACGMV2 method, may use 'TRACE', 'ALLOWTRACE', 'BADIDEA', 'GEOCENTRIC' [2]_ (default='GEOCENTRIC|ALLOWTRACE') Returns ------- bound_files : list List of successfully updated .csv boundary files Notes ----- Output format is 'sc date time r x y fom x_1 y_1 x_2 y_2' where: ===== ================================================================== sc Spacecraft number date YYYY-MM-DD time HH:MM:SS of midpoint between the two measurements for this pass r Half the distance between the two pass boundaries x Distance between the midpoint of the two pass boundaries and the AACGMV2 pole in degrees along the dusk-dawn meridian y distance between the midpoint of the two pass boundaries and the AACGMV2 pole in degrees along the midnight-noon meridian fom FOM for the boundaries found along this pass x_1 x coordinate of the first boundary y_1 y coordinate of the first boundary x_2 x coordinate of the second boundary y_2 y coordinate of the second boundary ===== ================================================================== Because two points are not enough to define the OCB or EAB across all local times, a circle that intersects the two boundary pass points is defined and the boundary location saved. The DMSP SSJ boundary correction function will use this information to only return values within a small distance of the boundary locations [5]_, [7]_. Separate files are created for each boundary and hemisphere, dates and spacecraft are combined. See Also -------- aacgmv2 """ # Error catch for input being a filename csv_files = np.asarray(csv_files) if len(csv_files.shape) == 0: csv_files = np.asarray([csv_files]) # Remove any bad files good_files = list() for i, infile in enumerate(csv_files): if not os.path.isfile(infile): ocbpy.logger.warning("bad input file: {:}".format(infile)) else: good_files.append(i) csv_files = csv_files[good_files] if len(csv_files) == 0: raise ValueError("empty list of input CSV files") # Set the hemisphere suffix and boundary prefix hemi_suffix = {1: "north", -1: "south"} bound_prefix = {'PO': '.ocb', 'EQ': '.eab'} # Initialize the file lists bad_files = list() # Initialize the output header out_head = "#sc date time r x y fom x_1 y_1 x_2 y_2\n" # Specify the output file information outfile_prefix = os.path.commonprefix(list(csv_files)).split('-f')[0] filename_sec = os.path.split( csv_files[0])[-1].split('dmsp-f')[-1].split('_') sdate = filename_sec[3] filename_sec = os.path.split( csv_files[-1])[-1].split('dmsp-f')[-1].split('_') edate = filename_sec[3] bound_files = {hh: {bb: "".join([outfile_prefix, "-", filename_sec[1], "_", hemi_suffix[hh], "_", sdate, "_", edate, "_", filename_sec[4], bound_prefix[bb]]) for bb in bound_prefix.keys()} for hh in hemi_suffix.keys()} fpout = {hh: {bb: None for bb in bound_prefix.keys()} for hh in hemi_suffix.keys()} with open(bound_files[1]['PO'], 'w') as fpout[1]['PO'], \ open(bound_files[-1]['PO'], 'w') as fpout[-1]['PO'], \ open(bound_files[1]['EQ'], 'w') as fpout[1]['EQ'], \ open(bound_files[-1]['EQ'], 'w') as fpout[-1]['EQ']: # Output the header in each file for hh in hemi_suffix.keys(): for bb in bound_prefix.keys(): fpout[hh][bb].write(out_head) # Cycle through all the SSJ CSV files, outputing appropriate data into # the desired boundary and hemisphere file for infile in csv_files: # Get spacecraft number and date from filename filename_sec = os.path.split(infile)[-1].split( 'dmsp-f')[-1].split('_') sc = int(filename_sec[0]) file_date = dt.datetime.strptime(filename_sec[3], '%Y%m%d') # Get the header line for the data and determine the number of # comment lines preceeding the header skiprows = 1 with open(infile, 'r') as fpin: head_line = fpin.readline() while head_line.find("#") == 0: skiprows += 1 head_line = fpin.readline() header_list = head_line.split("\n")[0].split(",") # Load the file data data = np.loadtxt(infile, skiprows=skiprows, delimiter=',') if len(data.shape) != 2 or data.shape[1] != len(header_list): bad_files.append(infile) else: # Establish the desired data indices time_ind = {bb: [header_list.index('UTSec {:s}{:d}'.format( bb, i)) for i in [1, 2]] for bb in bound_prefix.keys()} lat_ind = {bb: [header_list.index( 'SC_GEOCENTRIC_LAT {:s}{:d}'.format(bb, i)) for i in [1, 2]] for bb in bound_prefix.keys()} lon_ind = {bb: [header_list.index( 'SC_GEOCENTRIC_LON {:s}{:d}'.format(bb, i)) for i in [1, 2]] for bb in bound_prefix.keys()} # Calculate the midpoint seconds of day mid_utsec = {bb: 0.5 * (data[:, time_ind[bb][1]] + data[:, time_ind[bb][0]]) for bb in time_ind.keys()} # Select the hemisphere and FOM hemi = data[:, header_list.index('hemisphere')] fom = data[:, header_list.index('FOM')] # Cycle through each line of data, calculating the # necessary information for iline, data_line in enumerate(data): hh = hemi[iline] # Get the boundary locations using the midpoint time # (sufficiently accurate at current sec. var.) for each # boundary for bb in bound_prefix.keys(): mid_time = file_date + dt.timedelta( seconds=mid_utsec[bb][iline]) mloc = aacgmv2.get_aacgm_coord_arr( data_line[lat_ind[bb]], data_line[lon_ind[bb]], ref_alt, mid_time, method=method) # Get the X-Y coordinates of each pass where X is # positive towards dawn and Y is positive towards noon theta = ocbpy.ocb_time.hr2rad(mloc[2] - 6.0) x = (90.0 - abs(mloc[0])) * np.cos(theta) y = (90.0 - abs(mloc[0])) * np.sin(theta) # Get the distance between the Cartesian points rad = np.sqrt((x[0] - x[1])**2 + (y[0] - y[1])**2) / 2.0 # The midpoint is the center of this circle mid_x = 0.5 * sum(x) mid_y = 0.5 * sum(y) # Prepare the output line, which has the format: # sc bound hemi date time r x y fom x_1 y_1 # x_2 y_2 out_line = " ".join( ["{:d}".format(sc), mid_time.strftime('%Y-%m-%d %H:%M:%S'), " ".join(["{:.3f}".format(val) for val in [rad, mid_x, mid_y, fom[iline], x[0], y[0], x[1], y[1]]]), "\n"]) # Write the output line to the file fpout[hh][bb].write(out_line) # If some input files were not processed, inform the user if len(bad_files) > 0: ocbpy.logger.warning("unable to format {:d} input files: {:}".format( len(bad_files), bad_files)) # Recast the output file dictionary as a flat list bound_files = np.array([[fname for fname in ff.values()] for ff in bound_files.values()]) return list(bound_files.flatten())
[docs] def fetch_format_ssj_boundary_files(stime, etime, out_dir=None, rm_temp=True, ref_alt=830.0, method='GEOCENTRIC|ALLOWTRACE'): """Download DMSP SSJ data and create boundary files for each hemisphere. Parameters ---------- stime : dt.datetime or NoneType Start time or NoneType to fetch all etime : dt.datetime End time or NoneType to fetch all out_dir : str or NoneType Output directory or None to download to ocbpy boundary directory (default=None) rm_temp : bool Remove all files that are not the final boundary files (default=True) ref_alt : float Reference altitude for boundary locations in km (default=830.0) method : str AACGMV2 method, may use 'TRACE', 'ALLOWTRACE', 'BADIDEA', 'GEOCENTRIC' [2]_ (default='GEOCENTRIC|ALLOWTRACE') Returns ------- bound_files : list List of the boundary file names Raises ------ ValueError If the DMSP SSJ files could not be downloaded See Also -------- aacgmv2, requests.exceptions.ProxyError """ # Fetch the DMSP SSJ boundary files from the Zenodo archive csv_files = fetch_ssj_boundary_files(stime, etime, out_dir=out_dir, rm_temp=rm_temp) # Test to see if there are any DMSP processed files if len(csv_files) == 0: raise ValueError("".join(["unable to download the SSJ files from ", "{:} to {:}".format(stime, etime)])) # Create the boundary files bound_files = format_ssj_boundary_files(csv_files, ref_alt=ref_alt, method=method) # Remove the CSV files, as their data has been processed if rm_temp: for tmp_file in csv_files: os.remove(tmp_file) return bound_files