# Source code for pydarn.sdio.sdDataRead

# Copyright (C) 2012  VT SuperDARN Lab
# Full license can be found in LICENSE.txt
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
.. module:: sdDataRead
   :synopsis: A module for reading processed SD data (grid, map)

.. moduleauthor:: AJ, 20130607

************************************
**Module**: pydarn.sdio.sdDataRead
************************************

**Functions**:
  * :func:`pydarn.sdio.sdDataRead.sdDataOpen`
  * :func:`pydarn.sdio.sdDataRead.sdDataReadRec`
  * :func:`pydarn.sdio.sdDataRead.sdDataReadAll`
"""

def _sdPlainName(name):
    """Return *name* with its compression marker removed.

    '.bz2' is looked for first, then '.gz'; a name carrying neither marker
    is returned unchanged.
    """
    for marker in ('.bz2', '.gz'):
        if name.find(marker) != -1:
            return name.replace(marker, '')
    return name


def _sdUnpack(srcName, dstName):
    """Place an uncompressed copy of *srcName* at *dstName*.

    bunzip2/gunzip are run with an argument list (no shell), so file names
    containing spaces or shell metacharacters are handled safely.  A file
    with neither compression marker is simply copied.
    """
    import shutil
    import subprocess

    if srcName.find('.bz2') != -1:
        tool = 'bunzip2'
    elif srcName.find('.gz') != -1:
        tool = 'gunzip'
    else:
        print('cp '+srcName+' '+dstName)
        shutil.copyfile(srcName, dstName)
        return

    print(tool+' -c '+srcName+' > '+dstName+'\n')
    with open(dstName, 'wb') as out:
        subprocess.call([tool, '-c', srcName], stdout=out)


def _sdConcat(parts, target):
    """Join the files in *parts* (in order) into *target*, then delete them."""
    import os
    import shutil

    with open(target, 'wb') as out:
        for name in parts:
            if not os.path.isfile(name):
                print('skipping missing file '+name)
                continue
            with open(name, 'rb') as piece:
                shutil.copyfileobj(piece, out)
    for name in parts:
        print('rm '+name)
        if os.path.isfile(name):
            os.remove(name)


def sdDataOpen(sTime,hemi='north',eTime=None,fileType='grdex',src=None,fileName=None,
               custType='grdex',noCache=False):
    """A function to establish a pipeline through which we can read processed
    SuperDARN data (grid or map files).  Sources are tried in this order: an
    explicitly named file, a cached file in the temporary directory, local
    files, and lastly the remote SFTP server.

    **Args**:
      * **sTime** (`datetime <http://tinyurl.com/bl352yx>`_): the beginning
        time for which you want data
      * **[hemi]** (str): the hemisphere for which you want data, 'north' or
        'south'.  default = 'north'
      * **[eTime]** (`datetime <http://tinyurl.com/bl352yx>`_): the last time
        that you want data for.  if this is set to None, it will be set to
        1 day after sTime.  default = None
      * **[fileType]** (str): The type of data you want to read.  valid
        inputs are: 'grd','grdex','map','mapex'.  If the specified one isn't
        found, we will search for its counterpart (eg mapex instead of map).
        default = 'grdex'.
      * **[src]** (str): the source of the data.  valid inputs are 'local'
        'sftp'.  if this is set to None, it will try all possibilities
        sequentially.  default = None
      * **[fileName]** (str): the name of a specific file which you want to
        open.  If this is set, we will not look for cached files.
        default = None
      * **[custType]** (str): if fileName is specified, the filetype of the
        file.  default = 'grdex'
      * **[noCache]** (boolean): flag to indicate that you do not want to
        check first for cached files.  default = False.

    **Returns**:
      * **myPtr** (:class:`pydarn.sdio.sdDataTypes.sdDataPtr`): a sdDataPtr
        object which contains a link to the data to be read.  this can then
        be passed to sdDataReadRec in order to actually read the data.
        None is returned when no data could be found, or when the named
        file could not be read.

    **Raises**:
      * **AssertionError**: if an argument fails the input checks

    .. note:: the SFTP source reads the host and credentials from the
       environment variables VTDB, DBREADUSER and DBREADPASS.

    **Example**:
      ::

        import datetime as dt
        myPtr = sdDataOpen(dt.datetime(2011,1,1),hemi='north')

    Written by AJ 20130607
    """
    import datetime as dt
    import glob
    import os
    import re

    from pydarn.sdio import sdDataPtr
    from utils.timeUtils import datetimeToEpoch

    #check inputs
    assert(isinstance(sTime,dt.datetime)), \
        'error, sTime must be datetime object'
    assert(hemi == 'north' or hemi == 'south'), \
        "error, hemi must be 'north' or 'south'"
    assert(eTime is None or isinstance(eTime,dt.datetime)), \
        'error, eTime must be datetime object or None'
    assert(fileType in ('grd','grdex','map','mapex')), \
        "error, fileType must be one of: 'grd','grdex','map','mapex'"
    assert(fileName is None or isinstance(fileName,str)), \
        'error, fileName must be None or a string'
    assert(src is None or src == 'local' or src == 'sftp'), \
        'error, src must be one of None,local,sftp'

    if eTime is None:
        eTime = sTime+dt.timedelta(days=1)

    #create a datapointer object
    myPtr = sdDataPtr(sTime=sTime,eTime=eTime,hemi=hemi)

    filelist = []
    #the requested type first, then its counterpart as a fallback
    arr = {'grd': ['grd','grdex'],
           'grdex': ['grdex','grd'],
           'map': ['map','mapex'],
           'mapex': ['mapex','map']}.get(fileType,[fileType])

    #move back a little in time because files often start at 2 mins after the hour
    sTime = sTime-dt.timedelta(minutes=4)

    #a temporary directory to store a temporary file
    tmpDir = '/tmp/sd/'
    d = os.path.dirname(tmpDir)
    if not os.path.exists(d):
        os.makedirs(d)

    cached = False
    fileSt = None

    #FIRST, check if a specific filename was given
    if fileName is not None:
        try:
            if not os.path.isfile(fileName):
                print('problem reading %s :file does not exist' % (fileName,))
                return None
            #always unpack/copy into the temp dir: the working copy is deleted
            #after concatenation, so it must never live next to the user's file
            outname = tmpDir+str(int(datetimeToEpoch(dt.datetime.now())))
            _sdUnpack(fileName,outname)
            filelist.append(outname)
            myPtr.fType,myPtr.dType = custType,'dmap'
            #label the cached copy with the type the caller declared
            fileType = custType
            fileSt = sTime
        except Exception as e:
            print(e)
            print('problem reading file %s' % (fileName,))
            return None

    #Next, check for a cached file
    if fileName is None and not noCache:
        pattern = "%s????????.??????.????????.??????.%s.%s" % (tmpDir,hemi,fileType)
        for f in glob.glob(pattern):
            try:
                ff = f.replace(tmpDir,'')
                #check time span of file
                t1 = dt.datetime(int(ff[0:4]),int(ff[4:6]),int(ff[6:8]),
                                 int(ff[9:11]),int(ff[11:13]),int(ff[13:15]))
                t2 = dt.datetime(int(ff[16:20]),int(ff[20:22]),int(ff[22:24]),
                                 int(ff[25:27]),int(ff[27:29]),int(ff[29:31]))
            except ValueError as e:
                #name matched the glob but does not hold valid timestamps
                print(e)
                continue
            #check if file covers our timespan
            if t1 <= sTime and t2 >= eTime:
                cached = True
                filelist.append(f)
                print('Found cached file: %s' % f)
                break

    #Next, LOOK LOCALLY FOR FILES
    if not cached and (src is None or src == 'local') and fileName is None:
        try:
            for ftype in arr:
                ##################################################################
                ### IF YOU ARE A USER NOT AT VT, YOU PROBABLY HAVE TO CHANGE THIS
                ### TO MATCH YOUR DIRECTORY/FILE STRUCTURE
                ##################################################################
                print('\nLooking locally for %s files' % (ftype,))
                form = '%s.%s.*' % (hemi,ftype)
                #iterate through all of the days in the request
                #ie, iterate through all possible file names
                ctime = sTime
                while ctime <= eTime:
                    #directory on the data server
                    myDir = '/sd-data/'+ctime.strftime("%Y")+'/'+ftype+'/'+hemi+'/'
                    dateStr = ctime.strftime("%Y%m%d")
                    #iterate through all of the files which begin on this day
                    for filename in glob.glob(myDir+dateStr+'.'+form):
                        outname = _sdPlainName(filename.replace(myDir,tmpDir))
                        #unzip (or, for an uncompressed file, copy) into tmpDir
                        _sdUnpack(filename,outname)
                        filelist.append(outname)

                        #HANDLE CACHEING NAME
                        ff = outname.replace(tmpDir,'')
                        #check the beginning time of the file (for cacheing)
                        t1 = dt.datetime(int(ff[0:4]),int(ff[4:6]),int(ff[6:8]),0,0,0)
                        if fileSt is None or t1 < fileSt:
                            fileSt = t1
                    ctime = ctime+dt.timedelta(days=1)

                if len(filelist) > 0:
                    print('found %s data in local files' % (ftype,))
                    myPtr.fType = ftype
                    fileType = ftype
                    break
                else:
                    print('could not find %s data in local files' % (ftype,))
                ##################################################################
                ### END SECTION YOU WILL HAVE TO CHANGE
                ##################################################################
        except Exception as e:
            #deliberately broad: any local failure falls through to other sources
            print(e)
            print('problem reading local data, perhaps you are not at VT?')
            print('you probably have to edit sdDataRead.py')
            print('I will try to read from other sources')
            src = None

    #finally, check the VT sftp server if we have not yet found files
    if (src is None or src == 'sftp') and myPtr.ptr is None and \
            len(filelist) == 0 and fileName is None:
        for ftype in arr:
            print('\nLooking on the remote SFTP server for %s files' % (ftype,))
            transport = None
            try:
                #imported here so local-only use does not require paramiko
                import paramiko as p
                form = '......'+ftype
                #create a transport object for use in sftp-ing
                transport = p.Transport((os.environ['VTDB'], 22))
                transport.connect(username=os.environ['DBREADUSER'],
                                  password=os.environ['DBREADPASS'])
                sftp = p.SFTPClient.from_transport(transport)

                #iterate through all of the days in the request
                #ie, iterate through all possible file names
                ctime = sTime
                oldyr = ''
                while ctime <= eTime:
                    #directory on the data server
                    myDir = '/data/'+ctime.strftime("%Y")+'/'+ftype+'/'+hemi+'/'
                    dateStr = ctime.strftime("%Y%m%d")
                    if ctime.strftime("%Y") != oldyr:
                        #get a list of all the files in the directory
                        allFiles = sftp.listdir(myDir)
                        oldyr = ctime.strftime("%Y")
                    #create a regular expression to find files of this day
                    regex = re.compile(dateStr+'.'+form)
                    #go through all the files in the directory
                    for aFile in allFiles:
                        #if we have a file match between a file and our regex
                        if not regex.match(aFile):
                            continue
                        print('copying file '+myDir+aFile+' to '+tmpDir+aFile)
                        filename = tmpDir+aFile
                        #download the file via sftp
                        sftp.get(myDir+aFile,filename)
                        #unzip the compressed file
                        outname = _sdPlainName(filename)
                        if outname != filename:
                            _sdUnpack(filename,outname)
                        else:
                            #already usable as downloaded; keep it as is
                            print('It seems we have downloaded an uncompressed file :/')
                            print('Strange things might happen from here on out...')
                        filelist.append(outname)

                        #HANDLE CACHEING NAME
                        ff = outname.replace(tmpDir,'')
                        #check the beginning time of the file
                        t1 = dt.datetime(int(ff[0:4]),int(ff[4:6]),int(ff[6:8]),0,0,0)
                        if fileSt is None or t1 < fileSt:
                            fileSt = t1
                    ctime = ctime+dt.timedelta(days=1)

                if len(filelist) > 0:
                    print('found %s data on sftp server' % (ftype,))
                    myPtr.fType = ftype
                    fileType = ftype
                    break
                else:
                    print('could not find %s data on sftp server' % (ftype,))
            except Exception as e:
                print(e)
                print('problem reading from sftp server')
            finally:
                #do not leave the connection open, whatever happened above
                if transport is not None:
                    transport.close()

    #check if we have found files
    if len(filelist) != 0:
        if not cached:
            #concatenate the files into a single file
            print('Concatenating all the files in to one')
            if fileSt is None:
                #no start time could be derived from the file names
                fileSt = sTime
            #choose a temp file name with time span info for cacheing
            tmpName = '%s%s.%s.%s.%s.%s.%s' % (tmpDir,
                      fileSt.strftime("%Y%m%d"),fileSt.strftime("%H%M%S"),
                      eTime.strftime("%Y%m%d"),eTime.strftime("%H%M%S"),hemi,fileType)
            print('cat '+' '.join(filelist)+' > '+tmpName)
            _sdConcat(filelist,tmpName)
        else:
            tmpName = filelist[0]
            myPtr.fType = fileType
            myPtr.dType = 'dmap'

        #open the file
        myPtr.ptr = open(tmpName,'r')

    if myPtr.ptr is not None:
        return myPtr

    print('\nSorry, we could not find any data for you :(')
    return None

def sdDataReadRec(myPtr):
    """A function to read a single record of processed SuperDARN data from a
    :class:`pydarn.sdio.sdDataTypes.sdDataPtr` object.  Records dated before
    myPtr.sTime are skipped; reading stops at the first record dated after
    myPtr.eTime or at the end of the file.

    .. note:: to use this, you must first create a
       :class:`pydarn.sdio.sdDataTypes.sdDataPtr` object with
       :func:`sdDataOpen`

    **Args**:
      * **myPtr** (:class:`pydarn.sdio.sdDataTypes.sdDataPtr`): contains the
        pipeline to the data we are after

    **Returns**:
      * **myData** (:class:`pydarn.sdio.sdDataTypes.gridData` or
        :class:`pydarn.sdio.sdDataTypes.mapData`): an object filled with the
        data we are after.  *will return None when finished reading the
        file*, and also when the pointer is unusable, the file type is not
        recognized, or a record's start time cannot be decoded.

    **Raises**:
      * **AssertionError**: if myPtr is not a sdDataPtr

    **Example**:
      ::

        import datetime as dt
        myPtr = sdDataOpen(dt.datetime(2011,1,1),'south')
        myData = sdDataReadRec(myPtr)

    Written by AJ 20130610
    """
    from pydarn.sdio.sdDataTypes import sdDataPtr, gridData, mapData
    import pydarn
    import datetime as dt

    #check input
    assert(isinstance(myPtr,sdDataPtr)), \
        'error, input must be of type sdDataPtr'
    if myPtr.ptr is None:
        print('error, your pointer does not point to any data')
        return None
    if myPtr.ptr.closed:
        print('error, your file pointer is closed')
        return None

    #pick the container class that matches the file type
    if myPtr.fType == 'grd' or myPtr.fType == 'grdex':
        recClass = gridData
    elif myPtr.fType == 'map' or myPtr.fType == 'mapex':
        recClass = mapData
    else:
        print('error, unrecognized file type')
        return None

    #do this until we reach the requested start time
    while True:
        dfile = pydarn.dmapio.readDmapRec(myPtr.ptr)

        #a None record is treated as end of file; this must be tested
        #before the record is indexed, otherwise it is misreported as a
        #time-decoding problem and the file is left open
        if dfile is None:
            print('\nreached end of data')
            myPtr.ptr.close()
            return None

        #check for valid data
        try:
            dtime = dt.datetime(dfile['start.year'],dfile['start.month'],dfile['start.day'],
                                dfile['start.hour'],dfile['start.minute'],int(dfile['start.second']))
        except Exception as e:
            print(e)
            print('problem reading time from file, returning None')
            return None

        if dtime > myPtr.eTime:
            #past the requested window: clean up, get out
            print('\nreached end of data')
            myPtr.ptr.close()
            return None

        #check that we're in the time window (dtime <= eTime is known here)
        if myPtr.sTime <= dtime:
            #fill the data object
            myData = recClass(dataDict=dfile)
            myData.fType = myPtr.fType
            return myData

def sdDataReadAll(myPtr):
    """A function to read a large amount (to the end of the request) of
    processed SuperDARN data into a list from a
    :class:`pydarn.sdio.sdDataTypes.sdDataPtr` object

    .. note:: to use this, you must first create a
       :class:`pydarn.sdio.sdDataTypes.sdDataPtr` object with
       :func:`sdDataOpen`

    **Args**:
      * **myPtr** (:class:`pydarn.sdio.sdDataTypes.sdDataPtr`): contains the
        pipeline to the data we are after

    **Returns**:
      * **myList** (list): a list filled with
        :class:`pydarn.sdio.sdDataTypes.gridData` or
        :class:`pydarn.sdio.sdDataTypes.mapData` objects holding the data we
        are after.  *will return None if nothing is found*, and also when
        the pointer is unusable, the file type is not recognized, or a
        record's start time cannot be decoded.

    **Raises**:
      * **AssertionError**: if myPtr is not a sdDataPtr

    **Example**:
      ::

        import datetime as dt
        myPtr = sdDataOpen(dt.datetime(2011,1,1),hemi='north',
                           eTime=dt.datetime(2011,1,1,2),fileType='grdex')
        myList = sdDataReadAll(myPtr)

    Written by AJ 20130606
    """
    from pydarn.sdio.sdDataTypes import sdDataPtr, gridData, mapData
    import pydarn
    import datetime as dt

    #check input
    assert(isinstance(myPtr,sdDataPtr)), \
        'error, input must be of type sdDataPtr'
    if myPtr.ptr is None:
        print('error, your pointer does not point to any data')
        return None
    if myPtr.ptr.closed:
        print('error, your file pointer is closed')
        return None

    #pick the container class that matches the file type
    if myPtr.fType == 'grd' or myPtr.fType == 'grdex':
        recClass = gridData
    elif myPtr.fType == 'map' or myPtr.fType == 'mapex':
        recClass = mapData
    else:
        print('error, unrecognized file type')
        return None

    myList = []

    #read until the end of the file or of the requested time window
    while True:
        dfile = pydarn.dmapio.readDmapRec(myPtr.ptr)

        #a None record is treated as end of file; this must be tested
        #before the record is indexed, otherwise everything read so far
        #is thrown away
        if dfile is None:
            print('\nreached end of data')
            myPtr.ptr.close()
            break

        #check for valid data
        try:
            dtime = dt.datetime(dfile['start.year'],dfile['start.month'],dfile['start.day'],
                                dfile['start.hour'],dfile['start.minute'],int(dfile['start.second']))
        except Exception as e:
            print(e)
            print('problem reading time from file, returning None')
            return None

        if dtime > myPtr.eTime:
            #past the requested window: clean up, get out
            print('\nreached end of data')
            myPtr.ptr.close()
            break

        #check that we're in the time window (dtime <= eTime is known here)
        if myPtr.sTime <= dtime:
            #fill the data object
            myData = recClass(dataDict=dfile)
            myData.fType = myPtr.fType
            myList.append(myData)

    if len(myList) > 0:
        print('returning a list with %d records of data' % len(myList))
        return myList

    print('No data found, returning None')
    return None