Source code for pydarn.sdio.radDataRead

# Copyright (C) 2012  VT SuperDARN Lab
# Full license can be found in LICENSE.txt
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
.. module:: radDataRead
   :synopsis: A module for reading radar data (iqdat, raw, fit)

.. moduleauthor:: AJ, 20130110

************************************
**Module**: pydarn.sdio.radDataRead
************************************

**Functions**:
  * :func:`pydarn.sdio.radDataRead.radDataOpen`
  * :func:`pydarn.sdio.radDataRead.radDataReadRec`
  * :func:`pydarn.sdio.radDataRead.radDataReadScan`
  * :func:`pydarn.sdio.radDataRead.radDataReadAll`
"""

[docs]def radDataOpen(sTime,rad,eTime=None,channel=None,bmnum=None,cp=None, \ fileType='fitex',filtered=False, src=None,fileName=None, \ custType='fitex',noCache=False): """A function to establish a pipeline through which we can read radar data. first it tries the mongodb, then it tries to find local files, and lastly it sftp's over to the VT data server. **Args**: * **sTime** (`datetime <http://tinyurl.com/bl352yx>`_): the beginning time for which you want data * **rad** (str): the 3-letter radar code for which you want data * **[eTime]** (`datetime <http://tinyurl.com/bl352yx>`_): the last time that you want data for. if this is set to None, it will be set to 1 day after sTime. default = None * **[channel]** (str): the 1-letter code for what channel you want data from, eg 'a','b',... if this is set to None, data from ALL channels will be read. default = None * **[bmnum]** (int): the beam number which you want data for. If this is set to None, data from all beams will be read. default = None * **[cp]** (int): the control program which you want data for. If this is set to None, data from all cp's will be read. default = None * **[fileType]** (str): The type of data you want to read. valid inputs are: 'fitex','fitacf','lmfit','rawacf','iqdat'. if you choose a fit file format and the specified one isn't found, we will search for one of the others. Beware: if you ask for rawacf/iq data, these files are large and the data transfer might take a long time. default = 'fitex' * **[filtered]** (boolean): a boolean specifying whether you want the fit data to be boxcar filtered. ONLY VALID FOR FIT. default = False * **[src]** (str): the source of the data. valid inputs are 'local' 'sftp'. if this is set to None, it will try all possibilites sequentially. default = None * **[fileName]** (str): the name of a specific file which you want to open. default=None * **[custType]** (str): if fileName is specified, the filetype of the file. default='fitex' * **[noCache]** (boolean): flag to indicate that you do not want to check first for cached files. default = False. **Returns**: * **myPtr** (:class:`pydarn.sdio.radDataTypes.radDataPtr`): a radDataPtr object which contains a link to the data to be read. this can then be passed to radDataReadRec in order to actually read the data. **Example**: :: import datetime as dt myPtr = radDataOpen(dt.datetime(2011,1,1),'bks',eTime=dt.datetime(2011,1,1,2),channel='a', bmnum=7,cp=153,fileType='fitex',filtered=False, src=None): Written by AJ 20130110 """ import paramiko as p import re import string import datetime as dt, os, pydarn.sdio, glob from pydarn.sdio import radDataPtr from pydarn.radar import network from utils.timeUtils import datetimeToEpoch #check inputs assert(isinstance(sTime,dt.datetime)), \ 'error, sTime must be datetime object' assert(isinstance(rad,str) and len(rad) == 3), \ 'error, rad must be a 3 char string' assert(eTime == None or isinstance(eTime,dt.datetime)), \ 'error, eTime must be datetime object or None' assert(channel == None or (isinstance(channel,str) and len(channel) == 1)), \ 'error, channel must be None or a 1-letter string' assert(bmnum == None or isinstance(bmnum,int)), \ 'error, bmnum must be an int or None' assert(cp == None or isinstance(cp,int)), \ 'error, cp must be an int or None' assert(fileType == 'rawacf' or fileType == 'fitacf' or \ fileType == 'fitex' or fileType == 'lmfit' or fileType == 'iqdat'), \ 'error, fileType must be one of: rawacf,fitacf,fitex,lmfit,iqdat' assert(fileName == None or isinstance(fileName,str)), \ 'error, fileName must be None or a string' assert(isinstance(filtered,bool)), \ 'error, filtered must be True of False' assert(src == None or src == 'local' or src == 'sftp'), \ 'error, src must be one of None,local,sftp' if(eTime == None): eTime = sTime+dt.timedelta(days=1) #create a datapointer object myPtr = radDataPtr(sTime=sTime,eTime=eTime,stid=int(network().getRadarByCode(rad).id), channel=channel,bmnum=bmnum,cp=cp) filelist = [] if(fileType == 'fitex'): arr = ['fitex','fitacf','lmfit'] elif(fileType == 'fitacf'): arr = ['fitacf','fitex','lmfit'] elif(fileType == 'lmfit'): arr = ['lmfit','fitex','fitacf'] else: arr = [fileType] #move back a little in time because files often start at 2 mins after the hour sTime = sTime-dt.timedelta(minutes=4) #a temporary directory to store a temporary file tmpDir = '/tmp/sd/' d = os.path.dirname(tmpDir) if not os.path.exists(d): os.makedirs(d) cached = False fileSt = None #FIRST, check if a specific filename was given if fileName != None: try: if(not os.path.isfile(fileName)): print 'problem reading',fileName,':file does not exist' return None outname = tmpDir+str(int(datetimeToEpoch(dt.datetime.now()))) if(string.find(fileName,'.bz2') != -1): outname = string.replace(fileName,'.bz2','') print 'bunzip2 -c '+fileName+' > '+outname+'\n' os.system('bunzip2 -c '+fileName+' > '+outname) elif(string.find(fileName,'.gz') != -1): outname = string.replace(fileName,'.gz','') print 'gunzip -c '+fileName+' > '+outname+'\n' os.system('gunzip -c '+fileName+' > '+outname) else: os.system('cp '+fileName+' '+outname) print 'cp '+fileName+' '+outname filelist.append(outname) myPtr.fType,myPtr.dType = custType,'dmap' fileSt = sTime except Exception, e: print e print 'problem reading file',fileName return None #Next, check for a cached file if fileName == None and not noCache: try: if filtered: for f in glob.glob("%s????????.??????.????????.??????.%s.%sf" % (tmpDir,rad,fileType)): try: ff = string.replace(f,tmpDir,'') #check time span of file t1 = dt.datetime(int(ff[0:4]),int(ff[4:6]),int(ff[6:8]),int(ff[9:11]),int(ff[11:13]),int(ff[13:15])) t2 = dt.datetime(int(ff[16:20]),int(ff[20:22]),int(ff[22:24]),int(ff[25:27]),int(ff[27:29]),int(ff[29:31])) #check if file covers our timespan if t1 <= sTime and t2 >= eTime: cached = True filelist.append(f) print 'Found cached file: %s' % f break except Exception,e: print e if not cached: for f in glob.glob("%s????????.??????.????????.??????.%s.%s" % (tmpDir,rad,fileType)): try: ff = string.replace(f,tmpDir,'') #check time span of file t1 = dt.datetime(int(ff[0:4]),int(ff[4:6]),int(ff[6:8]),int(ff[9:11]),int(ff[11:13]),int(ff[13:15])) t2 = dt.datetime(int(ff[16:20]),int(ff[20:22]),int(ff[22:24]),int(ff[25:27]),int(ff[27:29]),int(ff[29:31])) #check if file covers our timespan if t1 <= sTime and t2 >= eTime: cached = True filelist.append(f) print 'Found cached file: %s' % f break except Exception,e: print e except Exception,e: print e #Next, LOOK LOCALLY FOR FILES if not cached and (src == None or src == 'local') and fileName == None: try: for ftype in arr: print '\nLooking locally for',ftype,'files' #deal with UAF naming convention fnames = ['??.??.???.'+ftype+'.*'] if(channel == None): fnames.append('??.??.???.a.*') else: fnames.append('??.??.???.'+channel+'.*') for form in fnames: #iterate through all of the hours in the request #ie, iterate through all possible file names ctime = sTime.replace(minute=0) if(ctime.hour % 2 == 1): ctime = ctime.replace(hour=ctime.hour-1) while ctime <= eTime: #directory on the data server ################################################################## ### IF YOU ARE A USER NOT AT VT, YOU PROBABLY HAVE TO CHANGE THIS ### TO MATCH YOUR DIRECTORY STRUCTURE ################################################################## myDir = '/sd-data/'+ctime.strftime("%Y")+'/'+ftype+'/'+rad+'/' hrStr = ctime.strftime("%H") dateStr = ctime.strftime("%Y%m%d") #iterate through all of the files which begin in this hour for filename in glob.glob(myDir+dateStr+'.'+hrStr+form): outname = string.replace(filename,myDir,tmpDir) #unzip the compressed file if(string.find(filename,'.bz2') != -1): outname = string.replace(outname,'.bz2','') print 'bunzip2 -c '+filename+' > '+outname+'\n' os.system('bunzip2 -c '+filename+' > '+outname) elif(string.find(filename,'.gz') != -1): outname = string.replace(outname,'.gz','') print 'gunzip -c '+filename+' > '+outname+'\n' os.system('gunzip -c '+filename+' > '+outname) filelist.append(outname) #HANDLE CACHEING NAME ff = string.replace(outname,tmpDir,'') #check the beginning time of the file (for cacheing) t1 = dt.datetime(int(ff[0:4]),int(ff[4:6]),int(ff[6:8]),int(ff[9:11]),int(ff[11:13]),int(ff[14:16])) if fileSt == None or t1 < fileSt: fileSt = t1 ################################################################## ### END SECTION YOU WILL HAVE TO CHANGE ################################################################## ctime = ctime+dt.timedelta(hours=1) if(len(filelist) > 0): print 'found',ftype,'data in local files' myPtr.fType,myPtr.dType = ftype,'dmap' fileType = ftype break if(len(filelist) > 0): break else: print 'could not find',ftype,'data in local files' except Exception, e: print e print 'problem reading local data, perhaps you are not at VT?' print 'you probably have to edit radDataRead.py' print 'I will try to read from other sources' src=None #finally, check the VT sftp server if we have not yet found files if (src == None or src == 'sftp') and myPtr.ptr == None and len(filelist) == 0 and fileName == None: for ftype in arr: print '\nLooking on the remote SFTP server for',ftype,'files' try: #deal with UAF naming convention fnames = ['..........'+ftype] if(channel == None): fnames.append('..\...\....\.a\.') else: fnames.append('..........'+channel+'.'+ftype) for form in fnames: #create a transport object for use in sftp-ing transport = p.Transport((os.environ['VTDB'], 22)) transport.connect(username=os.environ['DBREADUSER'],password=os.environ['DBREADPASS']) sftp = p.SFTPClient.from_transport(transport) #iterate through all of the hours in the request #ie, iterate through all possible file names ctime = sTime.replace(minute=0) if ctime.hour % 2 == 1: ctime = ctime.replace(hour=ctime.hour-1) oldyr = '' while ctime <= eTime: #directory on the data server myDir = '/data/'+ctime.strftime("%Y")+'/'+ftype+'/'+rad+'/' hrStr = ctime.strftime("%H") dateStr = ctime.strftime("%Y%m%d") if(ctime.strftime("%Y") != oldyr): #get a list of all the files in the directory allFiles = sftp.listdir(myDir) oldyr = ctime.strftime("%Y") #create a regular expression to find files of this day, at this hour regex = re.compile(dateStr+'.'+hrStr+form) #go thorugh all the files in the directory for aFile in allFiles: #if we have a file match between a file and our regex if(regex.match(aFile)): print 'copying file '+myDir+aFile+' to '+tmpDir+aFile filename = tmpDir+aFile #download the file via sftp sftp.get(myDir+aFile,filename) #unzip the compressed file if(string.find(filename,'.bz2') != -1): outname = string.replace(filename,'.bz2','') print 'bunzip2 -c '+filename+' > '+outname+'\n' os.system('bunzip2 -c '+filename+' > '+outname) elif(string.find(filename,'.gz') != -1): outname = string.replace(filename,'.gz','') print 'gunzip -c '+filename+' > '+outname+'\n' os.system('gunzip -c '+filename+' > '+outname) else: print 'It seems we have downloaded an uncompressed file :/' print 'Strange things might happen from here on out...' filelist.append(outname) #HANDLE CACHEING NAME ff = string.replace(outname,tmpDir,'') #check the beginning time of the file t1 = dt.datetime(int(ff[0:4]),int(ff[4:6]),int(ff[6:8]),int(ff[9:11]),int(ff[11:13]),int(ff[14:16])) if fileSt == None or t1 < fileSt: fileSt = t1 ctime = ctime+dt.timedelta(hours=1) if len(filelist) > 0 : print 'found',ftype,'data on sftp server' myPtr.fType,myPtr.dType = ftype,'dmap' fileType = ftype break if len(filelist) > 0 : break else: print 'could not find',ftype,'data on sftp server' except Exception,e: print e print 'problem reading from sftp server' #check if we have found files if len(filelist) != 0: #concatenate the files into a single file if not cached: print 'Concatenating all the files in to one' #choose a temp file name with time span info for cacheing tmpName = '%s%s.%s.%s.%s.%s.%s' % (tmpDir, \ fileSt.strftime("%Y%m%d"),fileSt.strftime("%H%M%S"), \ eTime.strftime("%Y%m%d"),eTime.strftime("%H%M%S"),rad,fileType) print 'cat '+string.join(filelist)+' > '+tmpName os.system('cat '+string.join(filelist)+' > '+tmpName) for filename in filelist: print 'rm '+filename os.system('rm '+filename) else: tmpName = filelist[0] myPtr.fType = fileType myPtr.dType = 'dmap' #filter(if desired) and open the file if(not filtered): myPtr.ptr = open(tmpName,'r') else: if not fileType+'f' in tmpName: try: fTmpName = tmpName+'f' print 'fitexfilter '+tmpName+' > '+fTmpName os.system('fitexfilter '+tmpName+' > '+fTmpName) except Exception,e: print 'problem filtering file, using unfiltered' fTmpName = tmpName else: fTmpName = tmpName try: myPtr.ptr = open(fTmpName,'r') except Exception,e: print 'problem opening file' print e return None if(myPtr.ptr != None): if(myPtr.dType == None): myPtr.dType = 'dmap' return myPtr else: print '\nSorry, we could not find any data for you :(' return None
[docs]def radDataReadRec(myPtr): """A function to read a single record of radar data from a :class:`pydarn.sdio.radDataTypes.radDataPtr` object .. note:: to use this, you must first create a :class:`pydarn.sdio.radDataTypes.radDataPtr` object with :func:`radDataOpen` **Args**: * **myPtr** (:class:`pydarn.sdio.radDataTypes.radDataPtr`): contains the pipeline to the data we are after **Returns**: * **myBeam** (:class:`pydarn.sdio.radDataTypes.beamData`): an object filled with the data we are after. *will return None when finished reading* **Example**: :: import datetime as dt myPtr = radDataOpen(dt.datetime(2011,1,1),'bks',eTime=dt.datetime(2011,1,1,2),channel='a', bmnum=7,cp=153,fileType='fitex',filtered=False, src=None): myBeam = radDataReadRec(myPtr) Written by AJ 20130110 """ from pydarn.sdio.radDataTypes import radDataPtr, beamData, \ fitData, prmData, rawData, iqData, alpha import pydarn, datetime as dt #check input assert(isinstance(myPtr,radDataPtr)),\ 'error, input must be of type radDataPtr' if(myPtr.ptr == None): print 'error, your pointer does not point to any data' return None if myPtr.ptr.closed: print 'error, your file pointer is closed' return None myBeam = beamData() #do this until we reach the requested start time #and have a parameter match while(1): dfile = pydarn.dmapio.readDmapRec(myPtr.ptr) #check for valid data if dfile == None or dt.datetime.utcfromtimestamp(dfile['time']) > myPtr.eTime: #if we dont have valid data, clean up, get out print '\nreached end of data' myPtr.ptr.close() return None #check that we're in the time window, and that we have a #match for the desired params if dfile['channel'] < 2: channel = 'a' else: channel = alpha[dfile['channel']-1] if(dt.datetime.utcfromtimestamp(dfile['time']) >= myPtr.sTime and \ dt.datetime.utcfromtimestamp(dfile['time']) <= myPtr.eTime and \ (myPtr.stid == None or dfile['stid'] == 0 or myPtr.stid == dfile['stid']) and (myPtr.channel == None or myPtr.channel == channel) and (myPtr.bmnum == None or myPtr.bmnum == dfile['bmnum']) and (myPtr.cp == None or myPtr.cp == dfile['cp'])): #fill the beamdata object myBeam.updateValsFromDict(dfile) myBeam.fit.updateValsFromDict(dfile) myBeam.prm.updateValsFromDict(dfile) myBeam.rawacf.updateValsFromDict(dfile) myBeam.iqdat.updateValsFromDict(dfile) myBeam.fType = myPtr.fType if(myPtr.fType == 'fitacf' or myPtr.fType == 'fitex' or myPtr.fType == 'lmfit'): if myBeam.fit.slist == None: myBeam.fit.slist = [] return myBeam
[docs]def radDataReadScan(myPtr): """A function to read a full scan of data from a :class:`pydarn.sdio.radDataTypes.radDataPtr` object .. note:: to use this, you must first create a :class:`pydarn.sdio.radDataTypes.radDataPtr` object with :func:`radDataOpen` .. note:: This will ignore any bmnum request. Also, if no channel was specified in radDataOpen, it will only read channel 'a' **Args**: * **myPtr** (:class:`pydarn.sdio.radDataTypes.radDataPtr`): contains the pipeline to the data we are after **Returns**: * **myScan** (:class:`pydarn.sdio.radDataTypes.scanData`): an object filled with the data we are after. *will return None when finished reading* **Example**: :: import datetime as dt myPtr = radDataOpen(dt.datetime(2011,1,1),'bks',eTime=dt.datetime(2011,1,1,2),channel='a', bmnum=7,cp=153,fileType='fitex',filtered=False, src=None): myBeam = radDataReadScan(myPtr) Written by AJ 20130110 """ from pydarn.sdio import radDataPtr, beamData, fitData, prmData, \ rawData, iqData, alpha, scanData import pydarn, datetime as dt #check input assert(isinstance(myPtr,radDataPtr)),\ 'error, input must be of type radDataPtr' if(myPtr.ptr == None): print 'error, your pointer does not point to any data' return None if myPtr.ptr.closed: print 'error, your file pointer is closed' return None myScan = scanData() if myPtr.fBeam != None: myScan.append(myPtr.fBeam) else: firstflg = True if myPtr.channel == None: tmpchn = 'a' else: tmpchn = myPtr.channel #do this until we reach the requested start time #and have a parameter match while(1): #read the next record from the dmap file dfile = pydarn.dmapio.readDmapRec(myPtr.ptr) #check for valid data if(dfile == None or dt.datetime.utcfromtimestamp(dfile['time']) > myPtr.eTime): #if we dont have valid data, clean up, get out print '\nreached end of data' myPtr.ptr.close() return None #check that we're in the time window, and that we have a #match for the desired params if(dfile['channel'] < 2): channel = 'a' else: channel = alpha[dfile['channel']-1] if(dt.datetime.utcfromtimestamp(dfile['time']) >= myPtr.sTime and \ dt.datetime.utcfromtimestamp(dfile['time']) <= myPtr.eTime and \ (myPtr.stid == None or myPtr.stid == dfile['stid']) and (tmpchn == channel) and (myPtr.cp == None or myPtr.cp == dfile['cp'])): #fill the beamdata object myBeam = beamData() myBeam.updateValsFromDict(dfile) myBeam.fit.updateValsFromDict(dfile) myBeam.prm.updateValsFromDict(dfile) myBeam.rawacf.updateValsFromDict(dfile) myBeam.iqdat.updateValsFromDict(dfile) myBeam.fType = myPtr.fType if(myPtr.fType == 'fitacf' or myPtr.fType == 'fitex' or myPtr.fType == 'lmfit'): if(myBeam.fit.slist == None): myBeam.fit.slist = [] if(myBeam.prm.scan == 0 or firstflg): myScan.append(myBeam) firstflg = False else: myPtr.fBeam = myBeam return myScan
[docs]def radDataReadAll(myPtr): """A function to read a large amount (to the end of the request) of radar data into a list from a :class:`pydarn.sdio.radDataTypes.radDataPtr` object .. note:: to use this, you must first create a :class:`pydarn.sdio.radDataTypes.radDataPtr` object with :func:`radDataOpen` **Args**: * **myPtr** (:class:`pydarn.sdio.radDataTypes.radDataPtr`): contains the pipeline to the data we are after **Returns**: * **myList** (list): a list filled with :class:`pydarn.sdio.radDataTypes.scanData` objects holding the data we are after. *will return None if nothing is found* **Example**: :: import datetime as dt myPtr = radDataOpen(dt.datetime(2011,1,1),'bks',eTime=dt.datetime(2011,1,1,2),channel='a', bmnum=7,cp=153,fileType='fitex',filtered=False, src=None): myList = radDataReadAll(myPtr) Written by AJ 20130606 """ from pydarn.sdio import radDataPtr, beamData, fitData, prmData, \ rawData, iqData, alpha, scanData import pydarn, datetime as dt #check input if not isinstance(myPtr,radDataPtr): 'error, input must be of type radDataPtr' return None if myPtr.ptr == None: print 'error, your pointer does not point to any data' return None if myPtr.ptr.closed: print 'error, your file pointer is closed' return None myScan = scanData() if(myPtr.fBeam != None): myScan.append(myPtr.fBeam) else: firstflg = True if(myPtr.channel == None): tmpchn = 'a' else: tmpchn = myPtr.channel #do this until we reach the requested start time #and have a parameter match while(1): #read the next record from the dmap file dfile = pydarn.dmapio.readDmapRec(myPtr.ptr) #check for valid data if(dfile == None or dt.datetime.utcfromtimestamp(dfile['time']) > myPtr.eTime): #if we dont have valid data, clean up, get out print '\nreached end of data' myPtr.ptr.close() return None #check that we're in the time window, and that we have a #match for the desired params if(dfile['channel'] < 2): channel = 'a' else: channel = alpha[dfile['channel']-1] if(dt.datetime.utcfromtimestamp(dfile['time']) >= myPtr.sTime and \ dt.datetime.utcfromtimestamp(dfile['time']) <= myPtr.eTime and \ (myPtr.stid == None or myPtr.stid == dfile['stid']) and (tmpchn == channel) and (myPtr.cp == None or myPtr.cp == dfile['cp'])): #fill the beamdata object myBeam = beamData() myBeam.updateValsFromDict(dfile) myBeam.fit.updateValsFromDict(dfile) myBeam.prm.updateValsFromDict(dfile) myBeam.rawacf.updateValsFromDict(dfile) myBeam.iqdat.updateValsFromDict(dfile) myBeam.fType = myPtr.fType if(myPtr.fType == 'fitacf' or myPtr.fType == 'fitex' or myPtr.fType == 'lmfit'): if(myBeam.fit.slist == None): myBeam.fit.slist = [] if(myBeam.prm.scan == 0 or firstflg): myScan.append(myBeam) firstflg = False else: myPtr.fBeam = myBeam return myScan