# -*- coding: iso-8859-1 -*-
########################################################################################
#
# Description :
# -------------
#
# Compare the structure and optionally the datasets of 2 files, either in NETCDF or HDF format
#
# Limitations :
# -------------
#
# Supports only comparison of :
# - attributes : file and SDS
# - datasets : SDS
# No support for VDATA, VGROUPS... is currently provided
#
# Synopsis :
# ----------
#
# python pyhdiff.py <file1> <file2> [<compare_data>]
#
# Input :
# -------
#
# <file1> a HDF4 or netcdf file [REQUIRED]
# <file2> a HDF4 or netcdf file [REQUIRED]
# <compare_data> activate or not data comparison, either 0 or 1. Activated by default [OPTIONAL]
#
# Output :
# --------
#
# 0 if run end normally | 1 if something has gone wrong
#
# Prerequisites :
# ---------------
#
# - python >= 2.5
# - HDF4 >= 4.2.r1
# - numpy >= 1.2.1
# - pyhdf >= 0.8.3
#
# Earlier versions are probably supported but not tested
#
# Author :
# --------
#
# HYGEOS/CGTD-ICARE/UDEV Nicolas PASCAL
#
# License :
# ---------
#
# This file must be used under the terms of the CeCILL.
# This source file is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at
# http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt
#
# History :
# --------
#
# v1.0.0 : 2011/05/06
# - packaging for web release
#
# v0.0.0 : 2010/01/19
# - creation
#
########################################################################################

import os
import os.path
import sys
import warnings

import numpy

# NOTE: every print below takes exactly ONE parenthesised string so that the
# script behaves identically under the Python 2 print statement and the
# Python 3 print function.

EXIT_SUCCESS = 0
EXIT_FAILURE = 1

# One indentation step of the report.
INDENT = " "


def usage():
    """ print out usage """
    print("Usage :")
    print(" python pyhdiff.py <file1> <file2> [<compare_data>]" + os.linesep)
    print("With :")
    print(" <file1> a HDF4 or netcdf file [REQUIRED]")
    print(" <file2> a HDF4 or netcdf file [REQUIRED]")
    print(" <compare_data> either 0 or 1 : activate or not data comparison. "
          "Enabled by default [OPTIONAL]" + os.linesep)


def char2num(data):
    """
    convert a character array to a uint8 one and return it

    The input is expected to hold 1-byte characters (dtype "|S1"); the
    result has the same shape. A view is used instead of going through the
    raw buffer, so non-contiguous arrays are accepted as well.
    """
    return data.view(numpy.uint8)


def to_comparable(data):
    """
    return `data` in a form whose element-wise difference is meaningful

    - character arrays are turned into their uint8 codes
    - boolean and integer arrays are promoted to float64. Subtracting in the
      native integer type wraps around (uint8: 1 - 2 -> 255), which corrupts
      the statistics. float64 is exact for 8/16/32-bit integers; 64-bit
      values above 2**53 would lose precision.
    Floating point arrays are returned unchanged.
    """
    data = numpy.asarray(data)
    if data.dtype == "|S1":
        data = char2num(data)
    if data.dtype.kind in "biu":
        data = data.astype(numpy.float64)
    return data


def stats_line(values):
    """ format mean / std / min / max of a non-empty array as one report line """
    return " mean = %f std = %f min = %f max = %f" % (
        values.mean(), values.std(), values.min(), values.max())


def compare_data(data1, data2):
    """
    compare 2 arrays and return the report as a list of lines

    The first line is the verdict ("Identical" or "Different : "), the
    following ones give details :
    - a shape message if the arrays can not be compared element-wise
    - the statistics of data1 - data2
    - if at least one array contains Inf or NaN, the statistics restricted
      to the elements that are finite in both arrays
    """
    data1 = to_comparable(data1)
    data2 = to_comparable(data2)

    # without this check numpy would either raise or silently broadcast
    if data1.shape != data2.shape:
        return ["Different : ",
                " Shapes differ : %s / %s" % (str(data1.shape), str(data2.shape))]

    diff = data1 - data2
    # NaN != 0 is True : a NaN on either side is reported as a difference
    if not numpy.any(diff != 0):
        return ["Identical"]

    lines = ["Different : ", stats_line(diff)]

    finite1 = numpy.isfinite(data1)
    finite2 = numpy.isfinite(data2)
    if numpy.all(finite1) and numpy.all(finite2):
        return lines

    # keep only the elements that are neither Inf nor NaN in both datasets
    diff_finite = diff[finite1 & finite2]
    if diff_finite.size == 0:
        lines.append(" No finite values")
    elif numpy.all(finite1 == finite2):
        lines.append(" Finite masks : IDENTICAL. Finite values stats :")
        lines.append(stats_line(diff_finite))
    else:
        lines.append(" Finite masks : DIFFERENT. Finite values in both datasets stats :")
        lines.append(stats_line(diff_finite))
    return lines


def missing_report(label, names1, names2, level=0):
    """
    return the report lines listing the names present in only one file

    label  : kind of object compared ("Attr" or "SDS")
    names1 : names found in file 1
    names2 : names found in file 2
    level  : indentation level of the section title
    Names are sorted so that the report is reproducible between runs.
    """
    names1 = set(names1)
    names2 = set(names2)
    lines = []
    for title, names in ((" : only in file 1", names1 - names2),
                         (" : only in file 2", names2 - names1)):
        if names:
            lines.append(INDENT * level + label + title)
            for name in sorted(names):
                lines.append(INDENT * (level + 1) + str(name))
            lines.append("")
    return lines


def attributes_report(attrs1, attrs2, level=0):
    """
    return the report lines comparing 2 attribute dictionaries (name -> value)

    Attributes missing on one side are listed first, then each common
    attribute is flagged "Identical" or "Different".
    """
    lines = missing_report("Attr", attrs1.keys(), attrs2.keys(), level)
    common = sorted(set(attrs1.keys()) & set(attrs2.keys()))
    if common:
        lines.append(INDENT * level + "Attr : common")
        for name in common:
            if attrs1[name] != attrs2[name]:
                verdict = "Different"
            else:
                verdict = "Identical"
            # the extra " " is the soft space the Python 2 print statement
            # inserted between the 2 original prints
            lines.append(INDENT * (level + 1) + str(name) + " : " + " " + verdict)
        lines.append("")
    return lines


def emit(lines):
    """ print each line of a report """
    for line in lines:
        print(line)


def compare_sds(sds1, sds2, enable_data_compare):
    """ print the comparison of the data (optional) and attributes of 2 SDS """
    if enable_data_compare:
        lines = compare_data(sds1.get(), sds2.get())
        print(INDENT + "Data : " + " " + lines[0])
        emit(lines[1:])
        print("")
    # attributes are read once per SDS, not once per attribute
    emit(attributes_report(sds1.attributes(), sds2.attributes(), 1))


def compare_files(file1, file2, enable_data_compare=True):
    """
    print the comparison of 2 opened pyhdf SD files

    Compares the file attributes, the list of SDS and, for each SDS present
    in both files, its data (if enable_data_compare) and its attributes.
    """
    emit(attributes_report(file1.attributes(), file2.attributes()))

    names1 = set(file1.datasets().keys())
    names2 = set(file2.datasets().keys())
    emit(missing_report("SDS", names1, names2))

    common = sorted(names1 & names2)
    if common:
        print("SDS : common")
    for name in common:
        print(INDENT + "*** " + str(name) + " ***")
        sds1 = file1.select(name)
        try:
            sds2 = file2.select(name)
            try:
                compare_sds(sds1, sds2, enable_data_compare)
            finally:
                sds2.endaccess()
        finally:
            sds1.endaccess()


def main(argv):
    """
    run the comparison described by the command line `argv`

    Returns EXIT_SUCCESS when the comparison ran to its end (whatever the
    differences found), EXIT_FAILURE when the arguments are invalid.
    """
    if len(argv) < 3:
        usage()
        print("At least 2 arguments are required. Abort")
        return EXIT_FAILURE

    f1 = argv[1]
    f2 = argv[2]
    enable_data_compare = True
    if len(argv) > 3:
        try:
            enable_data_compare = bool(int(argv[3]))
        except ValueError:
            usage()
            print("Third argument must be either 0 or 1. Abort")
            return EXIT_FAILURE

    warnings.filterwarnings("ignore")

    # imported here so that argument errors are reported even when pyhdf is
    # not installed
    from pyhdf.SD import SD, SDC

    file1 = SD(f1, SDC.READ)
    try:
        file2 = SD(f2, SDC.READ)
        try:
            compare_files(file1, file2, enable_data_compare)
        finally:
            file2.end()
    finally:
        file1.end()
    return EXIT_SUCCESS


if __name__ == "__main__":
    sys.exit(main(sys.argv))