/*************************************************************************** * Copyright (C) 2011 by Nicolas PASCAL * * nicolas.pascal@icare.univ-lille1.fr * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef GRIBFILEDATA_H #define GRIBFILEDATA_H #include #include #include #include "grib_api.h" #include "filedatareader.h" using namespace std; #define MAX_KEY_LEN 255 #define MAX_VAL_LEN 1024 /** * @class t_grib_var a GRIB variable descriptor * It is mainly sets which message(s) are related to a variable */ class t_grib_var_desc { public: /** name of the variable */ string name; /** int name of the variable */ string long_name; /** GRIB parameter ID */ int param_id; /** type of the packed binary data in a numpy-like format : uint16, float32... */ string type_id; /** longitude dimension size */ int sz_x; /** latitude dimension size */ int sz_y; /** type of z level */ string z_level_type; /** fill value */ double fill_value; /** scaling factor */ double scale; /** scaling add offset */ double offset; /** indices of messages related to the variable. As many messages as vertical levels */ vector v_msg; /** vertical level of the messages, with the same indice than in @a v_msg */ vector v_lvl; /** * @brief constructor * @param name name of the variable * @param long_name extended name of the variable * @param param_id GRIB parameter ID * @param type_id type of the packed binary data * @param sz_x longitude dimension size * @param sz_y latitude dimension size * @param z_level_type type of vertical level. Can be an empty string, surface, isobaricInhPa or hybrid * @param fill_value the fill value * @param scale scaling factor * @param offset scaling add offset */ t_grib_var_desc (const string & name = "", const string & long_name = "", const int param_id = -1, const string & type_id = "", const int sz_x = -1, const int sz_y = -1, const string & z_level_type = "", const double fill_value = -DBL_MAX, const double scale = -DBL_MAX, const double offset = -DBL_MAX) : name (name), long_name (long_name), param_id (param_id), type_id (type_id), sz_x (sz_x), sz_y (sz_y), z_level_type (z_level_type), fill_value (fill_value), scale (scale), offset (offset) { // check pressure level validity if (!z_level_type.empty() && z_level_type != string("surface") && z_level_type != string("isobaricInhPa") && z_level_type != string("hybrid") ) { string msg ( "Invalid Pressure level type " + z_level_type ) ; g_exception e( __FILE__ , __LINE__ , msg ); throw e; } v_msg = vector (0); v_lvl = vector (0); }; /** * @brief destructor */ ~t_grib_var_desc () { v_msg.clear(); v_lvl.clear(); }; /** * @brief return content description as an output stream */ friend std::ostream &operator<< (std::ostream &stream, const t_grib_var_desc &grib_var_desc) { stream << grib_var_desc.to_string(); return stream; } /** * @brief return content description as a string */ const string to_string (void) const { string desc (""); desc += "var name : " + name + "\n"; desc += "long name : " + long_name + "\n"; desc += "param ID : " + MyTools::to_string (param_id) + "\n"; desc += "Data Type : " + MyTools::to_string (type_id) + "\n"; desc += "grid size : " + MyTools::to_string (sz_y) + "x" + MyTools::to_string (sz_x) + "\n"; desc += "Z level type : " + z_level_type + "\n"; desc += "Fill value : " + MyTools::to_string (fill_value) + "\n"; desc += "Scale factor : " + MyTools::to_string (scale) + "\n"; desc += "Scale offset : " + MyTools::to_string (offset) + "\n"; desc += "Msg indices : "; for (vector ::const_iterator it_msg = v_msg.begin(); it_msg != v_msg.end() ; ++it_msg ) desc += MyTools::to_string (*it_msg) + " "; desc += "\n"; desc += "Msg levels : "; for (vector ::const_iterator it_lvl = v_lvl.begin(); it_lvl != v_lvl.end() ; ++it_lvl ) desc += MyTools::to_string (*it_lvl) + " "; desc += "\n"; return desc; }; /** * @brief add the given message to the list of variable's messages * @param i_msg indice of the message * @param z_lvl vertical level value */ void add_msg (const int i_msg, const int z_lvl) { v_msg.push_back(i_msg); v_lvl.push_back(z_lvl); } /** * @brief access to the variable dimensions size, in [Z,Y,X] order for multi levels variables. [Y,X] for 2D * @return the dimensions size */ vector get_sz_dims (void) { vector sz_dims (0); sz_dims.reserve (3); int sz_z = v_lvl.size(); if (sz_z > 1) // 3D var sz_dims.push_back(sz_z); sz_dims.push_back(sz_y); sz_dims.push_back(sz_x); return sz_dims; } }; /** * @class GribFileData manage the read access in a GRIB1 or GRIB2 file * There is some specific words used in GRIB files : * - Message : a message is one record of 2D data. A 3D dataset will be composed of many messages * - Key : many keys are associated to a message. They can be the name of the parametre, the pressure levels if any, etc... It is a dataset attribute *@author Nicolas PASCAL (nicolas.pascal@icare.univ-lille1.fr) */ class GribFileData : virtual public FileData { protected: // --- parameters attributes IDs --- // /** name of a parameter key ID */ static const string var_name_id; /** name of a parameter code key ID */ static const string parameter_code_id; /** int name of a parameter */ static const string parameter_name_id; /** fill value key ID */ static const string fill_value_id; /** scale factor key ID */ static const string scale_factor_id; /** scale offset key ID */ static const string add_offset_id; /** longitude dimension key ID */ static const string x_dim_id; /** latitude dimension key ID */ static const string y_dim_id; /** pressure type of level key ID. Type can be surface, isobaricInhPa or hybrid */ static const string level_type_id; /** pressure level key ID */ static const string level_id; /** binary data description flag key ID */ static const string data_flag_id; /** number of bits used by a value key ID */ static const string data_width_id; /** data values access key ID */ static const string data_id; /** low level file descriptor */ FILE * fd; /** the GRIB API low level file handle */ // grib_handle * handle; /** total number of messages */ int n_msg; /** current processed message */ // int icur_msg; /** maps the parametres, using their names, to its variable descriptor */ typedef map < string, t_grib_var_desc > t_var_table; t_var_table var_table; public: /********************* ** CONSTRUCTORS ** *********************/ /** * @brief Open and initialize the file. * @param name the filename of the file to be opened (including its path) * @param mode the opening mode : "r", "w" or "rw". At this time, only the reading "r" mode is fully implemented * @param open_file tell wether the file must be opened during the instantiation or not */ GribFileData(const string &name = "", const string &mode = "r", const bool open_file = false); /** * @brief Copy constructor. * @param fdata the GribFileData object to be recopied. */ GribFileData(const GribFileData &fdata); /** * @brief Destructor */ virtual ~GribFileData(); /** * @brief Affectation. * @param fd the GribFileData object to be recopied. */ GribFileData &operator= (const GribFileData &fdata); /** * @brief open the file in read mode and create the API handle */ void open_data_file(); /** * @brief close the file and free the ressources used by it */ void close_data_file(); /** * @brief Check if the file has already been loaded */ const bool is_file_loaded(); /** * @brief check if @a ds_name is an existing dataset * @param ds_name name of a dataset * @return true if the dataset exists */ const bool is_existing_dataset (const string & ds_name) { return (var_table.find(ds_name) != var_table.end()); }; /** * @brief return a reference the related variable descriptor * @param ds_name name of a dataset * @return a variable descriptor */ const t_grib_var_desc & get_dataset_desc (const string & ds_name) { return var_table [ds_name]; }; /** * @brief for a given message, search the value of a key * Raise an exception if the key does not exist * @param i_msg indice of the searched message * @param key the searched key * @return the value of the key */ string get_msg_key (int i_msg, const string & key); /** * @brief read the fill value of the dataset given in parametre * @param dataset_name the name of the dataset where to read the fill value * @param fill_value (output) where to store the read value */ virtual void get_dataset_fill_value(const string &dataset_name, void* fill_value); /** * @brief access the size of the given dataset * @param dataset_name the name of the dataset * @return a vector that contains the dimensions of the dataset in each direction */ vector get_dataset_dimension (const string &dataset_name); /** * @brief return the number of datasets in the file * @return the number of datasets */ int get_n_dataset() { return var_table.size(); }; /** * @brief get the short name of the record at index i * @param i_dataset index of the dataset * @return the name of the dataset as a string */ string get_dataset_name (int i_dataset) { return get_msg_key (i_dataset, GribFileData::var_name_id); }; /** * @brief get the extended name of the record at index i * @param i_dataset index of the dataset * @return the extended name of the dataset as a string */ string get_dataset_long_name (int i_dataset) { return get_msg_key (i_dataset, GribFileData::parameter_name_id); }; /** * @brief get the level of the record at index i * @param i_dataset index of the dataset * @return the level of the dataset as a string */ string get_dataset_level (int i_dataset) { return get_msg_key (i_dataset, GribFileData::level_id); }; /** * @brief print the keys of the message number @a i_msg * The @a keys_filter parametre is set using the GRIB API constants as described in http://www.ecmwf.int/publications/manuals/grib_api/group__keys__iterator.html * @param i_msg indice of the message to read * @param keys_filter sets which type of keys should be printed. All are printed by default */ void print_msg_keys (int i_msg, unsigned int keys_filter = GRIB_KEYS_ITERATOR_ALL_KEYS); /** * @brief print the keys of the message number @a i_msg * The @a keys_filter parametre is set using the GRIB API constants as described in http://www.ecmwf.int/publications/manuals/grib_api/group__keys__iterator.html * @param i_msg indice of the message to read * @param v_keys [OUT] vector that stores the key = value pairs as strings * @param keys_filter sets which type of keys should be printed. All are printed by default */ void get_msg_keys (int i_msg, vector < pair < string, string > > & v_keys, unsigned int keys_filter = GRIB_KEYS_ITERATOR_ALL_KEYS); /** * @brief display the keys of all messages * The @a keys_filter parametre is set using the GRIB API constants as described in http://www.ecmwf.int/publications/manuals/grib_api/group__keys__iterator.html * @param keys_filter sets which type of keys should be printed. All are printed by default */ void print_keys(unsigned int keys_filter = GRIB_KEYS_ITERATOR_ALL_KEYS); /** * @brief retrieve the native data type of the specified dataset * @param dataset_name name of a dataset * @return the datatype */ int get_dataset_data_type(string dataset_name); /** * @brief print all variables entries */ void print_param_table (void); // {return this->get_hdf_file()->get_dataset_data_type(dataset_name.c_str());}; // string get_values_attr_dataset(string dataset_name,string attr_name); // bool has_attr_dataset(string dataset_name,string attr_name); /* string get_values_attr(string attr_name); string get_file_attr(string attr_name); bool has_attr(string attr_name); bool has_file_attr(string attr_name);*/ /************************ ** DATA ACCESS METHODS ** ************************/ /** * @brief read a variable data * @warning due to the GRIB basic types, all data will be returned as double * @param data the output buffer of read values as float64 (void* is used for interfaces compatibility). If NULL, allocation is done internally * @param dataset_name name of the variable to read * @param start indices of extraction start. If NULL, start at (0,0) if rank is 2 ; (0,0,0) if rank is 3... * @param stride step between 2 interesting values. Ignored at this time * @param edges number of values to read along each dimension. if NULL, it will be all data along each dimension. * @param rank the dimension of start, stride and edges * @return a pointer to the read data array. It is useful when you let this method manage the allocation to update the allocated pointer to the data. If you've done it yourself or you are using a static buffer, it isn't necessary to catch this return pointer */ void * read_data (void * data, const char * dataset_name, int * start = NULL, int * stride = NULL, int * edges = NULL, int rank = -1); /** * @brief read the whole data buffer of a GRIB message * Due to the GRIB format, the message is always read as double. The cast to T is done in a second time * @warning returned buffer. If allocated internally (ie outdata is NULL), the caller must manage the deallocation * @param i_msg indice of the message to read * @param sz_dim number of values in the buffer along each dimensions, as [Y,X] * @param outdata output buffer. If NULL, allocation is done internally. If not NULL, it is supposed to already have the good size and no control will be done on it * @return the message data buffer */ template T * get_msg_data (const int i_msg, vector & sz_dim, T * outdata = NULL); /** * @brief print out the data buffer (as double) of a message * @param i_msg indice of the message to read */ const void print_msg_data (const int i_msg); /** * @brief guess the Region Of Interest selectors start, stride and edges if their are NULL * @param ds_name [IN] name of the variable to read * @param start [INOUT] indices of extraction start. If NULL, start at (0,0) if rank is 2 ; (0,0,0) if rank is 3... * @param stride [INOUT] step between 2 read values. Ignored at this time * @param edges [INOUT] number of values to read along each dimension. if NULL, it will be all data along each dimension. * @param rank [INOUT] the number of dimensions of the dataset * @param is_selector_allocated [INOUT] tells which selector between start, stride and edges are allocated in the method. Used to correctly free them after use */ void init_selector (const char* ds_name, int *&start, int *&stride, int *&edges, int &rank, bool is_selector_allocated[3]); /** * @brief frees the selectors that have been internally allocated * @param is_selector_allocated tells which selector between start, stride and edges are internally allocated, and so, which one to free * @param start indices of extraction start. If NULL, start at (0,0) if rank is 2 ; (0,0,0) if rank is 3... * @param stride step between 2 read values. Ignored at this time * @param edges number of values to read along each dimension. if NULL, it will be all data along each dimension */ void free_selector(const bool *is_selector_allocated, int *start, int *stride, int *edges); /** * @brief check the validity of the start, stride and edges selectors for the given datasets. * It mainly checks if one is out of bounds. * @throw g_exception in case of mismatch * @param ds_name name of the variable * @param start indices of extraction start. If NULL, start at (0,0) if rank is 2 ; (0,0,0) if rank is 3... * @param stride step between 2 read values. Ignored at this time * @param edges number of values to read along each dimension. if NULL, it will be all data along each dimension. * @param rank number of dimensions of the dataset */ void check_selector(const char* ds_name, int *start, int *stride, int *edges, const int rank); // void get_fillValue(const string &dataset_name, void *fillValue); // void get_scaling(const string &dataset_name, float64 &scale, float64 &offset) { // string key ("parameterName"); // return get_msg_key (i_dataset, key); // }; /** * @brief retrieve the scaling factors of the given message * @param i_msg indice of the searched message * @param scale scale factor * @param offset add offset */ void get_scaling(int i_msg, double &scale, double &offset) { string s_scale = get_msg_key (i_msg, GribFileData::scale_factor_id); string s_offset = get_msg_key (i_msg, GribFileData::add_offset_id); scale = MyTools::to_num (s_scale); offset = MyTools::to_num (s_offset); }; /** * @brief return the number of messages (ie records) in the file * @return the number of messages in the file */ const int get_n_msg() const { return n_msg; }; private : /** * @brief read the number of messages (ie records) in the file */ void init_n_msg (); /** * @brief place the Grib API handle on the message numbered i_msg * @a i_msg starts from 0 * @param i_msg indice of the searched message * @return a not NULL grib_handle to the message. NULL if not found */ grib_handle * get_handle (int i_msg); /** * @brief free a GRIB handle and rewind the file * @param handle a valid GRIB API handle */ void release_handle (grib_handle *handle); /** * @brief fill in a hash map that constructs the variables descriptor */ void init_param_table (void); /** * @brief print the content of a variable descriptor * @param it_var_desc pointer to a t_var_desc object */ static void print_var_desc ( const pair & var_table_elt) { cout << "---" << var_table_elt.first << "---" << endl; cout << var_table_elt.second << endl; }; /** * @brief constructs the data type ID of a variale, as a string like int16, float32, etc using the keys dataFlag and dataWidth read in a message * @warning only valid for GRIB1 files * dataFlag table description at : http://www.ecmwf.int/publications/manuals/d/gribapi/fm92/grib1/detail/ctables/11/ * @param data_flag bitmask describing the binary values * @param data_width number of bits used to code a value * @return the data type as a string */ static string parse_type_id (const unsigned int data_flag, const int data_width) { string type_id (""); // check if data is stored as float or integers : 3rd bit from right // printf ( "flag = %lx width = %ld\n", data_flag, data_width ); // cout << (data_flag & 0x4) << " " << ((data_flag & 0x4) >> 3) << endl; bool is_int = ((data_flag & 0x4) >> 3) == 1; if (is_int) type_id = "int"; else type_id = "float"; // add data width type_id += MyTools::to_string(data_width); return type_id; }; }; template T * GribFileData::get_msg_data(const int i_msg, vector & sz_dim, T * outdata) { // --- seek message number i_msg grib_handle * handle = get_handle (i_msg); if (handle == NULL) { string msg = "Fail to seek GRIB message number " + MyTools::to_string(i_msg); g_exception e( __FILE__ , __LINE__ , msg ); throw e; } // cout << get_dataset_name (i_msg) << " " << get_msg_key (i_msg, "level") << endl; // --- allocate inner buffer size_t sz_buf; GRIB_CHECK (grib_get_size (handle, GribFileData::data_id.c_str(), &sz_buf), 0); double * inner_buf = NULL; // if the output data are requested in double and outdata already allocated, a unique buffer can be used for read and returned data. Else a temporary double buffer is needed. if (outdata != NULL && typeid (T) == typeid(double)) { inner_buf = outdata; } else { inner_buf = new double [sz_buf]; if (inner_buf == NULL) { g_exception e(__FILE__ , __LINE__ , "Unable to allocate buffer\n"); throw e; } } // --- read data GRIB_CHECK(grib_get_double_array(handle, GribFileData::data_id.c_str(), inner_buf, &sz_buf), 0); // --- read output dimensions size sz_dim.resize (2); long long_int; GRIB_CHECK(grib_get_long(handle, GribFileData::x_dim_id.c_str(), &long_int),0); sz_dim[1] = long_int; GRIB_CHECK(grib_get_long(handle, GribFileData::y_dim_id.c_str(), &long_int),0); sz_dim[0] = long_int; release_handle(handle); // --- format returned output buffer if (typeid (T) == typeid(double)) { // double typed output is requested : just return the inner buffer return inner_buf; } else { // innner and returned buffer does not have the same type -> allocate returned one and cast data outdata = new T [sz_buf]; if (outdata == NULL) { g_exception e(__FILE__ , __LINE__ , "Unable to allocate buffer\n"); throw e; } copy (inner_buf, inner_buf + sz_buf, outdata); free (inner_buf); } return outdata; } #endif