python - 使用 pyhdf 时找不到字段

Question

我目前正在处理 HDF 文件（第 4 版），使用的是 pyhdf 模块（http://hdfeos.org/software/pyhdf.php）。

当我在 MATLAB 中使用 nctoolbox 打开其中一个 HDF 文件时，得到以下变量：

>> a = ncgeodataset('2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf')

a = 

  ncgeodataset with properties:

     location: '2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf'
       netcdf: [1x1 ucar.nc2.dataset.NetcdfDataset]
    variables: {16x1 cell}

>> a.variables

ans = 

    'StructMetadata.0'
    '2B-CLDCLASS/Geolocation Fields/Profile_time'
    '2B-CLDCLASS/Geolocation Fields/UTC_start'
    '2B-CLDCLASS/Geolocation Fields/TAI_start'
    '2B-CLDCLASS/Geolocation Fields/Height'
    '2B-CLDCLASS/Geolocation Fields/Range_to_intercept'
    '2B-CLDCLASS/Geolocation Fields/DEM_elevation'
    '2B-CLDCLASS/Geolocation Fields/Vertical_binsize'
    '2B-CLDCLASS/Geolocation Fields/Pitch_offset'
    '2B-CLDCLASS/Geolocation Fields/Roll_offset'
    '2B-CLDCLASS/Geolocation Fields/Latitude'
    '2B-CLDCLASS/Geolocation Fields/Longitude'
    '2B-CLDCLASS/Data Fields/Data_quality'
    '2B-CLDCLASS/Data Fields/Data_status'
    '2B-CLDCLASS/Data Fields/Data_targetID'
    '2B-CLDCLASS/Data Fields/cloud_scenario'

而在 Python 中使用 pyhdf 时，我只能看到 2 个变量：

>>> d = SD('2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf')
>>> d.datasets()
{
  'cloud_scenario': (('nray:2B-CLDCLASS', 'nbin:2B-CLDCLASS'), (20434, 125), 22, 1), 
          'Height': (('nray:2B-CLDCLASS', 'nbin:2B-CLDCLASS'), (20434, 125), 22, 0)
}

如果有人能帮我弄清楚这里发生了什么，我将不胜感激。

score 2 · Accepted Answer

您正在使用 pyhdf.SD 打开 hdf 文件，它只允许您查看科学数据集 (SDS)。似乎缺少的字段是 Vdata 字段，而不是 SDS，因此您必须使用 pyhdf.HDF 和 pyhdf.VS 分别访问它们。

例如：

from pyhdf.HDF import *
from pyhdf.VS import *

# Keep a handle on the HDF file itself: chaining HDF(...).vstart() would
# leave no name through which the file could be closed afterwards.
hdf_file = HDF("your_input_file.hdf", HC.READ)
open_file_for_reading_vdata = hdf_file.vstart()
try:
    # Ask the VS interface for information about the vdatas in the file.
    vdata = open_file_for_reading_vdata.vdatainfo()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(vdata)
finally:
    # Terminate the VS interface, then close the HDF file.
    open_file_for_reading_vdata.end()
    hdf_file.close()

有关更多详细信息，请参阅以下链接：http://pysclint.sourceforge.net/pyhdf/documentation.html

score 1 · Accepted Answer

要在 Python 中读取 HDF 文件里的所有数据，可以参考 pyhdf.V 文档中给出的以下程序，它会显示任意 HDF 文件中所含 vgroup 的内容：

from pyhdf.HDF import *
from pyhdf.V   import *
from pyhdf.VS  import *
from pyhdf.SD  import *

import sys

def describevg(refnum):
    """Print a description of the vgroup with the given reference number.

    The function relies on three module-level objects that must already be
    initialised on the HDF file being inspected: ``v`` (used to attach
    vgroups), ``vs`` (used to attach vdatas) and ``sd`` (used to select
    datasets).

    The output consists of the vgroup's name, class, tag and reference
    number, the number of members of each main object type, and one entry
    per member (vdata, dataset, nested vgroup, or "unhandled").

    Every ``print`` call receives a single pre-formatted string so that the
    function produces the same output on Python 2 and Python 3.

    Args:
        refnum: reference number of the vgroup to describe.
    """
    # Open vgroup in read mode.
    vg = v.attach(refnum)
    try:
        print("----------------")
        print("name: %s class: %s tag,ref: %s %s"
              % (vg._name, vg._class, vg._tag, vg._refnum))

        # Show the number of members of each main object type.
        # (The uneven spacing reproduces the original column layout.)
        print("members:  %s datasets: %s vdatas:   %s vgroups:  %s"
              % (vg._nmembers,
                 vg.nrefs(HC.DFTAG_NDG),
                 vg.nrefs(HC.DFTAG_VH),
                 vg.nrefs(HC.DFTAG_VG)))

        # Read the contents of the vgroup and display info about each member.
        for index, (tag, ref) in enumerate(vg.tagrefs()):
            print("member index %s" % (index,))

            # Vdata tag
            if tag == HC.DFTAG_VH:
                vd = vs.attach(ref)
                try:
                    nrecs, intmode, fields, size, name = vd.inquire()
                    print("  vdata: %s tag,ref: %s %s" % (name, tag, ref))
                    print("    fields: %s" % (fields,))
                    print("    nrecs: %s" % (nrecs,))
                finally:
                    vd.detach()

            # SDS tag
            elif tag == HC.DFTAG_NDG:
                sds = sd.select(sd.reftoindex(ref))
                try:
                    # ``sds_type`` rather than ``type``: do not shadow the builtin.
                    name, rank, dims, sds_type, nattrs = sds.info()
                    print("  dataset: %s tag,ref: %s %s" % (name, tag, ref))
                    print("    dims: %s" % (dims,))
                    print("    type: %s" % (sds_type,))
                finally:
                    sds.endaccess()

            # Vgroup tag (a vgroup nested inside this one)
            elif tag == HC.DFTAG_VG:
                vg0 = v.attach(ref)
                try:
                    print("  vgroup: %s tag,ref: %s %s" % (vg0._name, tag, ref))
                finally:
                    vg0.detach()

            # Unhandled tag
            else:
                print("unhandled tag,ref %s %s" % (tag, ref))
    finally:
        # Close vgroup, even if describing one of its members failed.
        vg.detach()
#
# Open HDF file in readonly mode.
# NOTE(review): path_FRLK and NameHDF_FRLK are not defined in the visible
# code; they must be set before this point (alternatively take the file name
# from the command line with ``filename = sys.argv[1]``).
filename = path_FRLK+NameHDF_FRLK
hdf = HDF(filename)

# Initialize the SD, V and VS interfaces on the file.
sd = SD(filename)
vs = hdf.vstart()
v  = hdf.vgstart()

try:
    # Scan all vgroups in the file. Starting from -1, each getid() call
    # yields the next vgroup reference number until HDF4Error is raised.
    ref = -1
    while True:
        try:
            ref = v.getid(ref)
        except HDF4Error:    # no more vgroup
            break
        print(ref)
        describevg(ref)
finally:
    # Terminate V, VS and SD interfaces.
    v.end()
    vs.end()
    sd.end()
    # Close HDF file.
    hdf.close()

以下函数通过 V/VS 接口提取 HDF 文件中的数据：

def _read_vdata_records(vs, vg, variable):
    """Return the records of the vdata named *variable* in *vg*, or None."""
    for tag, ref in vg.tagrefs():
        # Only vdata members can be attached through the VS interface;
        # skip any other kind of member (datasets, nested vgroups, ...).
        if tag != HC.DFTAG_VH:
            continue
        vd = vs.attach(ref)
        try:
            # inquire() -> (nrecs, intmode, fields, size, name)
            info = vd.inquire()
            if info[-1] == variable:
                # Read every record of the matching vdata in one call.
                return vd.read(info[0])
        finally:
            vd.detach()
    return None


def HDFread(filename, variable, Class=None):
    """
    Extract the data for non-scientific data in V mode of hdf file

    The vgroup whose class is *Class* is located, its vdata members are
    scanned for one named *variable*, and all records of the first match
    are read. Every interface and the file itself are released before
    returning, including when an error occurs.

    Args:
        filename: path of the HDF file to read.
        variable: name of the vdata to extract.
        Class: class name of the vgroup to search. When None, the class
            'SWATH Vgroup' is used (described by the original author as
            the default value for Geolocation fields).

    Returns:
        ``array(records)`` where ``records`` is what the vdata's ``read``
        call returned. NOTE(review): ``array`` is not defined in the
        visible code; presumably ``numpy.array`` -- confirm the import.

    Raises:
        ValueError: if no vdata named *variable* exists in the vgroup.
    """
    class_name = 'SWATH Vgroup' if Class is None else Class

    hdf = HDF(filename, HC.READ)
    try:
        # Initialize the VS and V interfaces on the file.
        vs = hdf.vstart()
        try:
            v = hdf.vgstart()
            try:
                # Find the vgroup of the requested class and open it.
                vg = v.attach(v.findclass(class_name))
                try:
                    records = _read_vdata_records(vs, vg, variable)
                finally:
                    vg.detach()
            finally:
                v.end()
        finally:
            vs.end()
    finally:
        # Close HDF file.
        hdf.close()

    if records is None:
        raise ValueError("%r is not a vdata of the %r vgroup in %s"
                         % (variable, class_name, filename))
    return array(records)

该程序适用于CloudSat的 HDF 文件。

score 0 · Accepted Answer

由于无法访问该数据，我只能推测：该文件似乎包含 pyhdf 模块无法读取的 netcdf 数据。在 Python 中与 nctoolbox 等效的工具似乎是 netCDF4。不过，您最好使用更高级的工具 xray，它为处理此类文件提供了更方便的数据结构。

python - 使用 pyhdf 时找不到字段

3 回答 3

Related

Reference