5

The HDF5 data storage uses the C convention, i.e. if I am storing a matrix A(N,M,K) in a binary file, the fastest changing dimension of the stored data will have size N. Apparently when I use the Fortran wrapper of HDF5, HDF5 automatically transposes the matrix, to be consistent with C.

I have a data of size (256 by 128 by 256) stored in an unformatted binary file written by fortran. I am trying to convert it into h5 format by using a program given below. But the final output is giving me the dimensions of the stored matrix as (128,256,256). I have no idea what to do to make sure that the final hd5 file can be rightly visualized in the visualizing software (Paraview).

PROGRAM H5_RDWT

 USE HDF5 ! This module contains all necessary modules

 IMPLICIT NONE


 CHARACTER(LEN=6), parameter :: out_file = "out.h5"  ! File name
 CHARACTER(LEN=6), parameter :: in_file  = "in.dat" ! File name
 CHARACTER(LEN=4), parameter :: dsetname =  "vort"! Dataset name
 CHARACTER(LEN=50) :: len

 INTEGER(HID_T) :: in_file_id   ! File identifier
 INTEGER(HID_T) :: out_file_id  ! File identifier
 INTEGER(HID_T) :: dset_id      ! Dataset identifier
 INTEGER(HID_T) :: dspace_id    ! Dataspace identifier

 INTEGER :: in_file_id = 23

 INTEGER     :: nx = 256, ny=128, nz=256

 INTEGER(HSIZE_T), DIMENSION(3) :: dims             ! Dataset dimensions
 INTEGER     ::   rank = 3                          ! Dataset rank

 INTEGER     ::   error                  ! Error flag
 INTEGER     ::   i, j, k, ii, jj, kk    ! Indices

 REAL,    allocatable :: buff_r(:,:,:)   ! buffer for reading from input file

 dims(1) = nx
 dims(2) = ny
 dims(3) = nz
 allocate(buff_r(nx,ny,nz))

 ! Read the input data.
 open (in_file_id,FILE=in_file,form='unformatted',access='direct',recl=4*nx*ny*nz)  
 read (in_file_id,rec=1) buff_r


 ! Initialize FORTRAN interface of HDF5.
 CALL h5open_f(error)

 ! Create a new file.
 CALL h5fcreate_f (out_file, H5F_ACC_TRUNC_F, out_file_id, error)

 ! Create the dataspace.
 CALL h5screate_simple_f(rank, dims, dspace_id, error)


 ! Create the dataset with default properties.
 CALL h5dcreate_f(out_file_id, dsetname, H5T_NATIVE_REAL, dspace_id, &
         dset_id, error)

 ! Write the dataset.
 CALL h5dwrite_f(dset_id, H5T_NATIVE_REAL, buff_r, dims, error)

 ! End access to the dataset and release resources used by it.
 CALL h5dclose_f(dset_id, error)

 ! Terminate access to the data space.
 CALL h5sclose_f(dspace_id, error)

 ! Close the file.
 CALL h5fclose_f(out_file_id, error)

 ! Close FORTRAN interface.
 CALL h5close_f(error)

 deallocate(buff_r)

 END PROGRAM H5_RDWT

To illustrate what is happening, I am generating a sample data file using the following script:

  program main

  !-------- initialize variables -------------
  character(8) :: fname
  integer, parameter:: n = 32
  real*8, dimension(n,n,2*n) :: re
  integer i,j,k, recl
  Inquire( iolength =  recl ) re

  !------ fill in the array with sample data ----

  do k = 1, 2*n
     do j = 1, n
        do i = 1, n
           re(i,j,k) = 1.0
        end do
     end do
  end do

  !------ write in data in a file -----------
  write(fname, "(A)") "data.dat"
  open (10, file=fname, form='unformatted', access='direct', recl=recl)
  write(10,rec=1) re
  close(10)

  stop
  end program main

I copy pasted the program by Ian Bush and changed the values of nx, ny and nz to 32, 32 and 64 respectively. I would expect the generated h5 file to have dimensions (32,32,64). But it is coming out to be (64,32,32). Here is what is happening in my machine:

[pradeep@laptop]$gfortran generate_data.f90 
[pradeep@laptop]$./a.out 
[pradeep@laptop]$ls -l data.dat 
-rw-r--r--  1 pradeep  staff  524288 Mar 12 14:04 data.dat
[pradeep@laptop]$h5fc convert_to_h5.f90 
[pradeep@laptop]$./a.out 
[pradeep@laptop]$ls -l out.h5 
-rw-r--r--  1 pradeep  staff  526432 Mar 12 14:05 out.h5
[pradeep@laptop]$h5dump -H out.h5 
HDF5 "out.h5" {
GROUP "/" {
   DATASET "data" {
      DATATYPE  H5T_IEEE_F64LE
      DATASPACE  SIMPLE { ( 64, 32, 32 ) / ( 64, 32, 32 ) }
   }
}
}

Please confirm with me if you are seeing the same thing.

4

3 回答 3

6

在查看使用 Fortran 应用程序编写的 HDF5 文件时,我也遇到了麻烦。基本问题是 Fortran 和 C 存储多维数组的方式不同(Fortran 是列优先的,C 是行优先的),并且由于 Fortran HDF5 库是 C HDF5 库的接口,因此 Fortran 包装器会在传递数据之前转置维度进入 C 代码。同样,当 Fortran 应用程序读取 HDF5 文件时,Fortran 包装器会再次转置维度。

因此,如果您使用 Fortran 应用程序进行所有的写作和阅读,您应该不会注意到任何差异。如果您使用 Fortran 应用程序编写文件,然后使用 C 应用程序(例如 h5dump)读取它,则尺寸将出现转置。这不是一个错误,它就是它的工作原理。

如果要正确显示数据,请使用 Fortran 应用程序读取数据或使用 C 应用程序并先转置数据。(或者您可以在首先写入数据之前转置数据。)

如前所述,这在文档的第 7.3.2.5 节中得到了很好的解释:http ://www.hdfgroup.org/HDF5/doc/UG/UG_frame12Dataspaces.html

于 2013-07-17T18:05:25.873 回答
0

长评论真的而不是答案......

你能澄清为什么你认为它不起作用吗?一旦我更正了您代码中的几处

1) in_file_id 用 2 种不同的种类声明了两次

2) 直接访问文件的 Recl 不一定以字节为单位 - 通过输出列表查询更便携

我得到以下内容,它生成了一个带有随机数据的虚拟文件,似乎可以工作:

ian@ian-pc:~/test/stack$ cat hdf5.f90
PROGRAM H5_RDWT

 USE HDF5 ! This module contains all necessary modules

 IMPLICIT NONE


 CHARACTER(LEN=6), parameter :: out_file = "out.h5"  ! File name
 CHARACTER(LEN=6), parameter :: in_file  = "in.dat" ! File name
 CHARACTER(LEN=4), parameter :: dsetname =  "vort"! Dataset name
 CHARACTER(LEN=50) :: len

!!$ INTEGER(HID_T) :: in_file_id   ! File identifier
 INTEGER(HID_T) :: out_file_id  ! File identifier
 INTEGER(HID_T) :: dset_id      ! Dataset identifier
 INTEGER(HID_T) :: dspace_id    ! Dataspace identifier

 INTEGER(HID_T) :: in_file_id = 23

 INTEGER     :: nx = 256, ny=128, nz=256

 INTEGER(HSIZE_T), DIMENSION(3) :: dims             ! Dataset dimensions
 INTEGER     ::   rank = 3                          ! Dataset rank

 Integer :: recl

 INTEGER     ::   error                  ! Error flag
 INTEGER     ::   i, j, k, ii, jj, kk    ! Indices

 REAL,    allocatable :: buff_r(:,:,:)   ! buffer for reading from input file

 dims(1) = nx
 dims(2) = ny
 dims(3) = nz
 allocate(buff_r(nx,ny,nz))

 Inquire( iolength =  recl ) buff_r

 ! Read the input data.
 open (in_file_id,FILE=in_file,form='unformatted',access='direct',recl=recl)  
 read (in_file_id,rec=1) buff_r


 ! Initialize FORTRAN interface of HDF5.
 CALL h5open_f(error)

 ! Create a new file.
 CALL h5fcreate_f (out_file, H5F_ACC_TRUNC_F, out_file_id, error)

 ! Create the dataspace.
 CALL h5screate_simple_f(rank, dims, dspace_id, error)


 ! Create the dataset with default properties.
 CALL h5dcreate_f(out_file_id, dsetname, H5T_NATIVE_REAL, dspace_id, &
         dset_id, error)

 ! Write the dataset.
 CALL h5dwrite_f(dset_id, H5T_NATIVE_REAL, buff_r, dims, error)

 ! End access to the dataset and release resources used by it.
 CALL h5dclose_f(dset_id, error)

 ! Terminate access to the data space.
 CALL h5sclose_f(dspace_id, error)

 ! Close the file.
 CALL h5fclose_f(out_file_id, error)

 ! Close FORTRAN interface.
 CALL h5close_f(error)

 deallocate(buff_r)

 END PROGRAM H5_RDWT
ian@ian-pc:~/test/stack$ h5fc hdf5.f90
ian@ian-pc:~/test/stack$ ./a.out
ian@ian-pc:~/test/stack$ ls -l out.h5 
-rw-rw-r-- 1 ian ian 33556576 Mar 11 10:29 out.h5
ian@ian-pc:~/test/stack$ ncdump out.h5 | head
netcdf out {
dimensions:
    phony_dim_0 = 256 ;
    phony_dim_1 = 128 ;
variables:
    float vort(phony_dim_0, phony_dim_1, phony_dim_0) ;
data:

 vort =
  0.9975595, 0.5668247, 0.9659153, 0.7479277, 0.3673909, 0.4806369, 
ian@ian-pc:~/test/stack$ 

那你为什么觉得有问题?

于 2013-03-11T10:37:15.237 回答
0

出于安全原因,我建议您将矩阵分解为矢量形式,并将它们作为一维数据集存储在 HDF5 文件中。然后,在阅读时以相同的方式组装它们。用于H5SSELECT_HYPERSLAB_F写入/读取矩阵的切片。

于 2017-10-24T08:42:37.750 回答