import os, re
import functools
import ctypes
from ctypes import string_at, byref, sizeof, cast, POINTER, pointer, create_string_buffer, memmove
import numpy as np
import pandas as pd
class _StructBase(ctypes.Structure):
__type__ = 0
_fields_ = []
@classmethod
def Offsetof(cls, field):
pattern = '(?P<field>\w+)\[(?P<idx>\d+)\]'
mat = re.match(pattern, field)
if mat:
fields = dict(cls.Fields())
f = mat.groupdict()['field']
idx = mat.groupdict()['idx']
return cls.Offsetof(f) + int(idx) * ctypes.sizeof(fields[field])
else:
return getattr(cls, field).offset
@classmethod
def DType(cls):
map = {
ctypes.c_byte: np.byte,
ctypes.c_ubyte: np.ubyte,
ctypes.c_char: np.ubyte,
ctypes.c_int8: np.int8,
ctypes.c_int16: np.int16,
ctypes.c_int32: np.int32,
ctypes.c_int64: np.int64,
ctypes.c_uint8: np.uint8,
ctypes.c_uint16: np.uint16,
ctypes.c_uint32: np.uint32,
ctypes.c_uint64: np.uint64,
ctypes.c_float: np.float32,
ctypes.c_double: np.float64,
}
res = []
for k, v in cls.Fields():
if hasattr(v, '_length_'):
if v._type_ != ctypes.c_char:
for i in range(v._length):
res.append((k, map[v], cls.Offsetof(k)))
else:
res.append((k, 'S%d' % v._length_, cls.Offsetof(k)))
else:
res.append((k, map[v], cls.Offsetof(k)))
res = pd.DataFrame(res, columns=['name', 'format', 'offset'])
return np.dtype({
'names': res['name'],
'formats': res['format'],
'offsets': res['offset'],
})
@classmethod
def Attr(cls):
fields = cls._fields_
res = []
for attr, tp in fields:
if str(tp).find('_Array_') > 0 and str(tp).find('char_Array_') < 0:
for i in range(tp._length_):
res.append((attr + '[%s]' % str(i), tp._type_))
else:
res.append((attr, tp))
return res
@classmethod
def Fields(cls, notype=False):
res = [cls.Attr()]
cur_cls = cls
while True:
cur_cls = cur_cls.__bases__[0]
if cur_cls == ctypes.Structure:
break
res.append(cur_cls.Attr())
if notype:
return [k for k, v in functools.reduce(list.__add__, reversed(res), [])]
else:
return functools.reduce(list.__add__, reversed(res), [])
@classmethod
def size(cls):
return sizeof(cls)
@classmethod
def from_struct_binary(cls, path, max_count=2 ** 32, decode=True):
print(os.path.getsize(path), cls.size())
assert os.path.getsize(path) % cls.size() == 0
size = os.path.getsize(path) // cls.size()
size = min(size, max_count)
index = range(size)
array = np.fromfile(path, dtype=cls.DType(), count=size)
df = pd.DataFrame(array, index=index)
for attr, tp in eval(str(cls.DType())):
if re.match('S\d+', tp) is not None and decode:
try:
df[attr] = df[attr].map(lambda x: x.decode("utf-8"))
except:
df[attr] = df[attr].map(lambda x: x.decode("gbk"))
return df
class StructBase(_StructBase):
    """Record header shared by subclasses: one unsigned 32-bit ``Type`` field."""

    _fields_ = [('Type', ctypes.c_uint32)]
class IndexStruct(StructBase):
    """Record layout appended after the inherited ``Type`` header.

    Declares a sequence number, three fixed-width byte-string fields
    (8, 8 and 16 bytes), a signed 32-bit ``SourceID`` and six unsigned
    32-bit values.
    """

    _fields_ = [
        ('Seq', ctypes.c_uint32),
        ('ExID', ctypes.c_char * 8),
        ('SecID', ctypes.c_char * 8),
        ('SecName', ctypes.c_char * 16),
        ('SourceID', ctypes.c_int32),
    ] + [
        # The remaining fields all share the same unsigned 32-bit type.
        (name, ctypes.c_uint32)
        for name in ('Time', 'PreClose', 'Open', 'High', 'Low', 'Match')
    ]
if __name__ == '__main__':
    # Example usage. 'your path' is a placeholder for a real record file;
    # the guard keeps importing this module from attempting the file read.
    df = IndexStruct.from_struct_binary('your path')
    print(df)