6

我对 Python 很熟悉,所以希望我能正确地表达这个问题。

总体问题涉及从 Python 调用 C 例程。通过将一些相关的 SO 问题/答案组合在一起,我可以非常接近,但我似乎无法让事情完全正确地结合在一起。有两个方面:一是用指针调用C程序,二是使用回调函数。

背景:Rubner 提供了一个用 C 编写的 Earth Mover's Distance (EMD) 例程 [EMD C 代码位置],他还提供了两个调用 EMD 例程的示例 C 程序。我正在尝试开发一个 Python 例程,用来替代(例如)example2.c 去调用 EMD 例程。(是的,我熟悉 EMD 的 OpenCV 实现。)

为方便起见,这里是我想从 python 调用的 emd.c 代码的头文件:

/* DEFINITIONS */
#define MAX_SIG_SIZE   100
#define MAX_ITERATIONS 500
#define INFINITY       1e20
#define EPSILON        1e-6

/*****************************************************************************/
/* feature_t SHOULD BE MODIFIED BY THE USER TO REFLECT THE FEATURE TYPE      */
typedef int feature_t; 
/* typedef struct { int X,Y,Z; } feature_t;*/
/*typedef struct { int X; } feature_t; */
/*****************************************************************************/

/* A signature: n features, each paired with a weight, held in two
   separately allocated vectors that the struct only points to. */
typedef struct
{
  int n;                /* Number of features in the signature */
  feature_t *Features;  /* Pointer to the features vector */
  float *Weights;       /* Pointer to the weights of the features */
} signature_t;

/* One flow entry: an amount moved from a feature of signature 1 to a
   feature of signature 2, both identified by feature number. */
typedef struct
{
  int from;             /* Feature number in signature 1 */
  int to;               /* Feature number in signature 2 */
  float amount;         /* Amount of flow from "from" to "to" */
} flow_t;

/* emd: takes two signatures, a caller-supplied function that receives two
   feature pointers and returns a float, a flow_t buffer and an int
   pointer; returns a float.
   NOTE(review): presumably func is the ground distance between two
   features, Flow/FlowSize are outputs filled in by the routine, and the
   return value is the EMD itself -- confirm against emd.c. */
float emd(signature_t *Signature1, signature_t *Signature2,
      float (*func)(feature_t *, feature_t *),
      flow_t *Flow, int *FlowSize);

#endif

最后,这是我到目前为止拼凑出来的 Python 代码。我认为(但不确定)我已经正确设置了结构体。(请注意,这是 Rubner emd.c 代码中可能的特征结构的简化版本。我最终希望让整个程序都能正常工作,但现在先从简单的开始。)我遇到的第一个问题出在被调用函数的 argtypes 的某处。我尝试了一些变体,但网络上可用的示例非常少,我碰壁了。

import ctypes

MAX_FEATURE_SIZE = 30
ARRAYFE = ctypes.c_int*MAX_FEATURE_SIZE
ARRAYWE= ctypes.c_float*MAX_FEATURE_SIZE 
ARRAYFL = ctypes.c_float*(2*MAX_FEATURE_SIZE-1)
flowSize = ctypes.c_int

emdlib = ctypes.CDLL('emdlib.dylib')
ctypes.CMPFUNC = ctypes.CFUNCTYPE(ctypes.c_float, ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_int))

# Distance callback intended for emd(): prints both arguments and returns
# abs(f1 - f2).
# NOTE(review): the CMPFUNC prototype declares both parameters as
# POINTER(c_int), so f1 and f2 presumably arrive as ctypes pointer objects;
# subtracting them directly is then not an integer subtraction. The values
# would need to be dereferenced first (f1[0], f2[0]).
def py_dist_func(f1,f2):
    print "dist: ", f1, f2
    return(abs(f1-f2))

dist = ctypes.CMPFUNC(py_dist_func)

n = ctypes.c_int
flowSize = ctypes.c_int

# ctypes counterpart of the C flow_t struct (int, int, float).
# NOTE(review): "from" is a reserved word in Python, so this field cannot be
# read with attribute syntax (`flow.from` is a syntax error); it would need
# getattr(flow, "from") or a different Python-side field name.
class flow_t(ctypes.Structure):
    _fields_ = [("from", ctypes.c_int),
                ("to", ctypes.c_int),
                ("amount", ctypes.c_float)]

# ctypes counterpart of the C signature_t struct.
# NOTE(review): the C header declares Features and Weights as pointers
# (feature_t *, float *), whereas these fields are fixed-size arrays embedded
# in the struct, so the memory layout does not match what emd() expects.
class signature_t(ctypes.Structure):
    _fields_ = [("N", n),("feature", ARRAYFE),
                ("weight", ARRAYWE)]

# emdlib.emd.argtypes = [ctypes.POINTER(signature_t), ctypes.POINTER(signature_t), ctypes.POINTER(ctypes.c_float), ctypes.POINTER(flow_t), ctypes.POINTER(ctypes.c_int)]

# emdlib.emd.argtypes = [ctypes.POINTER(signature_t), ctypes.POINTER(signature_t), ctypes.CMPFUNC(py_dist_func), ctypes.POINTER(flow_t), ctypes.POINTER(ctypes.c_int)]


# Declare the parameter types of emd().
# NOTE(review): the third parameter in the C prototype is a function pointer
# (float (*func)(feature_t *, feature_t *)), not a float; the callback type
# built with CFUNCTYPE is what belongs in that slot.
emdlib.emd.argtypes = [ctypes.POINTER(signature_t), ctypes.POINTER(signature_t), ctypes.c_float, ctypes.POINTER(flow_t), ctypes.POINTER(ctypes.c_int)]

# emd.restype  = ctypes.c_float
# NOTE(review): the C prototype declares emd() as returning float, so
# flow_t does not match the declared return type.
emdlib.emd.restype  = flow_t

# Create the two signature structs that will be passed to emd().
signature1=signature_t()
signature2=signature_t()
# NOTE(review): these four names are bound to the array *types* here and
# then rebound to plain lists just below, so these assignments have no effect.
feature1 = ARRAYFE
feature2 = ARRAYFE
weight1 =ARRAYWE
weight2 = ARRAYWE

# Sample data: four features for signature 1, two for signature 2.
feature1 = [0,1,2,3]
feature2 = [0,3]
weight1 = [1,1,1,1]
weight2 = [1,1] 

#signature1= [4,feature1, weight1]
#signature2 = [2, feature2, weight2]
# sample: arr = (ctypes.c_int * len(pyarr))(*pyarr)

# Copy the Python lists into fixed-size ctypes arrays; entries beyond the
# supplied values keep the ctypes default of zero.
# NOTE(review): only signature1.N is assigned; signature2.N is never set in
# this block.
signature1.N = len(feature1)
signature1.feature = (ctypes.c_int * MAX_FEATURE_SIZE)(*feature1)
signature2.feature = (ctypes.c_int * MAX_FEATURE_SIZE)(*feature2)
signature1.weight = (ctypes.c_float * MAX_FEATURE_SIZE)(*weight1)
signature2.weight = (ctypes.c_float * MAX_FEATURE_SIZE)(*weight2)


# Call emd() on the two signatures with the Python distance callback.
# NOTE(review): the last two arguments are types, not instances:
# ctypes.POINTER(flow_t) is a pointer type and flowSize is bound to
# ctypes.c_int itself, so no flow buffer or int storage is actually passed.
e = emdlib.emd(ctypes.byref(signature1), ctypes.byref(signature2), dist, ctypes.POINTER(flow_t), flowSize)

print "EMD= ", e
print "flowSize", flowSize

任何关于我哪里出错的建议将不胜感激。

我确定我会遇到的第二个问题是返回指针的参数类型;这里的任何建议也将不胜感激。

提前致谢。

-------------- 更新:可以正常工作的代码

import ctypes
import math
import itertools

# Upper bound on features per signature; used further down to size the
# flow buffer.
MAX_FEATURE_SIZE = 25

# Element types for features, weights and counts.
FEATURE_t, WEIGHT_t, COUNT_t = ctypes.c_int, ctypes.c_float, ctypes.c_int

# Pointer types matching each element type above.
FEATURE_ptr = ctypes.POINTER(FEATURE_t)
WEIGHT_ptr = ctypes.POINTER(WEIGHT_t)
COUNT_ptr = ctypes.POINTER(COUNT_t)

class FLOW_t(ctypes.Structure):
    """One flow record: two int feature numbers and a float amount.

    The first field is spelled "frm" here; ``from`` is a reserved word in
    Python and could not be read back with attribute syntax.
    """

    _fields_ = [
        ("frm", ctypes.c_int),
        ("to", ctypes.c_int),
        ("amount", ctypes.c_float),
    ]

# A pointer-typed field accepts a ctypes array built as TYPE * array_len,
# much as C lets you write 'char *foo = "ABCDEF"'.
class SIGNATURE_t(ctypes.Structure):
    """A signature: a feature count plus pointers to the feature and weight vectors."""

    _fields_ = [
        ("N", COUNT_t),
        ("feature", FEATURE_ptr),
        ("weight", WEIGHT_ptr),
    ]

# Pointer types used when declaring the C function's parameters.
SIGNATURE_ptr = ctypes.POINTER(SIGNATURE_t)
FLOW_ptr = ctypes.POINTER(FLOW_t)

# Flow buffer type: room for 2*MAX_FEATURE_SIZE - 1 flow records.
FLOW_ARRAY_t = FLOW_t * (2 * MAX_FEATURE_SIZE - 1)

# Callback prototype: receives two feature pointers and returns a float.
CMPFUNC_t = ctypes.CFUNCTYPE(ctypes.c_float, FEATURE_ptr, FEATURE_ptr)

# Convenience function - keeps us from having to remember all the types and parameters later on

def make_signature(features, weights):
    """Build a SIGNATURE_t from two parallel Python sequences.

    features -- sequence of feature values, one per feature
    weights  -- sequence of weights; weights[i] belongs to features[i]

    Returns a SIGNATURE_t whose N is len(features) and whose pointer fields
    refer to newly created ctypes arrays holding copies of the values.

    Raises ValueError if the two sequences differ in length.
    """
    count = len(features)
    # N is the only length stored in the struct, so it must describe both
    # arrays: a shorter weights array would leave fewer than N elements
    # behind the weight pointer.
    if len(weights) != count:
        raise ValueError(
            "features and weights must have the same length (got %d and %d)"
            % (count, len(weights)))

    sig = SIGNATURE_t()
    sig.N = count
    # Assigning a ctypes array to a POINTER field stores its address; the
    # structure keeps a reference to the array object.
    sig.feature = (FEATURE_t * count)(*features)
    sig.weight = (WEIGHT_t * count)(*weights)
    return sig

# We want to pass into C a custom distance function from Python
def py_dist_func(f1,f2):
    """Callback target: read the value behind each feature pointer and
    return distance() of the two values."""
    return distance(f1[0], f2[0])

# set this up as a holder for distance function between any two n-D points
def distance(p0,p1):
    """Return the absolute difference of p0 and p1 as a float."""
    delta = p0 - p1
    return math.fabs(delta)

# Wrap the Python distance function in the C callback type. The wrapper is
# bound to a module-level name, so it stays referenced while C may call it.
dist_callback = CMPFUNC_t(py_dist_func)

# Load the shared library and declare the signature of emd().
emdlib = ctypes.CDLL('emdlib.dylib')
emdlib.emd.argtypes = [
    SIGNATURE_ptr,   # first signature
    SIGNATURE_ptr,   # second signature
    CMPFUNC_t,       # distance callback
    FLOW_ptr,        # flow buffer
    COUNT_ptr,       # flow count
]
emdlib.emd.restype = ctypes.c_float

feature1 = [0, 1,2,3,4,5,6,7,8]
feature2 = [0, 1,2,3,4,5,6,7,8]
weight1 = [0.275,0.296,0.002,0.131,0.208,0.048,0.058,0.098,0.455]
weight2 = [0.285,0.421,0.028,0.021,0.240,0.166,0.023,0.054,0.469]

#print "Creating signatures"
signature1 = make_signature(feature1, weight1)
signature2 = make_signature(feature2, weight2)

flow_array = FLOW_ARRAY_t()
flow_size = COUNT_t()

#print "Calling EMD"
e = emdlib.emd(ctypes.byref(signature1),
               ctypes.byref(signature2),
               dist_callback,
               flow_array,
               ctypes.byref(flow_size))

print "EMD= ", e
print "Number of FlowS", flow_size.value

print "Flow"
print "from to amount"
totalFlow=0.0
for i in range(0,flow_size.value):
#   print "Flow from %d to %d amount :%f" %(flow_array[i].frm, flow_array[i].to, flow_array[i].amount)
    print "  %d  %d  %f" %(flow_array[i].frm, flow_array[i].to, flow_array[i].amount)
    totalFlow=totalFlow+flow_array[i].amount

#
# now adjust EMD to account for different signature masses and make it a metric
alpha=1.0

mass1=sum(weight1)
mass2=sum(weight2)

fList=[feature1,feature2]

max_distance= 0.0
for p0, p1 in list(itertools.product(*fList)):
#   print p0,p1, distance(p0,p1), max_distance
    max_distance = max(max_distance, distance(p0, p1))

print "\nMax distance= %f" % max_distance
print "Total Source = %f" % mass1
print "Total Demand = %f" % mass2
print "Total Flow= %f\n " % totalFlow
print "Alpha = %f\n" %alpha

# emdHat = e*totalFlow+math.sqrt((mass1-mass2)*(mass1-mass2))*alpha*max_distance
emdHat = e*totalFlow+math.fabs((mass1-mass2))*alpha*max_distance
print "Corrected Earth Movers Distance \n"
print "emdHat = %f\n" % emdHat;
4

1 回答 1

1

通过各种神秘的方法和宝贵的意见,我终于得到了一段可以工作的代码。正如我在评论中提到的,我不确定这里的礼仪是什么,但我已经看到了足够多的类似问题,所以我认为发布最终的代码会很有用。代码并不漂亮;如果您觉得它有用并对它做了整理,我会很感激您提供一个指向更优雅实现的链接。

于 2013-07-30T23:40:34.310 回答