在 Oracle 中,我可以使用“NEAR((a,b,c), 5)”之类的查询在 Clob 中进行搜索。这是来自 oracle 的文档:
使用 NEAR 运算符返回基于两个或多个查询词的接近度的分数。Oracle Text 为文档中相距较近的术语返回较高的分数,而为相距较远的术语返回较低的分数。
我怎样才能在 Postgres 中做到这一点?我只需要一个可以搜索另一个词附近的词的索引。
在 Oracle 中,我可以使用“NEAR((a,b,c), 5)”之类的查询在 Clob 中进行搜索。这是来自 oracle 的文档:
使用 NEAR 运算符返回基于两个或多个查询词的接近度的分数。Oracle Text 为文档中相距较近的术语返回较高的分数,而为相距较远的术语返回较低的分数。
我怎样才能在 Postgres 中做到这一点?我只需要一个可以搜索另一个词附近的词的索引。
这是一个汉明距离函数http://en.wikipedia.org/wiki/Hamming_distance
头文件
/**
@file HammingDistance.h A C header file to compute the Hamming Distance between two strings
as PostgreSQl C User Defined Function
*/
#ifndef HAMMINGDISTANCE_H_INCLUDED
#define HAMMINGDISTANCE_H_INCLUDED
DLLEXPORT Datum DistanceCstring(PG_FUNCTION_ARGS);
DLLEXPORT Datum Distance(PG_FUNCTION_ARGS);
#endif // HAMMINGDISTANCE_H_INCLUDED
源文件
/**
@file HammingDistance.c A C source file to compute the Hamming Distance between two strings
as PostgreSQl C User Defined Function
*/
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include "postgres.h"
#include "utils/geo_decls.h"
#include "utils/builtins.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#define VC_EXTRALEAN
#pragma warning (disable : 4996)
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif
#ifdef _WIN32
#define DLLEXPORT _declspsec(dllexport)
#else
#define DLLEXPORT
#endif // _WIN32
static int32_t DistanceString( unsigned char * a, int32_t la , unsigned char * b, int32_t lb);
static int32_t DistanceUChar( unsigned char a, unsigned char b);
PG_FUNCTION_INFO_V1(Distance);
/**
DistanceCstring An SQL function to Compute Hamming Distance between two text types
@param[in] A a Text type;
@param[in] B a text type
@return The hamming Distance between two Text types
**/
DLLEXPORT Datum Distance(PG_FUNCTION_ARGS)
{
text * a = PG_GETARG_TEXT_PP(0);
text * b = PG_GETARG_TEXT_PP(1);
unsigned char * ac;
unsigned char * bc;
int32_t distance = 0;
ac = text_to_cstring( a );
bc = text_to_cstring( b );
distance = DistanceString( ac, strlen(ac), bc, strlen(bc) );
PG_RETURN_INT32(distance);
}
PG_FUNCTION_INFO_V1(DistanceCstring);
/**
DistanceCstring An SQL function to Compute Hamming Distance between two strings
@param[in] A a Cstring type
@param[in] B a Cstring type
@return The hamming Distance between two Cstring types
**/
DLLEXPORT Datum DistanceCstring(PG_FUNCTION_ARGS)
{
unsigned char * ac = (unsigned char *) PG_GETARG_CSTRING(0);
unsigned char * bc = (unsigned char *) PG_GETARG_CSTRING(1);
int32_t distance = 0;
distance = DistanceString( ac, strlen(ac), bc, strlen(bc) );
PG_RETURN_INT32(distance);
}
/**
DistanceString Compute Hamming Distance between two unsigned char strings
@param[in] a an unsigned char array
@param[in] la length of a in char
@param[in] b an unsigned char array
@param[in] lb length of b in char
@return Hamming distance
**/
static int32_t DistanceString( unsigned char * a, int32_t la , unsigned char * b, int32_t lb)
{
unsigned char * smaller;
unsigned char * larger;
int i = 0;
int length = 0;
int32_t distance = 0;
int delta = 0;
if ( lb > la )
{
delta = lb - la;
length = la;
smaller = a;
larger = b;
}
else
{
delta = la - lb;
length = lb;
smaller = b;
larger = a;
}
for( i = 0; i < length; i++ )
{
distance += DistanceUChar( * smaller++, * larger++);
}
for( i = 0; i < delta ; i++ )
{
distance += DistanceUChar( 0, * larger++);
}
return distance;
}
/**
DistanceUChar Compute Hamming Distance between two unsigned chars
@param[in] a unsigned char
@param[in] b unsigned char
@return Hamming Distance between two unsigned chars
**/
static int32_t DistanceUChar( unsigned char a, unsigned char b)
{
unsigned char x = a ^ b;
int32_t distance = 0;
if ( (x & 0x1 )== 0x1 )
distance++;
if ( (x & 0x2) == 0x2 )
distance++;
if ( (x & 0x4) == 0x4 )
distance++;
if ( (x & 0x8) == 0x8 )
distance++;
if ( x & 0x10 == 0x10 )
distance++;
if ( (x & 0x20) == 0x20 )
distance++;
if ( (x & 0x40) == 0x40 )
distance++;
if ( (x & 0x80) == 0x80 )
distance++;
return distance;
}
生成文件
OPTS := -g -fpic -c -I /opt/PostgreSQL/9.1/include/postgresql/server
INSTALLDIR := /opt/PostgreSQL/9.1/lib/postgresql
all: HammingDistance.so
HammingDistance.so: HammingDistance.c HammingDistance.h
gcc HammingDistance.c $(OPTS) -o HammingDistance.o
gcc -shared -o HammingDistance.so HammingDistance.o
clean:
rm -rf *.o *.so
register:
psql -f install.sql -p 5433 -U postgres -d postgres ;
install:
sudo cp HammingDistance.so $(INSTALLDIR);
test:
psql -f Test.sql -p 5433 -U postgres -d postgres ;
安装 SQL
-- define the schema
set search_path to public;
-- Remove existing function
drop function if exists Distance( text, text ) cascade;
drop function if exists Distance( cstring , cstring ) cascade;
-- Create the new one
create or replace function Distance( text, text ) returns integer
as '$libdir/HammingDistance', 'Distance'
language c strict;
create or replace function Distance( cstring, cstring ) returns integer
as '$libdir/HammingDistance', 'DistanceCstring'
language c strict