您可以使用Levenshtein 距离
编辑
因为我自己最终需要类似的东西,所以这个问题仍然悬而未决。这是我玩过的一些代码。直线向上的字符串距离以及将 Levenshtein 算法应用于路径标记。
C++ 代码
/*
----- String based Levenshtein ----
Matching : this/is/a/strange/path
0 : this/is/a/strange/path
2 : this/is/a/strong/path
2 : that/is/a/strange/path
4 : is/this/a/strange/path
5 : this/is/a/strange
13 : this/is/a
15 : this/is
16 : that/was
18 : this
24 : completely/different/folder
----- Path based Levenshtein ----
Matching : this/is/a/strange/path
0 : this/is/a/strange/path
1 : this/is/a/strange
2 : this/is/a/strong/path
2 : that/is/a/strange/path
2 : this/is/a
2 : is/this/a/strange/path
3 : this/is
4 : this
7 : that/was
8 : completely/different/folder
*/
#include <string>
#include <vector>
#include <algorithm>
#include <stdio.h>
/// returns the smaller of two parameters
template< typename T >
T levmin( T v1, T v2 )
{ return ( v1 < v2 ) ? v1 : v2; }
/// Returns the Levenshtein distance between the specified strings
template < typename T, typename T_STR >
typename T_STR::size_type levstr(const T_STR &s1, const T_STR &s2)
{
typename T_STR::size_type l1 = s1.length(), l2 = s2.length();
std::vector< typename T_STR::size_type > d( ( l1 + 1 ) * ( l2 + 1 ) );
typename T_STR::size_type i, j;
for ( i = 0; i <= l1; i++ )
d[ i * l2 ] = i;
for ( i = 0; i <= l2; i++ )
d[ i ] = i;
for ( i = 1; i <= l1; i++ )
for ( j = 1; j <= l2; j++ )
d[ i * l2 + j ] = levmin( levmin( d[ ( i - 1 ) * l2 + j ] + 1, d[ i * l2 + ( j - 1 ) ] + 1 ),
d[ ( i - 1 ) * l2 + ( j - 1 ) ] + ( s1[ i - 1 ] == s2[ j - 1 ] ? 0 : 1 )
);
return d[ ( l1 * l2 ) + l2 ];
}
/// Returns the Levenshtein distance between the specified split paths
template < typename T, typename T_STR, typename T_LST >
typename T_STR::size_type levpath(const T_LST &lst1, const T_LST &lst2)
{
typename T_STR::size_type l1 = lst1.size(), l2 = lst2.size();
std::vector< typename T_STR::size_type > d( ( l1 + 1 ) * ( l2 + 1 ) );
typename T_STR::size_type i, j;
for ( i = 0; i <= l1; i++ )
d[ i * l2 ] = i;
for ( i = 0; i <= l2; i++ )
d[ i ] = i;
for ( i = 1; i <= l1; i++ )
for ( j = 1; j <= l2; j++ )
d[ i * l2 + j ] = levmin( levmin( d[ ( i - 1 ) * l2 + j ] + 1, d[ i * l2 + ( j - 1 ) ] + 1 ),
d[ ( i - 1 ) * l2 + ( j - 1 ) ] + levstr< T, T_STR>( lst1[ i - 1 ], lst2[ j - 1 ] )
);
return d[ ( l1 * l2 ) + l2 ];
}
/// Generic split path function
/*
Returns a vector of path tokens
*/
template < typename T, typename T_STR, typename T_LST >
T_LST splitpath( const T_STR &s, const T sep )
{ T_LST lst;
typename T_STR::size_type i = 0, l = 0;
while( T_STR::npos != ( i = s.find_first_of( sep, i ) ) )
{ if ( l < i )
lst.push_back( T_STR( s, l, i - l ) );
l = ++i;
} // end while
if ( l < s.length() )
lst.push_back( T_STR( s, l, s.length() - l ) );
return lst;
}
/// Generic join path function
/*
Returns a string of joined vector tokens
*/
template < typename T, typename T_STR, typename T_LST >
T_STR joinpath( const T_LST &p, const T sep )
{ T_STR s;
for ( typename T_LST::const_iterator it = p.begin(); it != p.end(); it++ )
{ if ( s.length() ) s += sep; s += *it; }
return s;
}
// String types
typedef char t_levchar;
typedef std::basic_string< t_levchar > t_levstr;
typedef std::vector< t_levstr > t_levpath;
typedef std::vector< t_levpath > t_levpathlist;
// Sort compare for split paths
template< typename T, typename T_STR, typename T_LST > struct levcmp
{ levcmp( const T_LST &p ) { m_p = p; }
bool operator() ( const T_LST &i, const T_LST &j )
{ return levpath< T, T_STR, T_LST >( i, m_p ) < levpath< T, T_STR, T_LST >( j, m_p ); }
T_LST m_p;
};
// Sort compare for strings
template< typename T, typename T_STR > struct levcmp_str
{ levcmp_str( const T_STR &s ) { m_s = s; }
bool operator() ( const T_STR &i, const T_STR &j )
{ return levstr< T, T_STR >( i, m_s ) < levstr< T, T_STR >( j, m_s ); }
T_STR m_s;
};
int main( int argc, char* argv[] )
{
// Path to compare with
const t_levchar* compare_path = "this/is/a/strange/path";
// Paths to sort
const t_levchar* path_list[] =
{
"this/is/a/strong/path",
"that/is/a/strange/path",
"this/is/a/strange",
"this/is",
"this/is/a",
"this",
"this/is/a/strange/path",
"is/this/a/strange/path",
"that/was",
"completely/different/folder",
0
};
printf( "\n----- String based Levenshtein ----\n"
"Matching : %s\n\n", compare_path );
// Create vector from paths
std::vector< t_levstr > paths;
for( int i = 0; path_list[ i ]; i++ )
paths.push_back( path_list[ i ] );
// Sort the paths
std::sort( paths.begin(), paths.end(), levcmp_str< t_levchar, t_levstr >( compare_path ) );
// Show the result
for ( std::vector< t_levstr >::iterator it = paths.begin(); it != paths.end(); it++ )
printf( "%d : %s\n",
(int)levstr< t_levchar, t_levstr >( *it, compare_path ),
(const char*)it->c_str() );
printf( "\n----- Path based Levenshtein ----\n"
"Matching : %s\n\n", compare_path );
// Create vector from split paths
t_levpath splitcompare = splitpath< t_levchar, t_levstr, t_levpath >( compare_path, '/' );
t_levpathlist splitpaths;
for ( int i = 0; path_list[ i ]; i++ )
splitpaths.push_back( splitpath< t_levchar, t_levstr, t_levpath >( path_list[ i ], '/' ) );
// Sort the paths
std::sort( splitpaths.begin(), splitpaths.end(),
levcmp< t_levchar, t_levstr, t_levpath >( splitcompare ) );
// Show the results
for ( t_levpathlist::iterator it = splitpaths.begin(); it != splitpaths.end(); it++ )
printf( "%d : %s\n",
(int)levpath< t_levchar, t_levstr, t_levpath >( *it, splitcompare ),
(const char*)joinpath< t_levchar, t_levstr, t_levpath >( *it, '/' ).c_str() );
return 0;
}