$ cat tst.awk
# Pass 1 (while reading the first input file): store every line as a key
# of strings[] so that later lookups are plain hash membership tests.
FNR == NR {
    strings[$0]
    next
}

# Pass 2 (remaining input): walk fields 2..NF and print the record as soon
# as one of them is a stored key. Field 1 is never tested.
{
    fld = 2
    while (fld <= NF && !($fld in strings))
        fld++
    # fld stopped inside the record only if a matching field was found.
    if (fld <= NF)
        print
}
$ awk -f tst.awk strings file
Name:email:username #registered
Name3:email3:username3 #registered #subscribed #phonever
$ cat strings
#registered
#subscribed
#phonever
$ cat file
Name:email:username #registered
Name2:email2:username2
Name3:email3:username3 #registered #subscribed #phonever
Name4:email4:username4 #unconfirmed
如果您的文件很大、目标词集相对较小，并且执行速度对您很重要，那么可以先用下面的脚本生成这些目标词的每个非空子集的所有可能排列：
$ cat subsets.awk
###################
# Calculate all subsets of a given set, see
# https://en.wikipedia.org/wiki/Power_set
# Build the space-separated subset of A selected by the bits of subsetNr.
#   A        - array of values indexed 1..numVals
#   subsetNr - bit mask; its lowest bit selects A[numVals], the next bit
#              selects A[numVals-1], and so on
#   numVals  - index of the element paired with the lowest bit
# Returns the selected elements joined by single spaces, in descending
# index order (empty when subsetNr is 0).
function get_subset(A,subsetNr,numVals,    joined, delim) {
    for (; subsetNr; subsetNr = int(subsetNr / 2)) {
        if (subsetNr % 2) {
            joined = joined delim A[numVals]
            delim = " "
        }
        numVals--
    }
    return joined
}
# Fill B with one key per non-empty subset of A.
#   A - array of values indexed 1..length(A)
#   B - output array; each key is a subset string built by get_subset()
# Masks 1 .. 2^length(A)-1 are enumerated, so the empty subset is skipped.
function get_subsets(A,B,    mask, size, limit) {
    size = length(A)
    limit = 2 ^ size
    mask = 0
    while (++mask < limit)
        B[get_subset(A, mask, size)]    # a bare reference creates the key
}
###################
# Input should be a list of strings
{
    # Start from an empty result set for every input record.
    delete B
    # Split the record into its whitespace-separated words.
    split($0, A)
    get_subsets(A, B)
    # Emit one subset per line, in awk's array traversal order.
    for (key in B)
        print key
}
.
$ cat permutations.awk
###################
# Calculate all permutations of a set of strings, see
# https://en.wikipedia.org/wiki/Heap%27s_algorithm
# Join the elements A[1]..A[length(A)] into one space-separated string.
#   A - array indexed from 1
# Returns the joined string (empty when A has no elements).
function get_perm(A,    idx, count, line) {
    count = length(A)
    for (idx = 1; idx <= count; idx++)
        line = (idx > 1 ? line " " : "") A[idx]
    return line
}
# Exchange the elements stored at indices x and y of array A, in place.
function swap(A, x, y,    held) {
    held = A[y]
    A[y] = A[x]
    A[x] = held
}
# Recursively permute the first n elements of A, recording each complete
# arrangement of A as a key of B.
#   n - number of leading elements still being permuted
#   A - array being rearranged in place
#   B - output array; keys are the joined arrangements from get_perm()
# NOTE(review): textbook Heap's algorithm swaps A[i] when n is even and
# A[1] when n is odd; this code does the reverse. Traced by hand for
# n = 2..6 it still visits every ordering, so the behaviour is kept as is.
function generate(n, A, B,    i) {
    if (n == 1) {
        B[get_perm(A)]    # a bare reference creates the key
        return
    }
    for (i = 1; i <= n; i++) {
        generate(n - 1, A, B)
        # Even n: always swap the first slot; odd n: swap the i-th slot.
        swap(A, (n % 2 == 0 ? 1 : i), n)
    }
}
# Fill B with one key per ordering of the elements of A.
#   A - array indexed from 1; generate() rearranges it in place
#   B - output array; keys are space-joined arrangements of A
function get_perms(A,B,    total) {
    total = length(A)
    generate(total, A, B)
}
###################
# Input should be a list of strings
{
    # Start from an empty result set for every input record.
    delete B
    # Split the record into its whitespace-separated words.
    split($0, A)
    get_perms(A, B)
    # Emit one ordering per line, in awk's array traversal order.
    for (key in B)
        print key
}
.
$ echo '#registered #subscribed #phonever' |
awk -f subsets.awk |
awk -f permutations.awk
#registered #subscribed #phonever
#subscribed #phonever #registered
#phonever #subscribed #registered
#phonever #registered #subscribed
#subscribed #registered #phonever
#registered #phonever #subscribed
#phonever
#subscribed
#registered #subscribed
#subscribed #registered
#registered
#registered #phonever
#phonever #registered
#subscribed #phonever
#phonever #subscribed
然后，您就可以把剩下的处理简化为一次简单的哈希查找：
$ echo '#registered #subscribed #phonever' |
awk -f subsets.awk |
awk -f permutations.awk |
awk '
  # First input (stdin, the generated tag strings): store each line as a key.
  FNR == NR { strings[$0]; next }
  # Later input: k is the record with its first run of non-space characters
  # and the space after it removed, or empty when there is at most one field.
  {
    k = ""
    if (NF > 1) {
      k = $0
      sub(/[^ ]+ /, "", k)
    }
  }
  # Print the record when that remainder is one of the stored keys.
  k in strings
' - file
Name:email:username #registered
Name3:email3:username3 #registered #subscribed #phonever