我已经使用 python PLY 库编写了一个解析器。
Elasticsearch 的映射(mapping)定义如下所示:
{
"settings": {
"index": {
"number_of_shards": "5",
"number_of_replicas": "1"
}
},
"mappings": {
"type1": {
"properties": {
"prop1": {
"type": "keyword"
},
"prop2": {
"type": "keyword"
},
"query": {
"properties": {
"regexp": {
"properties": {
"prop1": {
"type": "keyword"
},
"prop2": {
"type": "keyword"
}
}
}
}
}
}
},
"type2": {
"properties": {
"prop3": {
"type": "keyword"
},
"prop4": {
"type": "keyword"
},
"prop5": {
"type": "keyword"
}
}
}
}
}
解析器如下所示:
import ply.lex as lex
# Names of every token type the lexer rules below can produce. PLY requires
# grammar rules to refer to terminals by exactly these names.
tokens = ('LP', 'RP', 'FUNC1', 'FUNC2', 'OP', 'PARAM')
# A PARAM is a run of characters containing no whitespace and none of the
# delimiters  /  (  )  ,  &  :  "  ~
# All whitespace is excluded (not only the space character) so that tabs and
# newlines separate tokens instead of being swallowed into a parameter.
t_PARAM = r'[^\s/(),&:"~]+'
# Lexer rule for runs of newlines. The docstring is the regex PLY matches.
def t_newline(t):
    r'\n+'
    # Advance the lexer's line counter by the number of newlines consumed.
    # Nothing is returned, so no token is emitted for this match.
    t.lexer.lineno += len(t.value)
# Characters the lexer skips between tokens: space and tab.
t_ignore = " \t"
# Lexer rule for the set operators INTERSECT, UNION and MINUS.
# NOTE(review): PLY compiles rule patterns with re.VERBOSE by default, so the
# spaces inside the pattern are presumably ignored and the token value is the
# bare keyword -- confirm if lex.lex() is ever called with custom reflags.
def t_OP(t):
    r' INTERSECT | UNION | MINUS '
    return t
# Lexer rule for a literal opening parenthesis; emitted as token type LP.
def t_LP(t):
    r'\('
    return t
# Lexer rule for a literal closing parenthesis; emitted as token type RP.
def t_RP(t):
    r'\)'
    return t
# Lexer rule for the FUNC1 keyword. The match is case-insensitive so that
# queries written as "func1(...)" are recognised as well as "FUNC1(...)".
# Being a function rule, it is tried before the string rule t_PARAM.
def t_FUNC1(t):
    r'(?i:FUNC1)'
    return t
# Lexer rule for the FUNC2 keyword. The match is case-insensitive so that
# queries written as "func2(...)" are recognised as well as "FUNC2(...)".
# Being a function rule, it is tried before the string rule t_PARAM.
def t_FUNC2(t):
    r'(?i:FUNC2)'
    return t
# Lexer error hook: report the offending character and resume scanning
# one character further on.
def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)
# Build the lexer object via ply.lex.
# NOTE(review): presumably collects the `tokens` tuple and t_* rules defined
# above in this module -- confirm against the PLY documentation.
lexer = lex.lex()
import ply.yacc as yacc
# All three set operators share one precedence level and associate to the
# left, so "a MINUS b MINUS c" is read as "(a MINUS b) MINUS c". Without this
# declaration the binary rule below is ambiguous (shift/reduce conflict).
precedence = (
    ('left', 'OP'),
)


# Grammar rule: combine two sub-queries with a set operator.
# The docstring is the grammar production PLY reads; keep it as the first
# statement of the function.
def p_expr_op_expr(p):
    'expression : expression OP expression'
    # p[1] / p[3] are the values produced for the operand expressions
    # (expected to be Elasticsearch query dicts); p[2] is the operator text.
    left, op, right = p[1], p[2], p[3]
    if op == 'INTERSECT':
        # AND: a document has to match both operands.
        p[0] = {'bool': {'must': [left, right]}}
    elif op == 'MINUS':
        # Difference: match the left operand but not the right one.
        p[0] = {'bool': {'must': [left], 'must_not': [right]}}
    elif op == 'UNION':
        # OR: at least one operand has to match. minimum_should_match is
        # spelled out so the meaning does not depend on the query context.
        p[0] = {'bool': {'should': [left, right], 'minimum_should_match': 1}}
    else:
        raise ValueError('Unknown operator %r' % (op,))
# Grammar rule: a parenthesised expression yields the value of the inner
# expression. The terminals are LP / RP, the names declared in `tokens`.
def p_expr_paren(p):
    'expression : LP expression RP'
    p[0] = p[2]
# Grammar rule: FUNC1(<param>) searches field `prop1` of the mapping.
# The terminals are LP / RP, the names declared in `tokens`.
def p_expr_func1(p):
    'expression : FUNC1 LP PARAM RP'
    # p[3] is the PARAM text. A regexp query is used so that patterns such
    # as "bar.*" work; a plain word is a regex matching only itself.
    p[0] = {'regexp': {'prop1': p[3]}}
# Grammar rule: FUNC2(<param>) searches field `prop2` of the mapping.
# The terminals are LP / RP, the names declared in `tokens`.
def p_expr_func2(p):
    'expression : FUNC2 LP PARAM RP'
    # p[3] is the PARAM text. A regexp query is used so that patterns such
    # as "bar.*" work; a plain word is a regex matching only itself.
    p[0] = {'regexp': {'prop2': p[3]}}
# Grammar rule: a bare parameter (no FUNC wrapper) searches field `prop3`
# of the mapping.
def p_expr_param(p):
    'expression : PARAM'
    # p[1] is the PARAM text. A regexp query is used so that patterns such
    # as "foo.*" work; a plain word is a regex matching only itself.
    p[0] = {'regexp': {'prop3': p[1]}}
# Parser error hook. PLY passes the offending token, or None when the
# error is detected at the end of the input (e.g. an unfinished expression).
def p_error(p):
    if p is None:
        # No token to report; accessing p.value here would raise AttributeError.
        print("Syntax error at EOF")
        return
    print("Syntax error at '%s'" % p.value)
# Build the parser from the p_* grammar rules defined in this module.
parser = yacc.yacc()

# Interactive loop: read one query per line until end-of-file, parse it and
# print whatever value the start rule produced.
while True:
    try:
        s = input('input > ')
    except EOFError:
        break
    if not s.strip():
        # Blank line: nothing to parse, and an empty input would only
        # trigger the parser's end-of-input error path.
        continue
    result = parser.parse(s)
    if result is not None:
        print(result)
输入查询示例:
1.) func1(foo) UNION func2(bar) => union is OR
2.) (func1(foo) UNION func2(bar.*)) MINUS baz
3.) (func1(foo) UNION func2(bar.*)) MINUS func1(boo)
4.) (foo.* UNION bar) INTERSECT baz.*
我的解析器工作得很好,但我不确定在解析后我应该如何形成 es 查询。例如:
如果我的输入是func1(foo) UNION func2(bar)
func1(foo)
将被函数 p_expr_func1 解析为 {'regexp': {'prop1': 'foo'}}
func2(bar)
将被函数 p_expr_func2 解析为 {'regexp': {'prop2': 'bar'}}
接下来会进入 p_expr_op_expr,因为表达式中包含运算符 UNION;那么我应该如何在这里写出最终表示并集(UNION)的 ES 查询?
请建议或指出一些示例,它们在通过 ply 解析后形成 es 查询,或者解析这些表达式并形成 es 查询的最佳方法是什么?
这里,表达式中的 func1、func2 有助于决定引用 es schema 的哪个属性。