我的程序的目标是通过一个字符串查看并能够取出对话问题和答案。
例如:
("do you like me?"
("yes" ("we're friends"))
("no" ("i hate you"))
)
该程序会取出“你喜欢我吗?” 并会让您选择输入是或否。一旦你选择了相应的选项,它就会抛出“我们是朋友”或“我恨你”。
有没有关于如何做到这一点的库或任何解决方案?
如果我错了,请纠正我,但 Lisp 解析器可以很好地完成这项工作。:P 说真的,这看起来像是带括号的字符串列表或其他带括号的表达式。一个简单的递归解析器就足够了,只需发明一个数据结构来创建为适合您需要的解析树。
编辑:该死,我终于成功了……嗯,我必须承认,在晚上 10 点到 12 点之间正确地启动一个非常简单的解析器也不是一件容易的事。
/*
* lrparser.c
* LR-parser
* A recursive Lisp-subset parser
* that has a misleading name (it's not an LALR, but a recursive descent one).
*
* Originally written to answer
* http://stackoverflow.com/questions/15371008/string-processing-in-c/
*
* Made in some *really* bored hours by Árpád Goreity (H2CO3)
* on 12-03-2013
*
* Language: C99 (not sure if POSIX)
*/
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
// AST node type
enum {
NODE_STRING,
NODE_LIST
};
// Permitted tokens
enum {
TOKEN_INVALID = -1,
TOKEN_LPAREN = 0,
TOKEN_RPAREN,
TOKEN_STRING,
TOKEN_END
};
// Useful for debugging and error reporting
static const char *toknames[] = {
"Left paren",
"Right paren",
"String",
"End"
};
// ...Or simply an AST node...
struct ParseTree {
int type; // string or list
char *string; // if any
struct ParseTree **children;
size_t n_children;
};
// Construct a node structure from a type and any necessary data
static struct ParseTree *node_new(int type, ...)
{
va_list args;
va_start(args, type);
struct ParseTree *node = malloc(sizeof(*node));
assert(node != NULL);
node->type = type;
if (type == NODE_STRING) {
/* If the node is a string, fill it
* (ownership transfer: the argument will be
* free()'d by the node_free() function)
*/
node->string = va_arg(args, char *);
}
node->children = NULL;
node->n_children = 0;
va_end(args);
return node;
}
void node_free(struct ParseTree *tree)
{
switch (tree->type) {
case NODE_STRING:
free(tree->string);
break;
case NODE_LIST:
for (int i = 0; i < tree->n_children; i++) {
node_free(tree->children[i]);
}
free(tree->children);
break;
default:
fprintf(stderr, "Warning: unknown node type %d\n", tree->type);
break;
}
free(tree);
}
// Sorry, the usual logarithmic storage expansion is omitted for clarity
void node_add(struct ParseTree *parent, struct ParseTree *child)
{
assert(parent != NULL);
assert(child != NULL);
parent->n_children++;
parent->children = realloc(parent->children, sizeof(parent->children[0]) * parent->n_children);
// Lazy error checking: assert() instead of compare to NULL
assert(parent->children != NULL);
parent->children[parent->n_children - 1] = child;
}
// Just in order to break thread safety
static const char *s = NULL; // the string to be parsed
static char *curstr = NULL; // the contents of the string value of the current token
static int curtok; // the current token
// The tokenizer
static int lex()
{
// Whitespace doesn't matter
while (isspace(s[0])) {
s++;
}
// end of string
if (s[0] == 0) {
return TOKEN_END;
}
// The followin four are obvious
if (s[0] == '(') {
s++;
return curtok = TOKEN_LPAREN;
}
if (s[0] == ')') {
s++;
return curtok = TOKEN_RPAREN;
}
if (s[0] == '"') {
const char *begin = s;
while (*++s != '"')
;
size_t sz = s - begin - 2 + 1;
curstr = malloc(sz + 1);
memcpy(curstr, begin + 1, sz);
curstr[sz] = 0;
// skip trailing quotation mark (")
s++;
return curtok = TOKEN_STRING;
}
return curtok = TOKEN_INVALID;
}
void expect(int tok)
{
if (curtok != tok) {
fprintf(stderr, "Error: expected token %s, got %s\n", toknames[tok], toknames[curtok]);
abort();
}
lex();
}
// a. k. a. "parse()"
// Simple recursive (one-level...) descent (root == list) approach
static struct ParseTree *recurse_and_descend()
{
expect(TOKEN_LPAREN);
struct ParseTree *node = node_new(NODE_LIST);
struct ParseTree *child;
while (curtok != TOKEN_RPAREN) {
if (curtok == TOKEN_LPAREN) {
child = recurse_and_descend();
} else if (curtok == TOKEN_STRING) {
child = node_new(NODE_STRING, curstr);
lex();
} else {
fprintf(stderr, "Unexpected token '%s'\n", toknames[curtok]);
// lazy programmer's safety system, let the kernel do the dirty work
abort();
}
node_add(node, child);
}
expect(TOKEN_RPAREN);
return node;
}
static struct ParseTree *parse(const char *str)
{
s = str; // poor man's initialization
lex(); // The first breath of fresh token makes the baby's heart beat
return recurse_and_descend(); // Let's do the Harlem shake!
}
// petite helper function
static void dump_indent(int indent)
{
for (int i = 0; i < indent; i++) {
printf("\t");
}
}
// Because 0x7f502a00 is not very meaningful for the human eye
static void dump_tree(struct ParseTree *tree, int indent)
{
dump_indent(indent);
switch (tree->type) {
case NODE_STRING:
printf("<String \"%s\">\n", tree->string);
break;
case NODE_LIST:
printf("<List>\n");
for (int i = 0; i < tree->n_children; i++) {
dump_tree(tree->children[i], indent + 1);
}
break;
default:
printf("Unknown node\n");
break;
}
}
int main(int argc, char *argv[])
{
struct ParseTree *tree = parse(argv[1]);
dump_tree(tree, 0);
node_free(tree);
return 0;
}
用法:
h2co3-macbook:~ h2co3$ ./lrparser "(\"do you like me?\" (\"yes\" (\"we're friends\")) (\"no\" (\"i hate you\" \"me too\")) )"
<List>
<String "do you like me?">
<List>
<String "yes">
<List>
<String "we're friends">
<List>
<String "no">
<List>
<String "i hate you">
<String "me too">
如果您想要“有效的东西”但不强大,那么很多技巧都可以奏效。如果你真的想让它工作,你需要研究一下LALR(1) 解析器,然后决定这是否足够简单来滚动你自己的解析器(它是),或者你是否想使用像 YACC 这样的东西。
上下文无关语法似乎看起来像
QUESTION => '(' '"' TEXT '"' RESPONSES ')'
RESPONSES => null | RESPONSE RESPONSES
RESPONSE => '(' '"' TEXT '(' '"' TEXT '"' ')' ')'
TEXT => all characters except '(' '"' ')'
然后你分析一下上述语言的哪些组合会导致处理的变化。基本上 RESPONSES 可以还原为什么都没有或以 '(' 开头的东西,这意味着在处理的那个时刻,您可以通过查看前瞻(不是但解析的字符)是'('或')'。
模式内的解析非常简单。如果字符是固定的,只需检查它是否与预期匹配,并推进已解析元素的索引。如果字符不固定(如在文本中),则使用例程检查它是否有界,并且任何超出预期的内容都会使解析器处于错误状态。