0

所以我试图从一个输入文件生成一个符号表,该文件包含 C++ 中这样的 C 样式嵌套块;

A: { int a; float b;
B: { float c; int d;
C: { int b; int c;
}
}
D: { float a;
}
}

输出应如下所示。

A: a -> <int, A>
b -> <float, A>
B: a -> <int, A>
b -> <float, A>
c -> <float, B>
d -> <int, B>
C: a -> <int, A>
b -> <int, C> -> <float, A>
c -> <int C> -> <float, B>
d -> <int, local to B>
D: a -> <float D> -> <int, A>
b -> <float, A>

我已经尝试了很多东西。使用向量、地图,现在我终于决定使用多重地图。无论我做什么,我都会遇到同样的问题,所以它可能与我选择的数据结构无关。

问题是,因为我逐行阅读,我最终会比我需要的更多。但是,如果我没有它在 for 循环中为每一行计算/迭代多图,而不是在它们被擦除/弹出后进行迭代。我不确定如何在逻辑上明智地使输出按原样显示,或者我是否走在正确的轨道上。

到目前为止,这是我的 .cpp 文件。忽略评论,因为它们是我目前选择不使用的过去尝试。同样在这个版本中,我没有使用向量,因此您可以忽略与向量相关的代码。我现在只是在使用多图。

#include<iostream>
#include<fstream>
#include<string>
#include <sstream> 
#include <map>
#include <vector>
#include <algorithm>
using namespace std;

void ReadFromFile();
void main(){

    ReadFromFile();
    cin.get();
}

void ReadFromFile(){

    stringstream ss;
    string type = "";
    string var = "";
    string lable = "";
    string Obraket = "";
    string Cbraket = "";
    int braketCount = -1;

    ifstream myfile("input1.txt");
    multimap<string, string> symbol;
    multimap<string, multimap<string, string>> symbolL;
    if (myfile.is_open())
    {
        for (string line; getline(myfile, line);)
        {

            istringstream in(line);
            if (in.str().find("}") == string::npos && in.str().find("{") != string::npos){

                in >> lable;
                in >> Obraket;

                braketCount++;
                cout << Obraket << endl;
                in >> type;
                in >> var;
                symbol.insert(pair<string, string>(var.substr(0, 1), type));

                if (in.str().find("float") != string::npos || in.str().find("int") != string::npos){

                    var = "";
                    type = "";
                    in >> type;
                    in >> var;
                    if (type.length() > 1){
                        symbol.insert(pair<string, string>(var.substr(0, 1), type));
                    }
                }

                symbolL.insert( pair<string, multimap<string, string>>(lable,symbol));

                    for (multimap<string, multimap<string, string>>::iterator it = symbolL.begin(); it != symbolL.end(); ++it){
                    cout << it->first;
                    for (multimap<string, string>::iterator it2 = symbol.begin(); it2 != symbol.end(); ++it2){
                        cout << it2->first << "-> " << "<" << it2->second << ">, " << it->first.substr(0, 1) << endl;
                    }
                    }
            }
            else if (in.str().find("}") != string::npos){
                in >> Cbraket;
                //braketCount--;
                cout << Cbraket << endl;
                symbolL.erase(prev(symbolL.end()));

                //symbol.erase(prev(symbol.end()));
            }

        }

        myfile.close();
    }
    else cout << "Unable to open file";



}

这是我得到的输出。

{
A:a-> <int>, A
b-> <float>, A
{
A:a-> <int>, A
b-> <float>, A
c-> <float>, A
d-> <int>, A
B:a-> <int>, B
b-> <float>, B
c-> <float>, B
d-> <int>, B
{
A:a-> <int>, A
b-> <float>, A
b-> <int>, A
c-> <float>, A
c-> <int>, A
d-> <int>, A
B:a-> <int>, B
b-> <float>, B
b-> <int>, B
c-> <float>, B
c-> <int>, B
d-> <int>, B
C:a-> <int>, C
b-> <float>, C
b-> <int>, C
c-> <float>, C
c-> <int>, C
d-> <int>, C
}
}
{
A:a-> <int>, A
a-> <float>, A
b-> <float>, A
b-> <int>, A
c-> <float>, A
c-> <int>, A
d-> <int>, A
D:a-> <int>, D
a-> <float>, D
b-> <float>, D
b-> <int>, D
c-> <float>, D
c-> <int>, D
d-> <int>, D
}
}
4

2 回答 2

1

我会建议顶层的结构(即 astruct或 a class),具有std::map那些顶层结构。然后每个结构又包含一个std::map用于所包含符号的结构,其中又包含一个包含符号类型等内容的结构。

像这样简单的事情:

struct LocalSymbol
{
    std::string name;
    enum
    {
        FLOAT,
        INT
    } type;
    // Possibly other information needed for local symbols
};

struct GlobalSymbol
{
    std::string name;
    // Possibly other information needed for global symbols
    std::map<std::string, LocalSymbol> locals;
}

std::map<std::string, GlobalSymbol> globals;

这将很容易地为您提供您似乎想要的嵌套结构,并将所有相关数据紧密地保存在更小的结构中。

您的另一个大问题似乎是解析,我建议您阅读有关编译器和解析的更多信息,并尝试实现更传统的词法分析器类型的解析器,将输入处理和解析分成两个组件。如果您想手动编码解析器部分,我建议使用递归下降样式解析器,这将使处理范围和级别变得非常容易。

于 2014-11-30T08:24:52.453 回答
0

这里是!

如果您接受建议,请开始阅读 main 函数而不是顶部的类型。

我不需要多图。我没有复制父块的变量,而是仅引用父容器及其索引。就在打印之前,会遍历最顶层的块,它会收集当前块中可见的所有变量。

#include <algorithm>
#include <cassert>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <stack>
#include <string>
#include <vector>

using namespace std;

typedef string type_t; //type of variable 
typedef string variable_t; //name of variable
typedef string class_t; //name of 'class' (container)

const int NO_PARENT = -1; //top-most

typedef vector<class ClassSymbolTable> symbols_t; //we use vector to preserve order

// main class, it stores symbols of a single class, and references its parent
class ClassSymbolTable {
  class_t _name; //class name
  map<variable_t, type_t> _types; //map of variable types
  vector<variable_t> _variables; //we use this vector to preserve order
  symbols_t& _symbols; //reference to the symbol table
  int _parent_index = NO_PARENT; //reference to parent index in symbol vector


  //!! parent class, nullptr if top-level
  ClassSymbolTable* parent() const { return _parent_index != NO_PARENT ? &_symbols[_parent_index] : nullptr; }

  // does this class directly declares var ?
  bool declares_variable(const variable_t& var) const {
    return _types.find(var) != _types.end();
  }

  // print variable info in desired format
  void print_variable(const variable_t& var) {
    if (declares_variable(var)) {
      cout << " -> <" << _types[var] << ", " << _name << ">";
    }
    if (parent()) {
      parent()->print_variable(var);
    }
  }

  // traverse classes up to top-level and collect variables in order
  void collect_variables_to_print(vector<variable_t>& vars) {
    if (ClassSymbolTable* p = parent()) {
      p->collect_variables_to_print(vars);
      // add variables defined on this level
      vector<variable_t> add_vars;
      for (size_t i = 0; i < _variables.size(); ++i) {
        if (find(vars.begin(), vars.end(), _variables[i]) == vars.end()) {
          // defined on this level
          add_vars.push_back(_variables[i]);
        }
      }
      vars.insert(vars.end(), add_vars.begin(), add_vars.end());
    }
    else {
      //top-level
      vars = _variables;
    }
  }

  // get depth for indentation
  int get_depth() const {
    int depth = 0;
    for (ClassSymbolTable* p = parent(); p; p = p->parent()) {
      ++depth;
    }
    return depth;
  }


  static size_t s_max_class_name_length; //for printing
public:
  // ctor
  ClassSymbolTable(const string& name, int parent_index, symbols_t& symbols)
    : _name(name), _parent_index(parent_index), _symbols(symbols)
  {
    s_max_class_name_length = max(s_max_class_name_length, name.length());
  }

  // add variable
  void add(const variable_t& var, const type_t& type) {
    _variables.push_back(var);
    _types[var] = type;
  }

  // print this class' vars in desired format
  void print() {
    cout.fill(' ');
    const int indent = get_depth() + s_max_class_name_length + 3 /*for ':' */;

    vector<variable_t> vars;
    collect_variables_to_print(vars);

    // print class name
    string classname = _name + ": ";

    cout.fill(' ');
    cout.width(indent);
    cout << classname;

    // print vars
    cout.width(0);
    cout << vars[0];
    print_variable(vars[0]);
    cout << endl;

    for (size_t i = 1; i < vars.size(); ++i) {
      cout.width(indent);
      cout << ' '; //pad before

      cout.width(0);
      cout << vars[i];
      print_variable(vars[i]);
      cout << endl;
    }
    cout.width(0);
  }


};

size_t ClassSymbolTable::s_max_class_name_length = 0;


int main(int argc, char* argv[])
{
  ifstream in("input1.txt");
  assert(in.is_open());

  symbols_t symbols;

  //collect symbols
  const char* delimiters = ":;{}";
  vector<string> current_tokens;
  string buffer;
  stack<int> class_stack; //to manage class hierarchy, we stack the classes' index in the symbol vector
  class_stack.push(NO_PARENT); //so we dont have to branch at first level
  while (in >> buffer) {
    size_t found = buffer.find_first_of(delimiters);
    current_tokens.push_back(buffer.substr(0, found)); //either whole or until delimiter

    if (found != string::npos) { //delimiter found
      char delimiter = buffer[found];
      switch (delimiter) {
      case ':': //class name
        assert(current_tokens.size() == 1);
        {
          // add new class symbol table and refer to parent class
          symbols.emplace_back(current_tokens[0], class_stack.top(), symbols);
          // we rather store index in '{' for symmetric code
        }
        break;
      case '{': //block open
        assert(!symbols.empty());
        {
          class_stack.push(symbols.size()-1); //stack the index for nested classes
        }
        break;
      case '}': //block close
        assert(!class_stack.empty());
        {
          class_stack.pop(); //done with this class
        }
        break;
      case ';': //variable
        assert(!symbols.empty());
        assert(current_tokens.size() == 2);
        {
          // add variable to the current class symbol table
          ClassSymbolTable& current_class = symbols.back();
          current_class.add(current_tokens[1], current_tokens[0]);
        }
        break;
      }

      //put back the remaining characters
      current_tokens.clear();
      if (found < buffer.size() - 1) {
        current_tokens.push_back(buffer.substr(found + 1));
      }

    }

  }
  assert(class_stack.size() == 1 && class_stack.top() == NO_PARENT); //just to be sure

  for (ClassSymbolTable& c : symbols) {
    c.print();
  }

  cout << "." << endl;
  return 0;
}

可以对其进行优化以避免在打印过程中查找,如果您只想打印它们也可以避免存储符号。您可以在这里和那里存储局部变量,但主要思想是相同的。是的,我正在使用另一个容器来管理嵌套,一个堆栈:)

只有使用 multimap,您的变量才会被打乱。不知何故,您必须跟踪订单。

我正在使用向量来做到这一点。

(如果无法编译 C++11,只需在 main 的最后替换基于范围的 for 循环)

于 2014-11-29T23:26:35.320 回答