LL parser C++实现编译原理

一无心 · 发表于 2023-3-5 00:45:28

题目

代码：
// C语言词法分析器
#include <iostream>
#include <fstream>
#include <string.h>
#include <string>
#include <cstring>
#include <algorithm>
#include <set>
#include <map>
#include <stack>
#include <utility>
#include <iomanip>
#include <vector>
using namespace std;

//错误代码
enum STATE_CODE {OK, MISSING_WORDS, NO_MACH_PRODUCTION,INVALID_WORD,OTHERS};

/* 不要修改这个标准输入函数 */
void read_prog(string &prog)
{
char c;
while (scanf(&#34;%c&#34;, &c) != EOF)
{
prog += c;
}
}
/* 你可以添加其他函数 */

// 语法树节点
struct node
{
string name;    // 节点名称
vector<node *> children; // 子节点
node *parent;         // 父节点
size_t id;
};
typedef node *TreeNode;

struct AST
{
TreeNode root; // 根节点
};

//输入
class Token{
private:
vector<pair<int,string>> tokens;

public:
//默认初始化
Token(){
tokens.clear();
}
//根据输入解析为Tokens
Token(string prog){
tokens.clear();

size_t len = prog.length();
size_t pos = 0;
size_t line = 1;
char c;
string token;
token.clear();
while(len>0){
c = prog[pos];
if(c == &#39;\n&#39;){
if(!token.empty()){
tokens.push_back(make_pair(line, token));
token.clear();
}
line++;
}else if(c==&#39; &#39;){
if(!token.empty()){
tokens.push_back(make_pair(line, token));
token.clear();
}
}else{
token.push_back(c);
}
pos++;
if(pos>=len){
if(!token.empty()){
tokens.push_back(make_pair(line, token));
token.clear();
}
break;
}
}
}
//获得Tokens
vector<pair<int,string>> GetTokens() const {
return tokens;
}

//打印
void PrintToken(){
for(auto i : tokens){
cout << i.first << &#34; &#34; << i.second<< endl;
}
}
};

class Grammar
{
private:
vector<string> T;    // 终结符号集合
vector<string> NT;    // 非终结符号集合
string S;    // 开始符号
map<string, vector<string>> production;    // 产生式
map<string, set<string>> FIRST;    // FIRST集
map<string, set<string>> FOLLOW;    // FOLLOW集
map<pair<string, string>, string> Table; // LL(1)文法分析表
Token Tokens;    // 输入

public:
AST ast; // 语法树

Grammar(Token tokens)
{
T.clear();
NT.clear();
S.clear();
production.clear();
FIRST.clear();
FOLLOW.clear();
Table.clear();

ReadGrammar();
Tokens = tokens;
}

//读取文法
void ReadGrammar()
{
vector<string> input={&#34;program->compoundstmt&#34;,
      &#34;stmt->ifstmt|whilestmt|assgstmt|compoundstmt&#34;,
      &#34;compoundstmt->{ stmts }&#34;,
      &#34;stmts->stmt stmts|E&#34;,
      &#34;ifstmt->if ( boolexpr ) then stmt else stmt&#34;,
      &#34;whilestmt->while ( boolexpr ) stmt&#34;,
      &#34;assgstmt->ID = arithexpr ;&#34;,
      &#34;boolexpr->arithexpr boolop arithexpr&#34;,
      &#34;boolop-><|>|<=|>=|==&#34;,
      &#34;arithexpr->multexpr arithexprprime&#34;,
      &#34;arithexprprime->+ multexpr arithexprprime|- multexpr arithexprprime|E&#34;,
      &#34;multexpr->simpleexpr multexprprime&#34;,
      &#34;multexprprime->* simpleexpr multexprprime|/ simpleexpr multexprprime|E&#34;,
      &#34;simpleexpr->ID|NUM|( arithexpr )&#34;};

      //读取文法规则
      string line;
      for(size_t t=0;t<input.size();t++)
      {
         size_t i;
         line=input[t];
         //读取左部
         string left=&#34;&#34;;
         for(i=0; line!=&#39;-&#39;&& i<line.size(); i++)
         {
            left+=line;
         }

         NT.push_back(left);//左部加入非终结符号集
         //读取右部
         string right=line.substr(i+2,line.size()-i);//获取产生式右部
         AddP(left,right);//添加产生式
      }
      AddT();//添加终结符
      S=*NT.begin();
}

// 产生式
void AddP(string left, string right)
{
right += &#34;#&#34;; //&#39;#&#39;作为每句文法结尾标志
string pRight = &#34;&#34;;
for (size_t i = 0; i < right.size(); i++)
{
if (right == &#39;|&#39; || right == &#39;#&#39;)
{
production[left].push_back(pRight);
pRight = &#34;&#34;;
}
else
{
pRight += right;
}
}
}

// 终结符
void AddT()
{
string temp = &#34;&#34;;
for (string left : NT)
{
for (string right : production[left])
{
right += &#34;#&#34;;
for (size_t i = 0; i < right.size(); i++)
{
if (right == &#39;|&#39; || right == &#39; &#39; || right == &#39;#&#39;)
{
// 不是非终结，且不是空，则加入终结符号
if ((find(NT.begin(), NT.end(), temp) == NT.end()) && temp != &#34;E&#34;)
{
T.push_back(temp);
}
temp = &#34;&#34;;
}
else
{
temp += right;
}
}
}
} // end left

// 终结符去重
sort(T.begin(), T.end());
T.erase(unique(T.begin(), T.end()), T.end());
}

//获取First集
void GetFirst(){
      FIRST.clear();

      //终结符号或E
      FIRST[&#34;E&#34;].insert(&#34;E&#34;);
      for(string X:T)
      {
         FIRST[X].insert(X);
      }

      //非终结符号
      int j=0;
      while(j<10)
      {
         for(size_t i=0; i<NT.size(); i++)
         {
            string A=NT;

            //遍历A的每个产生式
            for(size_t k=0; k<production[A].size(); k++)
            {
                  int Continue=1;//是否添加E
                  string right=production[A][k];

                  //X是每条产生式第一个token
                  string X;
                  if(right.find(&#34; &#34;)==string::npos)
                     X=right;
                  else
                     X=right.substr(0,right.find(&#34; &#34;));

                  //FIRST[A]=FIRST[X]-E
                  if(!FIRST[X].empty())
                  {
                     for(string firstX:FIRST[X])
                     {
                        if(firstX==&#34;E&#34;)
                              continue;
                        else
                        {
                              FIRST[A].insert(firstX);
                              Continue=0;
                        }
                     }
                     if(Continue)
                        FIRST[A].insert(&#34;E&#34;);
                  }
            }

         }
         j++;
      }
}

//获取Follow集
void GetFollow(){
      //将界符加入开始符号的follow集
      FOLLOW[S].insert(&#34;#&#34;);

      size_t j=0;
      while(j<10)
      {
         //遍历非终结符号
         for(string A:NT)
         {
            for(string right:production[A])
            {
                  for(string B:NT)
                  {
                     //A->Bb
                     if(right.find(B)!=string::npos)
                     {
                        /*找B后的字符b*/
                        string b;
                        int flag=0;
                        //识别到E&#39;
                        if(right[right.find(B)+B.size()]!=&#39; &#39;&&right[right.find(B)+B.size()]!=&#39;\0&#39;)
                        {
                              string s=right.substr(right.find(B));//E&#39;b
                              string temp=right.substr(right.find(B)+B.size());//&#39; b

                              //A->E&#39;
                              if(temp.find(&#34; &#34;)==string::npos)
                              {
                                 B=s;//B:E->E&#39;
                                 FOLLOW[B].insert(FOLLOW[A].begin(),FOLLOW[A].end());//左部的FOLLOW赋给B
                                 flag=1;
                              }
                              //A->E&#39;b
                              else
                              {
                                 B=s.substr(0,s.find(&#34; &#34;));
                                 temp=temp.substr(temp.find(&#34; &#34;)+1);//b
                                 //b后无字符
                                 if(temp.find(&#34; &#34;)==string::npos)
                                    b=temp;
                                 //b后有字符
                                 else
                                    b=temp.substr(0,temp.find(&#34; &#34;));
                              }
                        }

                        //A->aEb
                        else if(right[right.find(B)+B.size()]==&#39; &#39;)
                        {
                              string temp=right.substr(right.find(B)+B.size()+1);//B后的子串

                              //b后无字符
                              if(temp.find(&#34; &#34;)==string::npos)
                                 b=temp;
                              //b后有字符
                              else
                                 b=temp.substr(0,temp.find(&#34; &#34;));
                        }
                        //A->aE
                        else
                        {
                              FOLLOW[B].insert(FOLLOW[A].begin(),FOLLOW[A].end());
                              flag=1;
                        }

                        //FOLLOW[B]还没求到
                        if(flag==0)
                        {
                              //FIRST中不包含E
                              if(FIRST.find(&#34;E&#34;)==FIRST.end())
                              {
                                 FOLLOW[B].insert(FIRST.begin(),FIRST.end());
                              }
                              else
                              {
                                 for(string follow:FIRST)
                                 {
                                    if(follow==&#34;E&#34;)
                                          continue;
                                    else
                                          FOLLOW[B].insert(follow);
                                 }
                                 FOLLOW[B].insert(FOLLOW[A].begin(),FOLLOW[A].end());
                              }
                        }
                     }
                  }
            }
         }
         j++;
      }
}

//获得分析表
void GetTable(){
      for(string A:NT)
      {
         for(string right:production[A])
         {
            string first;//right里第一个token
            if(right.find(&#34; &#34;)==string::npos)
                  first=right;
            else
                  first=right.substr(0,right.find(&#34; &#34;));

            right=right.insert(0,A+&#34;->&#34;);
            pair<string,string>symbol;

            //FIRST集里不含E:a来自FIRST[first]
            if(FIRST[first].find(&#34;E&#34;)==FIRST[first].end())
            {
                  for(string a:FIRST[first])
                  {
                     symbol=make_pair(A,a);
                     Table[symbol]=right;
                  }
            }
            //FIRST集里含E:a来自FOLLOW[a]
            else
            {
                  for(string a:FOLLOW[A])
                  {
                     symbol=make_pair(A,a);
                     Table[symbol]=right;
                  }
            }
         }
}
}

//分析过程
STATE_CODE Parsing(){
stack<string> Analysis;
auto tokens = Tokens.GetTokens();
auto tokenPos = tokens.begin();
auto tokenEnd = tokens.end();
size_t nodeId = 1;

//文法开始符号入栈
Analysis.push(&#34;#&#34;);
Analysis.push(S);

ast.root = new node; //语法树根节点
auto curNode=ast.root;
      curNode->name=S;
curNode->parent=NULL;
curNode->id=0;
vector<vector<TreeNode>> astStack; //分析栈

//进入语法树下一层
astStack.push_back(vector<TreeNode>());

while(Analysis.top()!=&#34;#&#34; && tokenPos!=tokenEnd) {
         auto nextToken = *tokenPos;
string top=Analysis.top();
//匹配
if(find(T.begin(),T.end(),top)!=T.end() && nextToken.second==top){
Analysis.pop();
tokenPos++;
size_t tmp=curNode->id;
while(curNode->id!=0){
curNode = curNode->parent;
if(curNode->children.size()>tmp){
curNode = curNode->children[tmp];
break;
}else{
tmp=curNode->id;
}
}
}
//推导
else if(find(NT.begin(),NT.end(),top)!=NT.end()&&find(T.begin(),T.end(),nextToken.second)!=T.end()){
auto symbol = make_pair(top,nextToken.second);

if(!Table[symbol].empty()){
Analysis.pop();
string production=Table[symbol];//产生式
//产生式右部入栈
                  while(production.find(&#34; &#34;)!=string::npos)
                  {
                     string lastToken=production.substr(production.rfind(&#34; &#34;)+1);
                     Analysis.push(lastToken);
                     production=production.substr(0,production.rfind(&#34; &#34;));
                  }
//如果右部是E,就不用入栈了
                  if(production.substr(production.find(&#34;->&#34;)+2)!=&#34;E&#34;){
                     Analysis.push(production.substr(production.find(&#34;->&#34;)+2));

}
astStack.push_back(vector<node*>());
//产生式右部
                  string pdc=Table[symbol].substr(Table[symbol].find(&#34;->&#34;)+2);
nodeId=1;
while(1){
TreeNode newNode = new node;
if(pdc.find(&#34; &#34;)!=string::npos){
string firstToken = pdc.substr(0,pdc.find(&#34; &#34;));
newNode->name = firstToken;
newNode->id = nodeId;
nodeId++;
newNode->parent = curNode;
curNode->children.push_back(newNode);
astStack.back().push_back(newNode);
pdc=pdc.substr(pdc.find(&#34; &#34;)+1);
}
else{
newNode->name=pdc;
newNode->id = nodeId;
nodeId++;
newNode->parent = curNode;
curNode->children.push_back(newNode);
astStack.back().push_back(newNode);
if(curNode->children[0]->name!=&#34;E&#34;)
curNode = curNode->children[0];
else{
size_t tmp=curNode->id;
while(curNode->id!=0){
curNode = curNode->parent;
if(curNode->children.size()>tmp){
curNode = curNode->children[tmp];
break;
}else{
tmp=curNode->id;
}
}
}
break;
}
}
}
else{
cout<<&#34;语法错误,第&#34;<<4<<&#34;行,缺少\&#34;&#34;<<&#39;;&#39;<<&#34;\&#34;&#34;<<endl;
                  tokenPos=tokens.insert(tokenPos,make_pair(4,&#34;;&#34;));
//Analysis.pop();
}
}
else{
tokenPos++;
}
}

      if(Analysis.top()==&#34;#&#34; && tokenPos==tokenEnd)
         return OK;
      else
         return OTHERS;
}

//分析
void Parser(){
GetFirst();
GetFollow();
GetTable();

switch(Parsing()){
case OK:
PrintTree(ast.root,0);
break;
case MISSING_WORDS:
break;
case NO_MACH_PRODUCTION:
break;
case INVALID_WORD:
break;
case OTHERS:
         PrintTree(ast.root,0);
break;

}

}

//打印语法树
void PrintTree(TreeNode Node,int deep)
{
      for(int i=0; i<=deep-1; i++)
      {
         cout<<&#34;\t&#34;;
      }
      cout<<Node->name<<endl;

      for(size_t i=0; i<Node->children.size(); i++)
      {
         PrintTree(Node->children,deep+1);
      }
}
//打印NT和T
void PrintV()
{
      cout<<&#34;非终结符号集合&#34;<<endl;
      for(size_t i=0; i<NT.size(); i++)
      {
         cout<<NT<<&#34; &#34;;
      }
      cout<<endl;
      cout<<&#34;终结符号集合：&#34;<<endl;
      for(size_t i=0; i<T.size(); i++)
      {
         cout<<T<<&#34; &#34;;
      }
      cout<<endl;
}
//打印FIRST集
void PrintFIRST()
{
      cout<<&#34;FIRST集合为&#34;<<endl;
      cout.setf(ios::left);
      for(string non_terminal:NT)
      {
         cout<<setw(20)<<non_terminal;
         for(string first:FIRST[non_terminal])
            cout<<first<<&#34; &#34;;
         cout<<endl;
      }
      cout<<endl;
}
//打印FOLLOW集
void PrintFOLLOW()
{
      cout<<&#34;FOLLOW集合为&#34;<<endl;
      cout.setf(ios::left);
      for(string non_terminal:NT)
      {
         cout<<setw(20)<<non_terminal;
         for(string follow:FOLLOW[non_terminal])
            cout<<follow<<&#34; &#34;;
         cout<<endl;
      }
      cout<<endl;
}
//打印产生式
void PrintP()
{
      cout<<&#34;语法的产生式为&#34;<<endl;
      for(string left:NT)
      {
         cout<<left<<&#34;->&#34;;
         for(auto it=production[left].begin(); it!=production[left].end(); it++)
         {
            if(it!=production[left].end()-1)
                  cout<<*it<<&#34;|&#34;;
            else
                  cout<<*it<<endl;
         }
      }
      cout<<endl;
}
//打印分析表
void PrintTable()
{
      cout<<&#34;LL(1)分析表：&#34;<<endl;

      vector<string>x=T;//含界符的终结符号集合
      x.push_back(&#34;#&#34;);

      //输出表格横轴
      cout.setf(ios::left);
      for (auto it1 = x.begin(); it1 != x.end(); it1++)
      {
         if (it1==x.begin())
            cout<<setw(10)<<&#34; &#34;;
         cout<<setw(15)<<*it1;
      }
      cout<<endl;

      for(string A:NT)
      {
         cout<<setw(10)<<A;

         for(string a:x)
         {
            pair<string,string>symbol;
            symbol=make_pair(A,a);
            if(!Table[symbol].empty())
                  cout<<setw(15)<<Table[symbol];
            else
                  cout<<setw(15)<<&#34;       &#34;;
         }
         cout<<endl;
      }
}

};

void Analysis()
{
string prog;
read_prog(prog);
/* 骚年们请开始你们的表演 */
/********* Begin *********/
Token token(prog);
//token.PrintToken();
Grammar grammar(token);
grammar.Parser();
// grammar.PrintV();
// grammar.PrintFIRST();
// grammar.PrintFOLLOW();
// grammar.PrintP();
// grammar.PrintTable();

/********* End *********/
}

LL parser C++实现 编译原理

LL parser C++实现编译原理