|

题目
代码:
// C语言词法分析器
#include <iostream>
#include <fstream>
#include <string.h>
#include <string>
#include <cstring>
#include <algorithm>
#include <set>
#include <map>
#include <stack>
#include <utility>
#include <iomanip>
#include <vector>
using namespace std;
//错误代码
enum STATE_CODE {OK, MISSING_WORDS, NO_MACH_PRODUCTION,INVALID_WORD,OTHERS};
/* 不要修改这个标准输入函数 */
void read_prog(string &prog)
{
char c;
while (scanf(&#34;%c&#34;, &c) != EOF)
{
prog += c;
}
}
/* 你可以添加其他函数 */
// 语法树节点
struct node
{
string name; // 节点名称
vector<node *> children; // 子节点
node *parent; // 父节点
size_t id;
};
typedef node *TreeNode;
struct AST
{
TreeNode root; // 根节点
};
//输入
class Token{
private:
vector<pair<int,string>> tokens;
public:
//默认初始化
Token(){
tokens.clear();
}
//根据输入解析为Tokens
Token(string prog){
tokens.clear();
size_t len = prog.length();
size_t pos = 0;
size_t line = 1;
char c;
string token;
token.clear();
while(len>0){
c = prog[pos];
if(c == &#39;\n&#39;){
if(!token.empty()){
tokens.push_back(make_pair(line, token));
token.clear();
}
line++;
}else if(c==&#39; &#39;){
if(!token.empty()){
tokens.push_back(make_pair(line, token));
token.clear();
}
}else{
token.push_back(c);
}
pos++;
if(pos>=len){
if(!token.empty()){
tokens.push_back(make_pair(line, token));
token.clear();
}
break;
}
}
}
//获得Tokens
vector<pair<int,string>> GetTokens() const {
return tokens;
}
//打印
void PrintToken(){
for(auto i : tokens){
cout << i.first << &#34; &#34; << i.second<< endl;
}
}
};
class Grammar
{
private:
vector<string> T; // 终结符号集合
vector<string> NT; // 非终结符号集合
string S; // 开始符号
map<string, vector<string>> production; // 产生式
map<string, set<string>> FIRST; // FIRST集
map<string, set<string>> FOLLOW; // FOLLOW集
map<pair<string, string>, string> Table; // LL(1)文法分析表
Token Tokens; // 输入
public:
AST ast; // 语法树
Grammar(Token tokens)
{
T.clear();
NT.clear();
S.clear();
production.clear();
FIRST.clear();
FOLLOW.clear();
Table.clear();
ReadGrammar();
Tokens = tokens;
}
//读取文法
void ReadGrammar()
{
vector<string> input={&#34;program->compoundstmt&#34;,
&#34;stmt->ifstmt|whilestmt|assgstmt|compoundstmt&#34;,
&#34;compoundstmt->{ stmts }&#34;,
&#34;stmts->stmt stmts|E&#34;,
&#34;ifstmt->if ( boolexpr ) then stmt else stmt&#34;,
&#34;whilestmt->while ( boolexpr ) stmt&#34;,
&#34;assgstmt->ID = arithexpr ;&#34;,
&#34;boolexpr->arithexpr boolop arithexpr&#34;,
&#34;boolop-><|>|<=|>=|==&#34;,
&#34;arithexpr->multexpr arithexprprime&#34;,
&#34;arithexprprime->+ multexpr arithexprprime|- multexpr arithexprprime|E&#34;,
&#34;multexpr->simpleexpr multexprprime&#34;,
&#34;multexprprime->* simpleexpr multexprprime|/ simpleexpr multexprprime|E&#34;,
&#34;simpleexpr->ID|NUM|( arithexpr )&#34;};
//读取文法规则
string line;
for(size_t t=0;t<input.size();t++)
{
size_t i;
line=input[t];
//读取左部
string left=&#34;&#34;;
for(i=0; line!=&#39;-&#39;&& i<line.size(); i++)
{
left+=line;
}
NT.push_back(left);//左部加入非终结符号集
//读取右部
string right=line.substr(i+2,line.size()-i);//获取产生式右部
AddP(left,right);//添加产生式
}
AddT();//添加终结符
S=*NT.begin();
}
// 产生式
void AddP(string left, string right)
{
right += &#34;#&#34;; //&#39;#&#39;作为每句文法结尾标志
string pRight = &#34;&#34;;
for (size_t i = 0; i < right.size(); i++)
{
if (right == &#39;|&#39; || right == &#39;#&#39;)
{
production[left].push_back(pRight);
pRight = &#34;&#34;;
}
else
{
pRight += right;
}
}
}
// 终结符
void AddT()
{
string temp = &#34;&#34;;
for (string left : NT)
{
for (string right : production[left])
{
right += &#34;#&#34;;
for (size_t i = 0; i < right.size(); i++)
{
if (right == &#39;|&#39; || right == &#39; &#39; || right == &#39;#&#39;)
{
// 不是非终结,且不是空,则加入终结符号
if ((find(NT.begin(), NT.end(), temp) == NT.end()) && temp != &#34;E&#34;)
{
T.push_back(temp);
}
temp = &#34;&#34;;
}
else
{
temp += right;
}
}
}
} // end left
// 终结符去重
sort(T.begin(), T.end());
T.erase(unique(T.begin(), T.end()), T.end());
}
//获取First集
void GetFirst(){
FIRST.clear();
//终结符号或E
FIRST[&#34;E&#34;].insert(&#34;E&#34;);
for(string X:T)
{
FIRST[X].insert(X);
}
//非终结符号
int j=0;
while(j<10)
{
for(size_t i=0; i<NT.size(); i++)
{
string A=NT;
//遍历A的每个产生式
for(size_t k=0; k<production[A].size(); k++)
{
int Continue=1;//是否添加E
string right=production[A][k];
//X是每条产生式第一个token
string X;
if(right.find(&#34; &#34;)==string::npos)
X=right;
else
X=right.substr(0,right.find(&#34; &#34;));
//FIRST[A]=FIRST[X]-E
if(!FIRST[X].empty())
{
for(string firstX:FIRST[X])
{
if(firstX==&#34;E&#34;)
continue;
else
{
FIRST[A].insert(firstX);
Continue=0;
}
}
if(Continue)
FIRST[A].insert(&#34;E&#34;);
}
}
}
j++;
}
}
//获取Follow集
void GetFollow(){
//将界符加入开始符号的follow集
FOLLOW[S].insert(&#34;#&#34;);
size_t j=0;
while(j<10)
{
//遍历非终结符号
for(string A:NT)
{
for(string right:production[A])
{
for(string B:NT)
{
//A->Bb
if(right.find(B)!=string::npos)
{
/*找B后的字符b*/
string b;
int flag=0;
//识别到E&#39;
if(right[right.find(B)+B.size()]!=&#39; &#39;&&right[right.find(B)+B.size()]!=&#39;\0&#39;)
{
string s=right.substr(right.find(B));//E&#39;b
string temp=right.substr(right.find(B)+B.size());//&#39; b
//A->E&#39;
if(temp.find(&#34; &#34;)==string::npos)
{
B=s;//B:E->E&#39;
FOLLOW[B].insert(FOLLOW[A].begin(),FOLLOW[A].end());//左部的FOLLOW赋给B
flag=1;
}
//A->E&#39;b
else
{
B=s.substr(0,s.find(&#34; &#34;));
temp=temp.substr(temp.find(&#34; &#34;)+1);//b
//b后无字符
if(temp.find(&#34; &#34;)==string::npos)
b=temp;
//b后有字符
else
b=temp.substr(0,temp.find(&#34; &#34;));
}
}
//A->aEb
else if(right[right.find(B)+B.size()]==&#39; &#39;)
{
string temp=right.substr(right.find(B)+B.size()+1);//B后的子串
//b后无字符
if(temp.find(&#34; &#34;)==string::npos)
b=temp;
//b后有字符
else
b=temp.substr(0,temp.find(&#34; &#34;));
}
//A->aE
else
{
FOLLOW[B].insert(FOLLOW[A].begin(),FOLLOW[A].end());
flag=1;
}
//FOLLOW[B]还没求到
if(flag==0)
{
//FIRST中不包含E
if(FIRST.find(&#34;E&#34;)==FIRST.end())
{
FOLLOW[B].insert(FIRST.begin(),FIRST.end());
}
else
{
for(string follow:FIRST)
{
if(follow==&#34;E&#34;)
continue;
else
FOLLOW[B].insert(follow);
}
FOLLOW[B].insert(FOLLOW[A].begin(),FOLLOW[A].end());
}
}
}
}
}
}
j++;
}
}
//获得分析表
void GetTable(){
for(string A:NT)
{
for(string right:production[A])
{
string first;//right里第一个token
if(right.find(&#34; &#34;)==string::npos)
first=right;
else
first=right.substr(0,right.find(&#34; &#34;));
right=right.insert(0,A+&#34;->&#34;);
pair<string,string>symbol;
//FIRST集里不含E:a来自FIRST[first]
if(FIRST[first].find(&#34;E&#34;)==FIRST[first].end())
{
for(string a:FIRST[first])
{
symbol=make_pair(A,a);
Table[symbol]=right;
}
}
//FIRST集里含E:a来自FOLLOW[a]
else
{
for(string a:FOLLOW[A])
{
symbol=make_pair(A,a);
Table[symbol]=right;
}
}
}
}
}
//分析过程
STATE_CODE Parsing(){
stack<string> Analysis;
auto tokens = Tokens.GetTokens();
auto tokenPos = tokens.begin();
auto tokenEnd = tokens.end();
size_t nodeId = 1;
//文法开始符号入栈
Analysis.push(&#34;#&#34;);
Analysis.push(S);
ast.root = new node; //语法树根节点
auto curNode=ast.root;
curNode->name=S;
curNode->parent=NULL;
curNode->id=0;
vector<vector<TreeNode>> astStack; //分析栈
//进入语法树下一层
astStack.push_back(vector<TreeNode>());
while(Analysis.top()!=&#34;#&#34; && tokenPos!=tokenEnd) {
auto nextToken = *tokenPos;
string top=Analysis.top();
//匹配
if(find(T.begin(),T.end(),top)!=T.end() && nextToken.second==top){
Analysis.pop();
tokenPos++;
size_t tmp=curNode->id;
while(curNode->id!=0){
curNode = curNode->parent;
if(curNode->children.size()>tmp){
curNode = curNode->children[tmp];
break;
}else{
tmp=curNode->id;
}
}
}
//推导
else if(find(NT.begin(),NT.end(),top)!=NT.end()&&find(T.begin(),T.end(),nextToken.second)!=T.end()){
auto symbol = make_pair(top,nextToken.second);
if(!Table[symbol].empty()){
Analysis.pop();
string production=Table[symbol];//产生式
//产生式右部入栈
while(production.find(&#34; &#34;)!=string::npos)
{
string lastToken=production.substr(production.rfind(&#34; &#34;)+1);
Analysis.push(lastToken);
production=production.substr(0,production.rfind(&#34; &#34;));
}
//如果右部是E,就不用入栈了
if(production.substr(production.find(&#34;->&#34;)+2)!=&#34;E&#34;){
Analysis.push(production.substr(production.find(&#34;->&#34;)+2));
}
astStack.push_back(vector<node*>());
//产生式右部
string pdc=Table[symbol].substr(Table[symbol].find(&#34;->&#34;)+2);
nodeId=1;
while(1){
TreeNode newNode = new node;
if(pdc.find(&#34; &#34;)!=string::npos){
string firstToken = pdc.substr(0,pdc.find(&#34; &#34;));
newNode->name = firstToken;
newNode->id = nodeId;
nodeId++;
newNode->parent = curNode;
curNode->children.push_back(newNode);
astStack.back().push_back(newNode);
pdc=pdc.substr(pdc.find(&#34; &#34;)+1);
}
else{
newNode->name=pdc;
newNode->id = nodeId;
nodeId++;
newNode->parent = curNode;
curNode->children.push_back(newNode);
astStack.back().push_back(newNode);
if(curNode->children[0]->name!=&#34;E&#34;)
curNode = curNode->children[0];
else{
size_t tmp=curNode->id;
while(curNode->id!=0){
curNode = curNode->parent;
if(curNode->children.size()>tmp){
curNode = curNode->children[tmp];
break;
}else{
tmp=curNode->id;
}
}
}
break;
}
}
}
else{
cout<<&#34;语法错误,第&#34;<<4<<&#34;行,缺少\&#34;&#34;<<&#39;;&#39;<<&#34;\&#34;&#34;<<endl;
tokenPos=tokens.insert(tokenPos,make_pair(4,&#34;;&#34;));
//Analysis.pop();
}
}
else{
tokenPos++;
}
}
if(Analysis.top()==&#34;#&#34; && tokenPos==tokenEnd)
return OK;
else
return OTHERS;
}
//分析
void Parser(){
GetFirst();
GetFollow();
GetTable();
switch(Parsing()){
case OK:
PrintTree(ast.root,0);
break;
case MISSING_WORDS:
break;
case NO_MACH_PRODUCTION:
break;
case INVALID_WORD:
break;
case OTHERS:
PrintTree(ast.root,0);
break;
}
}
//打印语法树
void PrintTree(TreeNode Node,int deep)
{
for(int i=0; i<=deep-1; i++)
{
cout<<&#34;\t&#34;;
}
cout<<Node->name<<endl;
for(size_t i=0; i<Node->children.size(); i++)
{
PrintTree(Node->children,deep+1);
}
}
//打印NT和T
void PrintV()
{
cout<<&#34;非终结符号集合&#34;<<endl;
for(size_t i=0; i<NT.size(); i++)
{
cout<<NT<<&#34; &#34;;
}
cout<<endl;
cout<<&#34;终结符号集合:&#34;<<endl;
for(size_t i=0; i<T.size(); i++)
{
cout<<T<<&#34; &#34;;
}
cout<<endl;
}
//打印FIRST集
void PrintFIRST()
{
cout<<&#34;FIRST集合为&#34;<<endl;
cout.setf(ios::left);
for(string non_terminal:NT)
{
cout<<setw(20)<<non_terminal;
for(string first:FIRST[non_terminal])
cout<<first<<&#34; &#34;;
cout<<endl;
}
cout<<endl;
}
//打印FOLLOW集
void PrintFOLLOW()
{
cout<<&#34;FOLLOW集合为&#34;<<endl;
cout.setf(ios::left);
for(string non_terminal:NT)
{
cout<<setw(20)<<non_terminal;
for(string follow:FOLLOW[non_terminal])
cout<<follow<<&#34; &#34;;
cout<<endl;
}
cout<<endl;
}
//打印产生式
void PrintP()
{
cout<<&#34;语法的产生式为&#34;<<endl;
for(string left:NT)
{
cout<<left<<&#34;->&#34;;
for(auto it=production[left].begin(); it!=production[left].end(); it++)
{
if(it!=production[left].end()-1)
cout<<*it<<&#34;|&#34;;
else
cout<<*it<<endl;
}
}
cout<<endl;
}
//打印分析表
void PrintTable()
{
cout<<&#34;LL(1)分析表:&#34;<<endl;
vector<string>x=T;//含界符的终结符号集合
x.push_back(&#34;#&#34;);
//输出表格横轴
cout.setf(ios::left);
for (auto it1 = x.begin(); it1 != x.end(); it1++)
{
if (it1==x.begin())
cout<<setw(10)<<&#34; &#34;;
cout<<setw(15)<<*it1;
}
cout<<endl;
for(string A:NT)
{
cout<<setw(10)<<A;
for(string a:x)
{
pair<string,string>symbol;
symbol=make_pair(A,a);
if(!Table[symbol].empty())
cout<<setw(15)<<Table[symbol];
else
cout<<setw(15)<<&#34; &#34;;
}
cout<<endl;
}
}
};
void Analysis()
{
string prog;
read_prog(prog);
/* 骚年们 请开始你们的表演 */
/********* Begin *********/
Token token(prog);
//token.PrintToken();
Grammar grammar(token);
grammar.Parser();
// grammar.PrintV();
// grammar.PrintFIRST();
// grammar.PrintFOLLOW();
// grammar.PrintP();
// grammar.PrintTable();
/********* End *********/
} |
|