编译原理LL(1)文法分析程序的一种思路

it2024-10-19 51

编译原理LL(1)文法分析程序

First(X)集

其中X可以是终结符或非终结符组成的任意字符串或字符

如果X是单个字符。如果X是终结符。那么First(X) = {X};如果X是非终结符。寻找满足X->ABC…N…，的推导式，则First(X)=First(ABC…N…)。如果X是字符串，即X=ABC…N…（这里的A、B、C、N等代表单个字符），那么求出First(A)，如果First(A)中存在’ε’，继续求出First(B)，若First(B)中仍存在’ε‘，继续求出First（C），直到存在一个First(N)，满足First(N)中没有’ε‘。这时候，First(X)等于First(A)、First(B)…First(N)的去除’ε‘的并集。如果一直求到X的最后一个字符，都不存在这样的First(N)，那么First(X)等于First(A)、First(B)…First(N)的并集（注意，这时First(X)中存在’ε‘）。

Follow(X)集

其中X代表一个非终结符。约定S代表文法的开始符号。求Follow(X)时，在所有推导式中，寻找到形如A->…X…的式子。

如果X是该式右部的最后一个元素，即A->…X。则Follow(X)=Follow(A)。否则，即存在形如A->…XB…的式子（B代表一个非终结符或终结符，或二者组成的字符串）。此时求出First(B)，如果’ε‘不在First(B)中，则Follow(X)=First(B)；如果’ε‘在First(B)中，则求出Follow(A)，此时Follow(X)等于Follow(A)加上First(B)中去除’ε‘的所有元素。注意，求取Follow(S)时，将‘#’（输入串终结符）放入Follow(S)中，然后继续求取Follow(S)。（这里S代表文法开始符号）可以看出，在Follow集中是不可能存在’ε‘的，但是可能存在‘#’。

Select(X)集

获得Select集是定义First集和Follow集的目的，而获得Select集，是为了构建LL(1)分析表。在求Select(X)中，X代表一个推导式，形如S->ABC…(其中A、B、C等代表一个字符，S是一个非终结符)。

首先求First(ABC…)，如果’ε‘不在First(ABC…)中，则Select(S->ABC…)=First(ABC…)。如果’ε‘在First(ABC…)中，则求Follow(S)，这时Select(S->ABC…)等于Follow(S)加上First(ABC…)中不是‘ε’的元素。可以看出，在Select集中也不可能存在‘ε’。

代码实现如下

#include <iostream> #include <string> #include <vector> #include <algorithm> #include <list> using namespace std; struct Production {//推导式结构体，同时实现了图的作用。 char left; string right; vector<char> selectSet;//select集 }; vector<Production> production;// vector<char> followStack;//加个栈，防止Follow()出现无限递归，实在没别的办法了。 bool isNonterminal(char para) {//判断字符是否为非终结符 if (para >= 'A' && para <= 'Z') return true; return false; } bool isElementInVector(vector<char> v, char element) {//字符是否已经存在于容器中。 vector<char>::iterator it; it = find(v.begin(), v.end(), element); if (it != v.end()) { return true; } else { return false; } } vector<char> getFirst(char right1) { vector<char> firstSet; for (int i = 0; i < production.size(); i++) { if (!isNonterminal(right1)) {// 对于终结符，直接加入first firstSet.push_back(right1); return firstSet; } else { int countEmpty = 0;//记录空的个数。 if (production[i].left == right1) { for (int j = 0; j < production[i].right.length(); j++) { vector<char> buffer = getFirst(production[i].right[j]); vector<char>::iterator it = buffer.begin(); for (it; it != buffer.end(); it++) { if (*it == '$') // 遍历查看是否含有'$'(能产生空) ++countEmpty; else if (!isElementInVector(firstSet, *it)) firstSet.push_back(*it);//将非$加入FIRST(X) } if (countEmpty == 0) break; } if (countEmpty == production[i].right.length())//所有右部first(Y)都有$(空),将$加入FIRST(X)中 firstSet.push_back('$'); } } } return firstSet; }; vector<char> getFollow(char left) {//求Follow(B) vector<char> followSet; if (is_element_in_vector(followStack, left))//如果在递归栈中已经存在了要求的元素，说明进入了无限递归，这时再求也没有意义，直接返回空集。 return followSet; else followStack.push_back(left); if (left == production[0].left)//对文法开始符号S,置#于FOLLOW(S)中 followSet.push_back('#'); for (int i = 0; i < production.size(); i++) { for (int j = 0; j < production[i].right.length(); j++) { if (production[i].right[j] == left) { if (j + 1 == production[i].right.length() && production[i].left != left) {//A->aB,将Follow（A）加入Follow（B） vector<char> buffer1 = getFollow(production[i].left); vector<char>::iterator it = buffer1.begin(); for (it; it != buffer1.end(); it++) { if (!is_element_in_vector(followSet, *it)) followSet.push_back(*it); } } else {//A->aBC int countEmpty = 0; for (int k = j+1; k < production[i].right.length(); k++) {//将除去空集e的First（C）加入Follow（B） vector<char> buffer = getFirst(production[i].right[k]); vector<char>::iterator iter = buffer.begin(); for (iter; iter != buffer.end(); iter++) { if (!is_element_in_vector(followSet, *iter) && *iter != '$') followSet.push_back(*iter); } if (is_element_in_vector(buffer, '$')) ++countEmpty; else break; } if (countEmpty == production[i].right.length() - j - 1) {//若C=>*e，将Follow（A）加入Follow（B）中 vector<char> buffer1 = getFollow(production[i].left); vector<char>::iterator it = buffer1.begin(); for (it; it != buffer1.end(); it++) { if (!is_element_in_vector(followSet, *it)) followSet.push_back(*it); } } } break;//防止 S->aAbAc这种出现。找右部第一个。 } } } followStack.pop_back(); return followSet; }; void getSelect(Production& prod) { int countEmpty = 0; for (int i = 0; i < prod.right.length(); i++) { vector<char> buffer = getFirst(prod.right[i]); vector<char>::iterator iter = buffer.begin(); for (iter; iter != buffer.end(); iter++) { if (!isElementInVector(prod.selectSet, *iter) && *iter != '$') prod.selectSet.push_back(*iter); } if (isElementInVector(buffer, '$')) ++countEmpty; else break; } if (countEmpty == prod.right.length()) prod.selectSet.push_back('$'); if (isElementInVector(prod.selectSet, '$') ) { vector<char> buffer = getFollow(prod.left); prod.selectSet.insert(prod.selectSet.end(), buffer.begin(), buffer.end()); vector<char>::iterator it = find(prod.selectSet.begin(), prod.selectSet.end(), '$'); prod.selectSet.erase(it); } }; void main() { string st;//输入串 int is = 0;//串首指针 list<char> stack;//比较的栈 cout << "****************************************************************************" << endl; cout << "约定非终结符为大写字母 $代表空 #代表输入串结束。存在|符号的要分成两个式子写。" << endl; cout << "****************************************************************************" << endl; cout << "请分别输入文法的左部和右部。输入#结束。" << endl; cout << "****************************************************************************" << endl; char ch; while(cin>> ch){ if (ch == '#') break; string st; cout << "->" ; cin >> st; cout << "输入下一条文法：" << endl; production.push_back({ ch,st }); } cout << "****************************************************************************" << endl; cout << "LL1分析表：" << endl; cout << "****************************************************************************" << endl; for (int i = 0; i < production.size(); i++) { getSelect(production[i]); } cout << "左部----------" << "右部-----------" << "selcet集----------" << endl; for (int i = 0; i < production.size(); i++) { cout << production[i].left << " " <<production[i].right << " "; vector<char>::iterator iter = production[i].selectSet.begin(); for (iter; iter != production[i].selectSet.end(); iter++) cout << *iter<<" "; cout << endl; } cout << "****************************************************************************" << endl; cout << "输入需要检测的串，以#结尾。" << endl; cin >> st; cout << "****************************************************************************" << endl; cout << "分析过程：" << endl; cout << "****************************************************************************" << endl; cout << "分析栈----------" << "当前串-----------" << "操作----------" << endl; stack.push_back('#'); stack.push_back(production[0].left); do { if ((stack.back() == st[is]) && stack.back()!='#') { list<char>::iterator it; //输出栈 for (it = stack.begin(); it != stack.end(); it++) { cout << *it; } cout << " "; for (int i = is; i < st.length(); i++) cout << st[i]; cout << " " << st[is] << "匹配" << endl; ++is; stack.pop_back(); } else if (stack.back() == '#' && st[is] == '#') { cout << '#' << " " << '#' << " " << "接受"; break; } else { int flag = 0; for (int i = 0; i < production.size(); i++) { if (stack.back() == production[i].left) { for (int j = 0; j < production[i].selectSet.size(); j++) { if (production[i].selectSet[j] == st[is]) { list<char>::iterator it; //输出栈 for (it = stack.begin(); it != stack.end(); it++) { cout << *it; } cout << " "; for (int i = is; i < st.length(); i++)//输出当前串 cout << st[i]; cout << " "; cout << stack.back() << "->" << production[i].right << endl; stack.pop_back(); if (production[i].right != "$") { for (int k = production[i].right.length() - 1; k >= 0; k--)//右部倒序入栈。 stack.push_back(production[i].right[k]); } flag = 1; break; } } } } if (flag == 0) { cout << "文法错误，" << stack.back()<<"与"<<st[is]<<"不匹配"; break; } } } while (1); }

测试集如下

//其中’#'代表'ε' //测试集一 E->TA A->TA A->$ T->FB B->FB B->$ F->(E) F->i //输入串：i+i*i# //测试集二 S->aA S->d A->bAS A->$ //输入串：abbad# //测试集三 S->AaS S->BbS S->d A->a B->$ B->c //输入串：aacbd#

测试结果

任良武别学了，学不会的。

最新回复(0)