編譯原理(一) Chomsky文法的判斷方法及C++代碼實現 · 編譯原理算法實現

### 一、明確定義 - 0型文法：對任一產生式α→β，都有α∈(VN∪VT)+， β∈(VN∪VT)* - 1型文法：對任一產生式α→β，都有|β|≥|α|，僅僅 α→ε除外 - 2型文法：對任一產生式α→β，都有α∈VN ， β∈(VN∪VT)* - 3型文法：任一產生式α→β的形式都為A→aB或A→a，其中A∈VN,B∈VN,a∈VT。上述叫做右線性文法，另有左線性文法，二者等價。 ### 二、基本思路 - 0型文法 - 首先字符串α的是(Vn?Vt)+，是全符號集的一個正閉包，那么包含符號集中所有符號的一個任一組合，但不包含ε元素。 - 字符串β的是(Vn?Vt)?，是全符號集的一個閉包，那么它比α會多一個ε元素。 - 那么我們想要判斷一個文法是否為0型文法，只需要判斷***左側非空即可*** - 任何0型語言都是遞歸可枚舉的，故0型語言又稱***遞歸可枚舉集*** - 1型文法 - 首先1型文法必須是0型文法 - 1型文法除了α→ε這一個特例外，其他情況都滿足***β的長度大于α的長度*** - 1型文法也叫作***上下文相關文法*** - 2型文法 - 首先2型文法必須是1型文法 - 2型文法左邊***必須***是一個***非終結字符*** - 2型文法也叫做***上下文無關文法*** - 3型文法 - 首先3型文法必須是2型文法 - 3型文法必須是***線性文法*** - 也就是在A,B為***非終結符***，a是***終結符***的情況下，產生式只滿足如下兩種形式(如下為右線性的例子)： - A→aB - A→a ### 三、代碼實現： - 提供兩個實現方案： - 根據產生式，自己判斷Vn,Vt，然后判斷文法的類型，支持產生式的縮寫版本，是***最早實現***的版本，可能代碼的安排上有些混亂，冗余代碼也比較多 ~~~ #include<iostream> #include<string> #include <cstdlib> #include <cstdio> #include <map> #include <vector> #include <cctype> using namespace std; typedef struct CSS { string left; string right;//定義產生式的右部 }CSS; bool Zero (CSS *p, int n) { int i,j; for(i=0;i<n;i++)//循環n次，即遍歷所有產生式 { for(j=0;j<p[i].left.length();j++)//遍歷產生式左部每一個字符 { if(p[i].left[j]>='A'&&p[i].left[j]<='Z')//判斷字符是否是非終結符 break; } if(j==p[i].left.length()) { cout<<"該文法不是0型文法"<<endl; return 0; break; } } if(i==n) return 1;//如果每個產生時都能找到非終結符 } bool First(CSS *p , int n )//判斷1型文法 { int i; if(Zero(p,n)) //先判斷是否是0型文法 { for(i=0;i<n;i++) { if((p[i].left.length()>p[i].right.length())&&p[i].right.length()!=NULL)//判斷產生式左部長度是否大于右部 break; } if (i == n ) return 1; else { cout<<"該文法是0型文法"<<endl; return 0; } } else return 0; } bool Second( CSS*p,int n)//判斷2型文法 { int i; if(First(p,n))//同上，先判斷低級文法是否成立 { for(i=0;i<n;i++)//同上，遍歷所有文法產生式 { if((p[i].left.length()!=1)||!(p[i].left[0]>='A'&&p[i].left[0]<='Z')) break; } if(i==n) return 1; else { cout<<"該文法是1型文法"<<endl; return 0; } } else return 0; } void Third(CSS *p,int n)//判斷3型文法 { int i; if(Second(p,n))//同上，先判斷是否是2型文法 { for(i=0;i<n;i++)//同上，遍歷文法所有的產生式 { if((p[i].right.length()==0)||(p[i].right.length()>=3)||(p[i].right[0]>='A'&&p[i].right[0]<='Z'))//判斷產生式右部字符個數是否在12之間，判斷右部第一個字符是否是非終結符 break; } if(i==n) { for(i=0;i<n;i++) { if(p[i].right.length()==2) { if(!(p[i].right[1]>='A'&&p[i].right[1]<='Z')) break; } } if(i==n) { cout<<"該文法屬于3型文法"<<endl; } else cout<<"該文法屬于2型文法"<<endl; } else cout<<"該文法屬于2型文法"<<endl; } else cout<<"結束"<<endl; } int main ( ) { CSS *p = new CSS[100]; map<char,bool> dic; map<char,bool> dic2; vector<char> VN; vector<char> VT; string input1,input2,input3; cout <<"請輸入文法："<<endl; cin >> input1; cout << "請輸入VN: "<<endl; cin >> input2; for ( int i = 0 ; i < input2.length() ; i++ ) if ( isalnum ( input2[i] ) ) { VN.push_back ( input2[i] ); dic[input2[i]]=true; } cout <<"請輸入產生式規則的個數:"<<endl; int n; cin >> n; cout <<"請輸入產生式規則："<<endl; int cnt = 0; for ( int i = 0 ; i < n ; i++ ) { input3.erase (); cin >> input3; bool flag = false; for ( int j = 0 ; j < input3.length() ; j++ ) if ( input3[j] =='|' ) flag = true; if ( flag ) { string temp; int j; for ( j = 0 ; j < input3.length(); j++ ) { if ( input3[j] ==':' ) { temp = input3.substr(0,j); j = j+3; break; } } for ( int k =j ; k < input3.length() ; k++ ) { if ( isalnum ( input3[k] ) ) { p[cnt].left = temp; int tt = k; for ( ;tt < input3.length(); tt++ ) if ( input3[tt]=='|' ) break; if ( input3[tt] == '|' ) tt--; p[cnt].right = input3.substr( k , tt-k+1 ); if ( dic[input3[k]] == false ) { VT.push_back ( input3[k] ); dic[input3[k]] = true; } cnt++; k = tt; } } continue; } for ( int j = 0 ; j < input3.length() ; j++ ) { if ( input3[j]== ':' ) { p[cnt].left=input3.substr(0,j); p[cnt].right=input3.substr(j+3,input3.length()); cnt++; break; } } for ( int j = 0 ; j < input3.length() ; j++ ) { if ( isalnum( input3[j] ) ) { if ( dic[input3[j]] ) continue; VT.push_back ( input3[j] ); dic[input3[j]] = true; } } } cout << input1 << " = ( {"; for ( int i = 0 ; i < VN.size()-1 ; i++ ) cout << VN[i] << ","; cout << VN[VN.size()-1] <<"},{"; for ( int i = 0 ; i < VT.size()-1 ; i++ ) cout << VT[i] << ","; cout << VT[VT.size()-1] << "},"; cout << "P," << input1[2] << " )"<<endl; cout << "P : " << endl; vector<string> output; vector<string> head[500]; string pre[500]; for ( int i = 0 ; i < cnt ; i++ ) { int x = p[i].left[0]; head[x].push_back ( p[i].right ); pre[x] = p[i].left; } for ( int i = 0 ; i < 500 ; i++ ) { if ( head[i].size() == 0 ) continue; string temp = pre[i]+" ::= "; for ( int j = 0 ; j < head[i].size() ; j++ ) { temp += head[i][j]; if ( j != head[i].size() - 1 ) temp += " | "; } output.push_back ( temp ); } for ( int i = 0 ; i < output.size() ; i ++ ) cout << output[i] << endl; Third ( p , cnt ); } ~~~ - 第二個版本則是代碼和注釋風格比較清楚的實現版本，是周末整理之后的一個縮減的版本，不支持縮寫，需要提前設定字符集，但是給出了一個更良好的實現方案，適合理解這4種文法類型。 ~~~ #include <iostream> #include <cstdio> #include <algorithm> #include <string> #include <vector> #include <cstring> #include <cctype> using namespace std; const int VSIZE= 300; class Principle { public: string left; string right; Principle ( const char* , const char* ); }; //只考慮不包含varepsilon的情況，且所有元素均只包含一個字母 vector<char> VN;//非終結符集 vector<char> VT;//終結符集 vector<Principle> principle;//產生式的集合 int type[VSIZE];//每個字符的類型 void init();//清理工作 int get_type(char);//1代表是非終結符，2代表是終結符 bool set_type(char,int);//設置一個字符的類型 int get_result ( );//獲得輸入的文法的類型 int main ( ) { char buf[1000]; char ** elements; while ( true ) { puts("輸入VN:"); gets( buf ); for ( int i = 0 ; i < strlen(buf); i++ ) { char ch = buf[i]; if ( !isupper(ch) ) continue; if ( get_type(ch) ) continue; VN.push_back ( ch ); set_type(ch,1); } puts("輸入VT:"); gets( buf ); for ( int i = 0 ; i < strlen(buf); i++ ) { char ch = buf[i]; if ( !islower(ch) ) continue; if ( get_type(ch) ) continue; VT.push_back ( ch ); set_type(ch,2); } puts("輸入產生式：（格式為[A::=...a...]）, 輸入\"exit\"作為結束"); while ( true ) { gets ( buf ); if ( !strcmp(buf , "exit" ) ) break; int i; for ( i = 0 ; i < strlen(buf) ; i++ ) if ( buf[i] == ':' ) { buf[i] = 0; i = i+3; break; } principle.push_back ( Principle( buf , buf+i ) ); printf ( "%s|%s|\n" , buf , buf+i ); } int flag = get_result(); switch ( flag ) { case -1: puts("產生式中出現未知字符"); break; case 0: puts("該文法為0型文法"); break; case 1: puts("該文法為1型文法"); break; case 2: puts("該文法為2型文法"); break; case 3: puts("該文法為左線性型文法"); break; case 4: puts("該文法為右線性型文法"); break; } } return 0; } Principle::Principle ( const char*l , const char* r ) { left = l; right = r; } //判斷字符串是否包含未知字符 bool hasError ( const string& s ) { for ( int i = 0 ; i < s.length() ; i++ ) if ( !get_type(s[i]) ) return true; return false; } //判斷是否為0型文法 bool isZero ( ) { for ( int i = 0 ; i < principle.size() ; i++ ) if ( hasError(principle[i].left) ) return false; else if ( hasError(principle[i].right)) return false; return true; } //判斷一個0型文法是否為1型文法 bool isOne ( ) { for ( int i = 0 ; i < principle.size(); i++ ) if ( principle[i].left.length() > principle[i].right.length() ) return false; return true; } //判斷一個1型文法是否為2型文法 bool isTwo ( ) { for ( int i = 0 ; i < principle.size() ; i++ ) { string left = principle[i].left; if ( left.size() != 1 ) return false; if ( get_type(left[0]) != 1 ) return false; } return true; } //判斷一個2型文法是否為左線性文法 bool isLeftThree () { for ( int i = 0 ; i < principle.size() ; i++ ) { string right = principle[i].right; for ( int j = 1; j < right.length() ; j++ ) if ( get_type(right[j]) != 2 ) return false; } return true; } //判斷一個2型文法是否為右線性文法 bool isRightThree () { for ( int i = 0 ; i < principle.size() ; i++ ) { string right = principle[i].right; for ( int j = 0 ; j < right.length()-1; j++ ) if ( get_type(right[j]) != 2 ) return false; } return true; } int get_result ( ) { if ( !isZero() ) return -1; if ( !isOne() ) return 0; if ( !isTwo() ) return 1; if ( isLeftThree() ) return 3; if ( isRightThree() ) return 4; return 2; } void init ( ) { VN.clear(); VT.clear(); principle.clear(); memset ( type , 0 , sizeof ( type ) ); } int get_type ( char ch ) { return type[ch]; } bool set_type ( char ch , int x ) { type[ch] = x; return true; } ~~~