<ruby id="bdb3f"></ruby>

    <p id="bdb3f"><cite id="bdb3f"></cite></p>

      <p id="bdb3f"><cite id="bdb3f"><th id="bdb3f"></th></cite></p><p id="bdb3f"></p>
        <p id="bdb3f"><cite id="bdb3f"></cite></p>

          <pre id="bdb3f"></pre>
          <pre id="bdb3f"><del id="bdb3f"><thead id="bdb3f"></thead></del></pre>

          <ruby id="bdb3f"><mark id="bdb3f"></mark></ruby><ruby id="bdb3f"></ruby>
          <pre id="bdb3f"><pre id="bdb3f"><mark id="bdb3f"></mark></pre></pre><output id="bdb3f"></output><p id="bdb3f"></p><p id="bdb3f"></p>

          <pre id="bdb3f"><del id="bdb3f"><progress id="bdb3f"></progress></del></pre>

                <ruby id="bdb3f"></ruby>

                ThinkChat2.0新版上線,更智能更精彩,支持會話、畫圖、視頻、閱讀、搜索等,送10W Token,即刻開啟你的AI之旅 廣告
                LLVM平臺,短短幾年間,改變了眾多編程語言的走向,也催生了一大批具有特色的編程語言的出現,不愧為編譯器架構的王者,也榮獲2012年ACM軟件系統獎 —— 題記 版權聲明:本文為 西風逍遙游 原創文章,轉載請注明出處 西風世界 [http://blog.csdn.net/xfxyy_sxfancy](http://blog.csdn.net/xfxyy_sxfancy) # 簡單的詞法和語法分析 Lex和Yacc真是太好用了,非常方便我們構建一門語言的分析程序。 如果你對Lex和Yacc不了解的話,建議先看下我之前寫的兩篇文章,分別介紹了Lex和Yacc的用法。 Lex識別C風格字符串和注釋 [http://blog.csdn.net/xfxyy_sxfancy/article/details/45024573](http://blog.csdn.net/xfxyy_sxfancy/article/details/45024573) 創造新語言(2)——用Lex&Yacc構建簡單的分析程序 [http://blog.csdn.net/xfxyy_sxfancy/article/details/45046465](http://blog.csdn.net/xfxyy_sxfancy/article/details/45046465) ### FLex創建一門語言的詞法分析程序 我們創建的是一門編程語言,那么詞法分析程序就不能像做實驗一樣那么草率,必須考慮周全,一般一門語言的詞法分析程序大概需要囊括如下的幾個方面: 識別關鍵字、識別標識符、識別基本常量(數字、浮點數、字符串、字符)、識別注釋、識別運算符 這些都是非常重要的,而且是一門語言語法中必不可少的部分。 于是RedApple的詞法分析部分,我就設計成了這樣: ~~~ %{ #include <string> #include "Model/nodes.h" #include <list> using namespace std; #include "redapple_parser.hpp" #include "StringEscape.h" #define SAVE_TOKEN yylval.str = maketoken(yytext, yyleng) #define SAVE_STRING yylval.str = makestring(yytext, yyleng, 2) #define SAVE_STRING_NC yylval.str = makestring(yytext, yyleng, 3) extern "C" int yywrap() { return 1; } char* maketoken(const char* data, int len); char* makestring(const char* data, int len, int s); %} %option yylineno %% "/*"([^\*]|(\*)*[^\*/])*(\*)*"*/" ; /* 就是這種注釋 */ #[^\n]*\n ; /* 井號注釋 */ "//"[^\n]*\n ; /* 雙線注釋 */ [ \t\v\n\f] ; /* 過濾空白字符 */ "==" return CEQ; "<=" return CLE; ">=" return CGE; "!=" return CNE; "<" return '<'; "=" return '='; ">" return '>'; "(" return '('; ")" return ')'; "[" return '['; "]" return ']'; "{" return '{'; "}" return '}'; "." 
return '.'; "," return ','; ":" return ':'; ";" return ';'; "+" return '+'; "-" return '-'; "*" return '*'; "/" return '/'; "%" return '%'; "^" return '^'; "&" return '&'; "|" return '|'; "~" return '~'; /* 宏運算符 */ "@" return '@'; ",@" return MBK; /* 下面聲明要用到的關鍵字 */ /* 控制流 */ "if" return IF; "else" return ELSE; "while" return WHILE; "do" return DO; "goto" return GOTO; "for" return FOR; "foreach" return FOREACH; /* 退出控制 */ "break"|"continue"|"exit" SAVE_TOKEN; return KWS_EXIT; "return" return RETURN; /* 特殊運算符 */ "new" return NEW; "this" return THIS; /* 特殊定義 */ "delegate" return DELEGATE; "def" return DEF; "define" return DEFINE; "import" return IMPORT; "using" return USING; "namespace" return NAMESPACE; "try"|"catch"|"finally"|"throw" SAVE_TOKEN; return KWS_ERROR; /* 異常控制 */ "null"|"true"|"false" SAVE_TOKEN; return KWS_TSZ; /* 特殊值 */ "struct"|"enum"|"union"|"module"|"interface"|"class" SAVE_TOKEN; return KWS_STRUCT; /* 結構聲明 */ "public"|"private"|"protected" SAVE_TOKEN; return KWS_FWKZ; /* 訪問控制 */ "const"|"static"|"extern"|"virtual"|"abstract"|"in"|"out" SAVE_TOKEN; return KWS_FUNC_XS; /* 函數修飾符 */ "void"|"double"|"int"|"float"|"char"|"bool"|"var"|"auto" SAVE_TOKEN; return KWS_TYPE; /* 基本類型 */ [a-zA-Z_][a-zA-Z0-9_]* SAVE_TOKEN; return ID; /* 標識符 */ [0-9]*\.[0-9]* SAVE_TOKEN; return DOUBLE; [0-9]+ SAVE_TOKEN; return INTEGER; \"(\\.|[^\\"])*\" SAVE_STRING; return STRING; /* 字符串 */ @\"(\\.|[^\\"])*\" SAVE_STRING_NC; return STRING; /* 無轉義字符串 */ \'(\\.|.)\' SAVE_STRING; return CHAR; /* 字符 */ . 
printf("Unknown Token!\n"); yyterminate(); %% char* maketoken(const char* data, int len) { char* str = new char[len+1]; strncpy(str, data, len); str[len] = 0; return str; } char* makestring(const char* data, int len, int s) { char* str = new char[len-s+1]; strncpy(str, data+s-1, len-s); str[len-s] = 0; if (s == 3) return str; printf("source: %s\n",str); char* ans = CharEscape(str); printf("escape: %s\n",ans); delete[] str; return ans; } ~~~ 看起來非常的長,但主要多的就是枚舉了大量的關鍵字和運算符,當然,這個你在開發一門語言的前期,不用面面俱到,可以選自己用到的先寫,不足的再日后補充。 要注意,這里最難的應該就是: ~~~ "/*"([^\*]|(\*)*[^\*/])*(\*)*"*/" ; /* 就是這種注釋 */ ~~~ 乍看起來,非常恐怖的正則式,但其實就是在枚舉多種可能情況,來保障注釋范圍的正確性。 ~~~ "/*" ( [^\*] | (\*)* [^\*/] )* (\*)* "*/" ; /* 就是這種注釋 */ ~~~ ### 用Bison創建通用的語法分析程序 這里我編寫的是類C語言的語法,要注意的是,很多情況會造成規約-規約沖突和移入-規約沖突。這里我簡要介紹一下Bison的工作原理。 這種算法在編譯原理中,被稱為LALR(1)分析法,是自底向上規約的算法之一,而且又會向前看一個token,Bison中的每一行,被稱為一個產生式(或BNF范式)。 例如下面這行: ~~~ def_module_statement : KWS_STRUCT ID '{' def_statements '}' ~~~ 左邊的是要規約的節點, 冒號右邊是描述這個語法節點是用哪些節點產生的。 這是一個結構體定義的語法描述,KWS_STRUCT是終結符,來自Lex里的元素,看了上面的Lex描述,你應該能找到它的定義: ~~~ "struct"|"enum"|"union"|"module"|"interface"|"class" SAVE_TOKEN; return KWS_STRUCT; /* 結構聲明 */ ~~~ 其實就是可能的一些關鍵字。而def_statements是另外的語法節點,由其他定義得來。 規約-規約沖突,是說,在當前產生式結束后,后面跟的元素還確定的情況下,能夠規約到兩個不同的語法節點: ~~~ def_module_statement : KWS_STRUCT ID '{' def_statements '}' ; def_class_statement : KWS_STRUCT ID '{' def_statements '}' ; statement : def_module_statement ';' | def_class_statement ';' ; ~~~ 以上文法便會產生規約-規約沖突,這是嚴重的定義錯誤,必須加以避免。 注意,我為了體現這個語法的錯誤,特意加上了上下文環境,不是說一樣的語法定義會產生規約-規約沖突,而是說后面可能跟的終結符都一樣時(在這里是’;’)才會產生規約-規約沖突,所以避免這種問題也簡單,就是把相似的語法節點合并在一起就可以了。 說到移入-規約沖突,就要談起if-else的搖擺問題: ~~~ if_state : IF '(' expr ')' statement | IF '(' expr ')' statement ELSE statement ; statement : if_state | ... 
; ~~~ 正如這個定義一樣,在if的前半部識別完成后,下一個元素是ELSE終結符,此時既可以規約,也可以移入。 說規約合法的理由是,if_state也是statement,而if第二條statement后面就是ELSE。 根據算法,這里規約是合理的,而移入同樣是合理的。 為了解決這種沖突,一般Bison會優先選擇移入,這樣ELSE會和最近的IF匹配。 所以說,移入-規約沖突在你清楚地知道是哪里的問題的時候,可以不加處理。但非預期的移入-規約沖突有可能讓你的分析器不正確工作,這點還需要注意。 下面是我的Bison配置文件: ~~~ %{ #include "Model/nodes.h" #include <list> using namespace std; #define YYERROR_VERBOSE 1 Node *programBlock; /* the top level root node of our final AST */ extern int yylex(); extern int yylineno; extern char* yytext; extern int yyleng; void yyerror(const char *s); %} /* Represents the many different ways we can access our data */ %union { Node *nodes; char *str; int token; } /* Define our terminal symbols (tokens). This should match our tokens.l lex file. We also define the node type they represent. */ %token <str> ID INTEGER DOUBLE %token <token> CEQ CNE CGE CLE MBK %token <token> '<' '>' '=' '+' '-' '*' '/' '%' '^' '&' '|' '~' '@' %token <str> STRING CHAR %token <token> IF ELSE WHILE DO GOTO FOR FOREACH %token <token> DELEGATE DEF DEFINE IMPORT USING NAMESPACE %token <token> RETURN NEW THIS %token <str> KWS_EXIT KWS_ERROR KWS_TSZ KWS_STRUCT KWS_FWKZ KWS_FUNC_XS KWS_TYPE /* Define the type of node our nonterminal symbols represent. The types refer to the %union declaration above. Ex: when we call an ident (defined by union type ident) we are really calling an (NIdentifier*). It makes the compiler happy. 
*/ %type <nodes> program %type <nodes> def_module_statement %type <nodes> def_module_statements %type <nodes> def_statement %type <nodes> def_statements %type <nodes> for_state %type <nodes> if_state %type <nodes> while_state %type <nodes> statement %type <nodes> statements %type <nodes> block %type <nodes> var_def %type <nodes> func_def %type <nodes> func_def_args %type <nodes> func_def_xs %type <nodes> numeric %type <nodes> expr %type <nodes> call_arg %type <nodes> call_args %type <nodes> return_state //%type <token> operator 這個設計容易引起規約沖突,舍棄 /* Operator precedence for mathematical operators */ %left '~' %left '&' '|' %left CEQ CNE CLE CGE '<' '>' '=' %left '+' '-' %left '*' '/' '%' '^' %left '.' %left MBK '@' %start program %% program : def_statements { programBlock = Node::getList($1); } ; def_module_statement : KWS_STRUCT ID '{' def_statements '}' { $$ = Node::make_list(3, StringNode::Create($1), StringNode::Create($2), $4); } | KWS_STRUCT ID ';' { $$ = Node::make_list(3, StringNode::Create($1), StringNode::Create($2), Node::Create()); } ; def_module_statements : def_module_statement { $$ = Node::getList($1); } | def_module_statements def_module_statement { $$ = $1; $$->addBrother(Node::getList($2)); } ; func_def_xs : KWS_FUNC_XS { $$ = StringNode::Create($1); } | func_def_xs KWS_FUNC_XS {$$ = $1; $$->addBrother(StringNode::Create($2)); } ; def_statement : var_def ';' { $$ = $1; } | func_def | def_module_statement | func_def_xs func_def { $$ = $2; $2->addBrother(Node::getList($1)); } ; def_statements : def_statement { $$ = Node::getList($1); } | def_statements def_statement { $$ = $1; $$->addBrother(Node::getList($2)); } ; statements : statement { $$ = Node::getList($1); } | statements statement { $$ = $1; $$->addBrother(Node::getList($2)); } ; statement : def_statement | expr ';' { $$ = $1; } | block | if_state | while_state | for_state | return_state ; if_state : IF '(' expr ')' statement { $$ = Node::make_list(3, StringNode::Create("if"), $3, $5); } | IF '(' 
expr ')' statement ELSE statement { $$ = Node::make_list(4, StringNode::Create("if"), $3, $5, $7); } ; while_state : WHILE '(' expr ')' statement { $$ = Node::make_list(3, StringNode::Create("while"), $3, $5); } ; for_state : FOR '(' expr ';' expr ';' expr ')' statement { $$ = Node::make_list(5, StringNode::Create("for"), $3, $5, $7, $9); } | FOR '(' var_def ';' expr ';' expr ')' statement { $$ = Node::make_list(5, StringNode::Create("for"), Node::Create($3), $5, $7, $9); } ; return_state : RETURN ';' { $$ = StringNode::Create("return"); } | RETURN expr ';' { $$ = StringNode::Create("return"); $$->addBrother($2); } block : '{' statements '}' { $$ = Node::Create($2); } | '{' '}' { $$ = Node::Create(); } ; var_def : KWS_TYPE ID { $$ = Node::make_list(3, StringNode::Create("set"), StringNode::Create($1), StringNode::Create($2)); } | ID ID { $$ = Node::make_list(3, StringNode::Create("set"), StringNode::Create($1), StringNode::Create($2)); } | KWS_TYPE ID '=' expr { $$ = Node::make_list(4, StringNode::Create("set"), StringNode::Create($1), StringNode::Create($2), $4); } | ID ID '=' expr { $$ = Node::make_list(4, StringNode::Create("set"), StringNode::Create($1), StringNode::Create($2), $4); } ; func_def : ID ID '(' func_def_args ')' block { $$ = Node::make_list(5, StringNode::Create("function"), StringNode::Create($1), StringNode::Create($2), $4, $6); } | KWS_TYPE ID '(' func_def_args ')' block { $$ = Node::make_list(5, StringNode::Create("function"), StringNode::Create($1), StringNode::Create($2), $4, $6); } | ID ID '(' func_def_args ')' ';' { $$ = Node::make_list(5, StringNode::Create("function"), StringNode::Create($1), StringNode::Create($2), $4); } | KWS_TYPE ID '(' func_def_args ')' ';' { $$ = Node::make_list(5, StringNode::Create("function"), StringNode::Create($1), StringNode::Create($2), $4); } ; func_def_args : var_def { $$ = Node::Create(Node::Create($1)); } | func_def_args ',' var_def { $$ = $1; $$->addChildren(Node::Create($3)); } | %empty { $$ = 
Node::Create(); } ; numeric : INTEGER { $$ = IntNode::Create($1); } | DOUBLE { $$ = FloatNode::Create($1); } ; expr : expr '=' expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create("="), $1, $3); } | ID '(' call_args ')' { $$ = Node::make_list(2, StringNode::Create("call"), StringNode::Create($1)); $$->addBrother($3); } | ID { $$ = IDNode::Create($1); } | numeric { $$ = $1; } | STRING { $$ = StringNode::Create($1); } | KWS_TSZ | NEW ID '(' call_args ')' { $$ = Node::make_list(3, StringNode::Create("new"), StringNode::Create($2), $4); } | expr CEQ expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create("=="), $1, $3); } | expr CNE expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create("!="), $1, $3); } | expr CLE expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create("<="), $1, $3); } | expr CGE expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create(">="), $1, $3); } | expr '<' expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create("<"), $1, $3); } | expr '>' expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create(">"), $1, $3); } | expr '+' expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create("+"), $1, $3); } | expr '-' expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create("-"), $1, $3); } | expr '*' expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create("*"), $1, $3); } | expr '/' expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create("/"), $1, $3); } | expr '%' expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create("%"), $1, $3); } | expr '^' expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create("^"), $1, $3); } | expr '&' expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create("&"), $1, $3); } | expr '|' expr { $$ = Node::make_list(4, 
StringNode::Create("opt2"), StringNode::Create("|"), $1, $3); } | expr '.' expr { $$ = Node::make_list(4, StringNode::Create("opt2"), StringNode::Create("."), $1, $3); } | '~' expr { $$ = Node::make_list(4, StringNode::Create("opt1"), StringNode::Create("~"), $2); } | '(' expr ')' /* ( expr ) */ { $$ = $2; } ; call_arg : expr { $$ = $1; } | ID ':' expr { $$ = Node::make_list(3, StringNode::Create(":"), $1, $3); } ; call_args : %empty { $$ = Node::Create(); } | call_arg { $$ = Node::getList($1); } | call_args ',' call_arg { $$ = $1; $$->addBrother(Node::getList($3)); } ; %% void yyerror(const char* s){ fprintf(stderr, "%s \n", s); fprintf(stderr, "line %d: ", yylineno); fprintf(stderr, "text %s \n", yytext); exit(1); } ~~~
                  <ruby id="bdb3f"></ruby>

                  <p id="bdb3f"><cite id="bdb3f"></cite></p>

                    <p id="bdb3f"><cite id="bdb3f"><th id="bdb3f"></th></cite></p><p id="bdb3f"></p>
                      <p id="bdb3f"><cite id="bdb3f"></cite></p>

                        <pre id="bdb3f"></pre>
                        <pre id="bdb3f"><del id="bdb3f"><thead id="bdb3f"></thead></del></pre>

                        <ruby id="bdb3f"><mark id="bdb3f"></mark></ruby><ruby id="bdb3f"></ruby>
                        <pre id="bdb3f"><pre id="bdb3f"><mark id="bdb3f"></mark></pre></pre><output id="bdb3f"></output><p id="bdb3f"></p><p id="bdb3f"></p>

                        <pre id="bdb3f"><del id="bdb3f"><progress id="bdb3f"></progress></del></pre>

                              <ruby id="bdb3f"></ruby>

                              哎呀哎呀视频在线观看