// Compiler Principles coursework: lexical analyzer (C++).
//
// Reads the text file "in.txt" from the working directory, splits it into
// lexemes and writes one "<lexeme> <category>" line per lexeme to "out.txt".
// Categories are emitted as the literal strings 关键字 (keyword), 标识符
// (identifier), 数值 (number), 运算符 (operator) and 界符 (delimiter).
//
// Input is consumed through two 64-byte buffers. Each buffer holds up to 63
// data bytes followed by a sentinel byte; reaching the sentinel of one buffer
// triggers a refill of the other. The same sentinel value inside the data area
// marks end of input, so an input byte equal to 0xFF is indistinguishable from
// end of input (limitation of the sentinel scheme).
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;

/* ------------------------------ global state ------------------------------ */
ifstream infile;           // source text being analysed
ofstream outfile;          // destination of the token listing
char buf1[64], buf2[64];   // the two halves of the double buffer
char *p2;                  // read cursor into buf1 or buf2
char ci;                   // character most recently returned by get_ci()
string token;              // lexeme currently being accumulated

// Reserved words; the trailing "NULL" is not a C++ keyword and was added by
// the original author.
const string keyWord[] = {
    "asm", "do", "if", "return", "typedef",
    "auto", "double", "inline", "short", "typeid",
    "bool", "dynamic_cast", "int", "signed", "typename",
    "break", "else", "long", "sizeof", "union",
    "case", "enum", "mutable", "static", "unsigned",
    "catch", "explicit", "namespace", "static_cast", "using",
    "char", "export", "new", "struct", "virtual",
    "class", "extern", "operator", "switch", "void",
    "const", "false", "private", "template", "volatile",
    "const_cast", "float", "protected", "this", "wchar_t",
    "continue", "for", "public", "throw", "while",
    "default", "friend", "register", "true",
    "delete", "goto", "reinterpret_cast", "try",
    "NULL"
};
// Number of entries in keyWord (64); derived so the two cannot drift apart.
const int KEYWORDLENGTH = sizeof(keyWord) / sizeof(keyWord[0]);

const int BUFFER_DATA = 63;                     // data bytes per buffer; index 63 is the sentinel
const char BUFFER_END = static_cast<char>(-1);  // sentinel / end-of-input marker

/* -------------------------- function declarations ------------------------- */
void start();                    // one-time setup performed at the top of main
void get_ci();                   // fetch the next character into ci
bool isLetter(int);              // ASCII letter?
bool isDigit(int);               // ASCII digit?
bool isUnderLine(int);           // underscore?
bool isKeyWord(string);          // member of keyWord?
char getType(char);              // character class used by the switch in main
void printFile(string, string);  // write one result line to outfile
void retractPoint();             // push the last character back
static void fillBuffer(char *buf);
static void scanWord();
static void scanNumber();

/*
 * Entry point: repeatedly fetches a character, classifies it and emits the
 * lexeme that starts there, until get_ci() reports end of input.
 * Returns 0 on normal completion.
 */
int main()
{
    start();
    while (true)
    {
        token = "";  // every iteration starts a fresh lexeme
        get_ci();
        if (ci == BUFFER_END)
            break;   // end of input

        switch (getType(ci))
        {
        case 'a':    // 'a' stands for any letter
        case '_':
            scanWord();
            break;

        case '0':    // '0' stands for any digit
            scanNumber();
            break;

        case '<':
            get_ci();
            if (ci == '=')
                printFile("<=", "运算符");
            else if (ci == '<')
                printFile("<<", "运算符");
            else
            {
                printFile("<", "运算符");
                retractPoint();  // the lookahead belongs to the next lexeme
            }
            break;

        case '/':
            get_ci();
            if (ci == '/')
            {
                // Line comment: discard up to the newline (or end of input).
                do
                {
                    get_ci();
                } while (ci != '\n' && ci != BUFFER_END);
            }
            else
            {
                printFile("/", "运算符");
                retractPoint();
            }
            break;

        case '+':
            get_ci();
            if (ci == '=')
                printFile("+=", "运算符");
            else if (ci == '+')
                printFile("++", "运算符");
            else
            {
                printFile("+", "运算符");
                retractPoint();
            }
            break;

        case '-':
            token = token + ci;  // keep the sign in case a number follows
            get_ci();
            if (ci == '=')
                printFile("-=", "运算符");
            else if (ci == '-')
                printFile("--", "运算符");
            else if (isDigit(ci))
                scanNumber();    // '-' directly followed by digits is emitted as one number
            else
            {
                printFile("-", "运算符");
                retractPoint();
            }
            break;

        case '*':
            get_ci();
            if (ci == '=')
                printFile("*=", "运算符");
            else
            {
                printFile("*", "运算符");
                retractPoint();
            }
            break;

        case '=':
            get_ci();
            if (ci == '=')
                printFile("==", "运算符");
            else
            {
                printFile("=", "运算符");
                retractPoint();
            }
            break;

        case '>':
            get_ci();
            if (ci == '=')
                printFile(">=", "运算符");
            else if (ci == '>')
                printFile(">>", "运算符");
            else
            {
                printFile(">", "运算符");
                retractPoint();
            }
            break;

        // Single-character delimiters.
        // NOTE(review): the literals for '[', ']', ';', '{' and '}' were lost
        // in the source text and are reconstructed here - confirm the set.
        case '(':
        case ')':
        case '[':
        case ']':
        case ';':
        case '.':
        case ',':
        case '{':
        case '}':
        case '#':
            printFile(string(1, ci), "界符");
            break;

        case ' ':   // whitespace separates lexemes and produces no output
        case '\n':
        case '\t':
            break;

        default:    // any other character is skipped silently
            break;
        }
    }

    infile.close();
    outfile.close();
    cout << "分析结束" << endl;
    system("pause");  // relies on the host shell providing a "pause" command
    return 0;
}

/* ---------------------------- helper definitions --------------------------- */

/*
 * Prints the usage banner, resets the global state, opens in.txt / out.txt and
 * preloads the first buffer. Terminates the process with status -1 when in.txt
 * cannot be opened.
 */
void start()
{
    cout << "本程序读取程序所在目录下的ASCII编码文件 in.txt 进行词法分析" << endl;
    cout << "分析结果输出至当前目录下的文件 out.txt" << endl;
    token = "";
    p2 = buf1;                       // reading starts in the first buffer
    buf1[BUFFER_DATA] = BUFFER_END;  // each buffer ends with the sentinel
    buf2[BUFFER_DATA] = BUFFER_END;
    infile.open("in.txt", ios::in);
    if (!infile)
    {
        cout << endl << "错误警告:未在程序当前目录下找到文件 in.txt" << endl;
        system("pause");
        exit(-1);
    }
    outfile.open("out.txt", ios::out);  // truncates any existing file
    fillBuffer(buf1);
}

/*
 * Loads up to BUFFER_DATA bytes from infile into buf and re-arms its sentinel.
 * Slots that cannot be filled because the stream is exhausted receive
 * BUFFER_END, which get_ci() reports as end of input.
 */
static void fillBuffer(char *buf)
{
    for (int i = 0; i < BUFFER_DATA; ++i)
    {
        const int c = infile.get();
        buf[i] = (c == ifstream::traits_type::eof()) ? BUFFER_END
                                                     : static_cast<char>(c);
    }
    buf[BUFFER_DATA] = BUFFER_END;
}

/*
 * Stores the next input character in ci and advances p2 by one.
 * When p2 sits on a buffer's sentinel the other buffer is refilled first.
 * At end of input ci is set to BUFFER_END; p2 still advances, so a following
 * retractPoint() makes the next call report end of input again.
 */
void get_ci()
{
    if (*p2 == BUFFER_END)
    {
        if (p2 == buf1 + BUFFER_DATA)       // finished buf1: continue in buf2
        {
            fillBuffer(buf2);
            p2 = buf2;
        }
        else if (p2 == buf2 + BUFFER_DATA)  // finished buf2: continue in buf1
        {
            fillBuffer(buf1);
            p2 = buf1;
        }
        // Otherwise the marker lies inside the data area: the input is exhausted.
    }
    ci = *p2;
    p2 = p2 + 1;
}

/* Returns true when c is an ASCII letter. */
bool isLetter(int c)
{
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

/* Returns true when c is an ASCII decimal digit. */
bool isDigit(int c)
{
    return c >= '0' && c <= '9';
}

/* Returns true when c is the underscore character. */
bool isUnderLine(int c)
{
    return c == '_';
}

/* Returns true when s equals one of the entries of keyWord. */
bool isKeyWord(string s)
{
    for (int i = 0; i < KEYWORDLENGTH; ++i)
    {
        if (s == keyWord[i])
            return true;
    }
    return false;
}

/* Appends the line "<s1> <s2>" to outfile. */
void printFile(string s1, string s2)
{
    outfile << s1 << " " << s2 << '\n';
}

/*
 * Moves p2 back by one so the character just returned by get_ci() is read
 * again. get_ci() always leaves p2 one past the character it returned, so
 * after a read p2 is never at the head of a buffer; the wrap-around branches
 * are defensive and target the last data slot of the other buffer, never its
 * sentinel (landing on the sentinel would trigger a refill).
 */
void retractPoint()
{
    if (p2 == buf1)
        p2 = buf2 + BUFFER_DATA - 1;
    else if (p2 == buf2)
        p2 = buf1 + BUFFER_DATA - 1;
    else
        p2--;
}

/*
 * Maps c to the character class used by the switch in main: 'a' for any
 * letter, '0' for any digit, otherwise c itself.
 */
char getType(char c)
{
    if (isLetter(c))
        return 'a';
    if (isDigit(c))
        return '0';
    return c;
}

/*
 * Consumes letters, digits and underscores starting with the current ci,
 * emits the result as a keyword or identifier, and pushes back the first
 * character that does not belong to the word.
 */
static void scanWord()
{
    while (isLetter(ci) || isDigit(ci) || isUnderLine(ci))
    {
        token = token + ci;
        get_ci();
    }
    if (isKeyWord(token))
        printFile(token, "关键字");
    else
        printFile(token, "标识符");
    retractPoint();
}

/*
 * Consumes digits and '.' characters starting with the current ci, appending
 * them to token (which may already hold a leading '-'), emits the result as a
 * number, and pushes back the first character that does not belong to it.
 */
static void scanNumber()
{
    while (isDigit(ci) || ci == '.')
    {
        token = token + ci;
        get_ci();
    }
    printFile(token, "数值");
    retractPoint();
}