00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036 #include <stdlib.h>
00037 #include <string.h>
00038 #include "token.h"
00039 #include "hash.h"
00040 #include "ptree.h"
00041 #include "buffer.h"
00042
// Build configuration for the lexer.
// Defining _PARSE_VCC forces _MSC_VER to 1100, which switches on every
// _MSC_VER-guarded section of this file (the <assert.h> include, the
// asm/declspec skipping code and the extra keywords registered in
// InitializeOtherKeywords).
00043 #if defined(_PARSE_VCC)
00044 #define _MSC_VER 1100
00045 #endif
00046
00047 #if defined(_MSC_VER)
00048 #include <assert.h>
00049 #endif
00050
// regularCpp is defined elsewhere; when it is set, InitializeOtherKeywords
// turns the "metaclass" keyword into a plain identifier.
00051 extern BOOL regularCpp;
00052 static void InitializeOtherKeywords();
00053
// token(x) wraps every token kind returned by the lexer.
// In a TEST build it yields a string (cast to long) so the test driver can
// print a name; with GNU C++ that is the stringized argument (#x).
// NOTE(review): the non-GNU variant expands to the literal "x" under a
// standard preprocessor, since macro parameters are not replaced inside
// string literals - presumably this relied on pre-ANSI behaviour; confirm.
// In a normal build token(x) is simply x.
00054 #ifdef TEST
00055
00056 #ifdef __GNUG__
00057 #define token(x) (long)#x
00058 #else
00059 #define token(x) (long)"x"
00060 #endif
00061
00062 #else
00063
00064 #define token(x) x
00065
00066 #endif
00067
00068
00069
// Keywords registered at run time through Lex::RecordKeyword(); the table is
// created lazily by RecordKeyword() or Screening().
00070 HashTable* Lex::user_keywords = nil;
// Comments collected by ReadComment() until GetComments() hands them out.
00071 Ptree* Lex::comments = nil;
00072
00073 Lex::Lex(Program* prog) : fifo(this)
00074 {
00075 file = prog;
00076 prog->Rewind();
00077 last_token = '\n';
00078 tokenp = 0;
00079 token_len = 0;
00080
00081 InitializeOtherKeywords();
00082 }
00083
00084 char* Lex::Save()
00085 {
00086 char* pos;
00087 int len;
00088
00089 fifo.Peek(0, pos, len);
00090 return pos;
00091 }
00092
00093 void Lex::Restore(char* pos)
00094 {
00095 last_token = '\n';
00096 tokenp = 0;
00097 token_len = 0;
00098 fifo.Clear();
00099 Rewind(pos);
00100 }
00101
00102
00103
00104 void Lex::GetOnlyClosingBracket(Token& t)
00105 {
00106 Restore(t.ptr + 1);
00107 }
00108
00109 uint Lex::LineNumber(char* pos, char*& ptr, int& len)
00110 {
00111 return file->LineNumber(pos, ptr, len);
00112 }
00113
00114 int Lex::GetToken(Token& t)
00115 {
00116 t.kind = fifo.Pop(t.ptr, t.len);
00117 return t.kind;
00118 }
00119
00120 int Lex::LookAhead(int offset)
00121 {
00122 return fifo.Peek(offset);
00123 }
00124
00125 int Lex::LookAhead(int offset, Token& t)
00126 {
00127 t.kind = fifo.Peek(offset, t.ptr, t.len);
00128 return t.kind;
00129 }
00130
00131 char* Lex::TokenPosition()
00132 {
00133 return (char*)file->Read(Tokenp());
00134 }
00135
00136 char Lex::Ref(uint i)
00137 {
00138 return file->Ref(i);
00139 }
00140
00141 void Lex::Rewind(char* p)
00142 {
00143 file->Rewind(p - file->Read(0));
00144 }
00145
00146 bool Lex::RecordKeyword(char* keyword, int token)
00147 {
00148 int index;
00149 char* str;
00150
00151 if(keyword == nil)
00152 return FALSE;
00153
00154 str = new(GC) char[strlen(keyword) + 1];
00155 strcpy(str, keyword);
00156
00157 if(user_keywords == nil)
00158 user_keywords = new HashTable;
00159
00160 if(user_keywords->AddEntry(str, (HashValue)token, &index) >= 0)
00161 return TRUE;
00162 else
00163 return bool(user_keywords->Peek(index) == (HashValue)token);
00164 }
00165
00166 bool Lex::Reify(Ptree* t, unsigned int& value)
00167 {
00168 if(t == nil || !t->IsLeaf())
00169 return FALSE;
00170
00171 char* p = t->GetPosition();
00172 int len = t->GetLength();
00173 value = 0;
00174 if(len > 2 && *p == '0' && is_xletter(p[1])){
00175 for(int i = 2; i < len; ++i){
00176 char c = p[i];
00177 if(is_digit(c))
00178 value = value * 0x10 + (c - '0');
00179 else if('A' <= c && c <= 'F')
00180 value = value * 0x10 + (c - 'A' + 10);
00181 else if('a' <= c && c <= 'f')
00182 value = value * 0x10 + (c - 'a' + 10);
00183 else if(is_int_suffix(c))
00184 break;
00185 else
00186 return FALSE;
00187 }
00188
00189 return TRUE;
00190 }
00191 else if(len > 0 && is_digit(*p)){
00192 for(int i = 0; i < len; ++i){
00193 char c = p[i];
00194 if(is_digit(c))
00195 value = value * 10 + c - '0';
00196 else if(is_int_suffix(c))
00197 break;
00198 else
00199 return FALSE;
00200 }
00201
00202 return TRUE;
00203 }
00204 else
00205 return FALSE;
00206 }
00207
00208
00209
00210 bool Lex::Reify(Ptree* t, char*& str)
00211 {
00212 if(t == nil || !t->IsLeaf())
00213 return FALSE;
00214
00215 char* p = t->GetPosition();
00216 int length = t->GetLength();
00217 if(*p != '"')
00218 return FALSE;
00219 else{
00220 str = new(GC) char[length];
00221 char* sp = str;
00222 for(int i = 1; i < length; ++i)
00223 if(p[i] != '"'){
00224 *sp++ = p[i];
00225 if(p[i] == '\\' && i + 1 < length)
00226 *sp++ = p[++i];
00227 }
00228 else
00229 while(++i < length && p[i] != '"')
00230 ;
00231
00232 *sp = '\0';
00233 return TRUE;
00234 }
00235 }
00236
00237
00238
00239 Lex::TokenFifo::TokenFifo(Lex* l)
00240 {
00241 lex = l;
00242 size = 16;
00243 ring = new (GC) Slot[size];
00244 head = tail = 0;
00245 }
00246
00247 Lex::TokenFifo::~TokenFifo()
00248 {
00249
00250 }
00251
00252 void Lex::TokenFifo::Clear()
00253 {
00254 head = tail = 0;
00255 }
00256
00257 void Lex::TokenFifo::Push(int token, char* pos, int len)
00258 {
00259 const int Plus = 16;
00260 ring[head].token = token;
00261 ring[head].pos = pos;
00262 ring[head].len = len;
00263 head = (head + 1) % size;
00264 if(head == tail){
00265 Slot* ring2 = new (GC) Slot[size + Plus];
00266 int i = 0;
00267 do{
00268 ring2[i++] = ring[tail];
00269 tail = (tail + 1) % size;
00270 } while(head != tail);
00271 head = i;
00272 tail = 0;
00273 size += Plus;
00274
00275 ring = ring2;
00276 }
00277 }
00278
00279 int Lex::TokenFifo::Pop(char*& pos, int& len)
00280 {
00281 if(head == tail)
00282 return lex->ReadToken(pos, len);
00283
00284 int t = ring[tail].token;
00285 pos = ring[tail].pos;
00286 len = ring[tail].len;
00287 tail = (tail + 1) % size;
00288 return t;
00289 }
00290
00291 int Lex::TokenFifo::Peek(int offset)
00292 {
00293 return ring[Peek2(offset)].token;
00294 }
00295
00296 int Lex::TokenFifo::Peek(int offset, char*& pos, int& len)
00297 {
00298 int cur = Peek2(offset);
00299 pos = ring[cur].pos;
00300 len = ring[cur].len;
00301 return ring[cur].token;
00302 }
00303
00304 int Lex::TokenFifo::Peek2(int offset)
00305 {
00306 int i;
00307 int cur = tail;
00308
00309 for(i = 0; i <= offset; ++i){
00310 if(head == cur){
00311 while(i++ <= offset){
00312 char* p;
00313 int l;
00314 int t = lex->ReadToken(p, l);
00315 Push(t, p, l);
00316 }
00317
00318 break;
00319 }
00320
00321 cur = (cur + 1) % size;
00322 }
00323
00324 return (tail + offset) % size;
00325 }
00326
00327
00328
00329
00330
00331 int Lex::ReadToken(char*& ptr, int& len)
00332 {
00333 int t;
00334
00335 for(;;){
00336 t = ReadLine();
00337
00338 if(t == Ignore)
00339 continue;
00340
00341 last_token = t;
00342
00343 #if defined(__GNUG__) || defined(_GNUG_SYNTAX)
00344 if(t == ATTRIBUTE){
00345 SkipAttributeToken();
00346 continue;
00347 }
00348 else if(t == EXTENSION){
00349 t = SkipExtensionToken(ptr, len);
00350 if(t == Ignore)
00351 continue;
00352 else
00353 return t;
00354 }
00355 #endif
00356 #if defined(_MSC_VER)
00357 if(t == ASM){
00358 SkipAsmToken();
00359 continue;
00360 }
00361 else if(t == DECLSPEC){
00362 SkipDeclspecToken();
00363 continue;
00364 }
00365 #endif
00366 if(t != '\n')
00367 break;
00368 }
00369
00370 ptr = TokenPosition();
00371 len = TokenLen();
00372 return t;
00373 }
00374
00375
00376
00377 void Lex::SkipAttributeToken()
00378 {
00379 char c;
00380
00381 do{
00382 c = file->Get();
00383 }while(c != '(' && c != '\0');
00384
00385 int i = 1;
00386 do{
00387 c = file->Get();
00388 if(c == '(')
00389 ++i;
00390 else if(c == ')')
00391 --i;
00392 else if(c == '\0')
00393 break;
00394 } while(i > 0);
00395 }
00396
00397
00398
00399 int Lex::SkipExtensionToken(char*& ptr, int& len)
00400 {
00401 ptr = TokenPosition();
00402 len = TokenLen();
00403
00404 char c;
00405
00406 do{
00407 c = file->Get();
00408 }while(is_blank(c) || c == '\n');
00409
00410 if(c != '('){
00411 file->Unget();
00412 return Ignore;
00413 }
00414
00415 int i = 1;
00416 do{
00417 c = file->Get();
00418 if(c == '(')
00419 ++i;
00420 else if(c == ')')
00421 --i;
00422 else if(c == '\0')
00423 break;
00424 } while(i > 0);
00425
00426 return Identifier;
00427 }
00428
#if defined(_MSC_VER)

// Leaves the enclosing (void) member function when character C ends the
// construct being skipped: immediately at the end of the input, or - after
// pushing C back - when C is one of the characters in the string EOI.
#define CHECK_END_OF_INSTRUCTION(C, EOI)        \
    if (C == '\0') return;                      \
    if (strchr(EOI, C)) {                       \
        this->file->Unget();                    \
        return;                                 \
    }

// Skips the operand of an ASM token.
// After optional blanks and newlines either a brace-enclosed block is
// consumed up to its matching '}', or the rest of the instruction is
// consumed up to (but not including) the next '}' or newline.
void Lex::SkipAsmToken()
{
    char c;

    do{
        c = file->Get();
        CHECK_END_OF_INSTRUCTION(c, "");
    }while(is_blank(c) || c == '\n');

    if(c == '{'){
        int i = 1;
        do{
            c = file->Get();
            CHECK_END_OF_INSTRUCTION(c, "");
            if(c == '{')
                ++i;
            else if(c == '}')
                --i;
        } while(i > 0);
    }
    else{
        for(;;){
            CHECK_END_OF_INSTRUCTION(c, "}\n");
            c = file->Get();
        }
    }
}

// Skips the parenthesized argument of a DECLSPEC token.
// The argument list is consumed up to its matching ')'; scanning also stops
// (with the character pushed back) at '}' or ';'.  When the first non-blank
// character is not '(', it is pushed back so that it is lexed normally
// instead of being lost.
void Lex::SkipDeclspecToken()
{
    char c;

    do{
        c = file->Get();
        CHECK_END_OF_INSTRUCTION(c, "");
    }while(is_blank(c));

    if (c == '(') {
        int i = 1;
        do{
            c = file->Get();
            CHECK_END_OF_INSTRUCTION(c, "};");
            if(c == '(')
                ++i;
            else if(c == ')')
                --i;
        }while(i > 0);
    }
    else
        file->Unget();  // not an argument list: do not swallow the character
}

#undef CHECK_END_OF_INSTRUCTION

#endif
00509
00510 char Lex::GetNextNonWhiteChar()
00511 {
00512 char c;
00513
00514 for(;;){
00515 do{
00516 c = file->Get();
00517 }while(is_blank(c));
00518
00519 if(c != '\\')
00520 break;
00521
00522 c = file->Get();
00523 if(c != '\n' && c!= '\r') {
00524 file->Unget();
00525 break;
00526 }
00527 }
00528
00529 return c;
00530 }
00531
00532 int Lex::ReadLine()
00533 {
00534 char c;
00535 uint top;
00536
00537 c = GetNextNonWhiteChar();
00538
00539 tokenp = top = file->GetCurPos();
00540 if(c == '\0'){
00541 file->Unget();
00542 return '\0';
00543 }
00544 else if(c == '\n')
00545 return '\n';
00546 else if(c == '#' && last_token == '\n'){
00547 if(ReadLineDirective())
00548 return '\n';
00549 else{
00550 file->Rewind(top + 1);
00551 token_len = 1;
00552 return SingleCharOp(c);
00553 }
00554 }
00555 else if(c == '\'' || c == '"'){
00556 if(c == '\''){
00557 if(ReadCharConst(top))
00558 return token(CharConst);
00559 }
00560 else{
00561 if(ReadStrConst(top))
00562 return token(StringL);
00563 }
00564
00565 file->Rewind(top + 1);
00566 token_len = 1;
00567 return SingleCharOp(c);
00568 }
00569 else if(is_digit(c))
00570 return ReadNumber(c, top);
00571 else if(c == '.'){
00572 c = file->Get();
00573 if(is_digit(c))
00574 return ReadFloat(top);
00575 else{
00576 file->Unget();
00577 return ReadSeparator('.', top);
00578 }
00579 }
00580 else if(is_letter(c))
00581 return ReadIdentifier(top);
00582 else
00583 return ReadSeparator(c, top);
00584 }
00585
00586 bool Lex::ReadCharConst(uint top)
00587 {
00588 char c;
00589
00590 for(;;){
00591 c = file->Get();
00592 if(c == '\\'){
00593 c = file->Get();
00594 if(c == '\0')
00595 return FALSE;
00596 }
00597 else if(c == '\''){
00598 token_len = int(file->GetCurPos() - top + 1);
00599 return TRUE;
00600 }
00601 else if(c == '\n' || c == '\0')
00602 return FALSE;
00603 }
00604 }
00605
00606
00607
00608
00609
00610
00611 bool Lex::ReadStrConst(uint top)
00612 {
00613 char c;
00614
00615 for(;;){
00616 c = file->Get();
00617 if(c == '\\'){
00618 c = file->Get();
00619 if(c == '\0')
00620 return FALSE;
00621 }
00622 else if(c == '"'){
00623 uint pos = file->GetCurPos() + 1;
00624 int nline = 0;
00625 do{
00626 c = file->Get();
00627 if(c == '\n')
00628 ++nline;
00629 } while(is_blank(c) || c == '\n');
00630
00631 if(c == '"')
00632 ;
00633 else{
00634 token_len = int(pos - top);
00635 file->Rewind(pos);
00636 return TRUE;
00637 }
00638 }
00639 else if(c == '\n' || c == '\0')
00640 return FALSE;
00641 }
00642 }
00643
00644 int Lex::ReadNumber(char c, uint top)
00645 {
00646 char c2 = file->Get();
00647
00648 if(c == '0' && is_xletter(c2)){
00649 do{
00650 c = file->Get();
00651 } while(is_hexdigit(c));
00652 while(is_int_suffix(c))
00653 c = file->Get();
00654
00655 file->Unget();
00656 token_len = int(file->GetCurPos() - top + 1);
00657 return token(Constant);
00658 }
00659
00660 while(is_digit(c2))
00661 c2 = file->Get();
00662
00663 if(is_int_suffix(c2))
00664 do{
00665 c2 = file->Get();
00666 }while(is_int_suffix(c2));
00667 else if(c2 == '.')
00668 return ReadFloat(top);
00669 else if(is_eletter(c2)){
00670 file->Unget();
00671 return ReadFloat(top);
00672 }
00673
00674 file->Unget();
00675 token_len = int(file->GetCurPos() - top + 1);
00676 return token(Constant);
00677 }
00678
00679 int Lex::ReadFloat(uint top)
00680 {
00681 char c;
00682
00683 do{
00684 c = file->Get();
00685 }while(is_digit(c));
00686 if(is_float_suffix(c))
00687 do{
00688 c = file->Get();
00689 }while(is_float_suffix(c));
00690 else if(is_eletter(c)){
00691 uint p = file->GetCurPos();
00692 c = file->Get();
00693 if(c == '+' || c == '-'){
00694 c = file->Get();
00695 if(!is_digit(c)){
00696 file->Rewind(p);
00697 token_len = int(p - top);
00698 return token(Constant);
00699 }
00700 }
00701 else if(!is_digit(c)){
00702 file->Rewind(p);
00703 token_len = int(p - top);
00704 return token(Constant);
00705 }
00706
00707 do{
00708 c = file->Get();
00709 }while(is_digit(c));
00710
00711 while(is_float_suffix(c))
00712 c = file->Get();
00713 }
00714
00715 file->Unget();
00716 token_len = int(file->GetCurPos() - top + 1);
00717 return token(Constant);
00718 }
00719
00720
00721
00722 bool Lex::ReadLineDirective()
00723 {
00724 char c;
00725
00726 do{
00727 c = file->Get();
00728 }while(c != '\n' && c != '\0');
00729 return TRUE;
00730 }
00731
00732 int Lex::ReadIdentifier(uint top)
00733 {
00734 char c;
00735
00736 do{
00737 c = file->Get();
00738 }while(is_letter(c) || is_digit(c));
00739
00740 uint len = file->GetCurPos() - top;
00741 token_len = int(len);
00742 file->Unget();
00743
00744 return Screening((char*)file->Read(top), int(len));
00745 }
00746
00747
00748
00749
00750
00751 static struct rw_table {
00752 char* name;
00753 long value;
00754 } table[] = {
00755 #if defined(__GNUG__) || defined(_GNUG_SYNTAX)
00756 { "__alignof__", token(SIZEOF) },
00757 { "__asm__", token(ATTRIBUTE) },
00758 { "__attribute__", token(ATTRIBUTE) },
00759 { "__const", token(CONST) },
00760 { "__extension__", token(EXTENSION) },
00761 { "__inline__", token(INLINE) },
00762 { "__restrict", token(Ignore) },
00763 { "__signed", token(SIGNED) },
00764 { "__signed__", token(SIGNED) },
00765 #endif
00766 { "asm", token(ATTRIBUTE) },
00767 { "auto", token(AUTO) },
00768 #if !defined(_MSC_VER) || (_MSC_VER >= 1100)
00769 { "bool", token(BOOLEAN) },
00770 #endif
00771 { "break", token(BREAK) },
00772 { "case", token(CASE) },
00773 { "catch", token(CATCH) },
00774 { "char", token(CHAR) },
00775 { "class", token(CLASS) },
00776 { "const", token(CONST) },
00777 { "continue", token(CONTINUE) },
00778 { "default", token(DEFAULT) },
00779 { "delete", token(DELETE) },
00780 { "do", token(DO) },
00781 { "double", token(DOUBLE) },
00782 { "else", token(ELSE) },
00783 { "enum", token(ENUM) },
00784 { "extern", token(EXTERN) },
00785 { "float", token(FLOAT) },
00786 { "for", token(FOR) },
00787 { "friend", token(FRIEND) },
00788 { "goto", token(GOTO) },
00789 { "if", token(IF) },
00790 { "inline", token(INLINE) },
00791 { "int", token(INT) },
00792 { "long", token(LONG) },
00793 { "metaclass", token(METACLASS) },
00794 { "mutable", token(MUTABLE) },
00795 { "namespace", token(NAMESPACE) },
00796 { "new", token(NEW) },
00797 { "operator", token(OPERATOR) },
00798 { "private", token(PRIVATE) },
00799 { "protected", token(PROTECTED) },
00800 { "public", token(PUBLIC) },
00801 { "register", token(REGISTER) },
00802 { "return", token(RETURN) },
00803 { "short", token(SHORT) },
00804 { "signed", token(SIGNED) },
00805 { "sizeof", token(SIZEOF) },
00806 { "static", token(STATIC) },
00807 { "struct", token(STRUCT) },
00808 { "switch", token(SWITCH) },
00809 { "template", token(TEMPLATE) },
00810 { "this", token(THIS) },
00811 { "throw", token(THROW) },
00812 { "try", token(TRY) },
00813 { "typedef", token(TYPEDEF) },
00814 { "typename", token(CLASS) },
00815 { "union", token(UNION) },
00816 { "unsigned", token(UNSIGNED) },
00817 { "using", token(USING) },
00818 { "virtual", token(VIRTUAL) },
00819 { "void", token(VOID) },
00820 { "volatile", token(VOLATILE) },
00821 { "while", token(WHILE) },
00822
00823 };
00824
00825 static void InitializeOtherKeywords()
00826 {
00827 static BOOL done = FALSE;
00828
00829 if(done)
00830 return;
00831 else
00832 done = TRUE;
00833
00834 if(regularCpp)
00835 for(unsigned int i = 0; i < sizeof(table) / sizeof(table[0]); ++i)
00836 if(table[i].value == METACLASS){
00837 table[i].value = Identifier;
00838 break;
00839 }
00840
00841 #if defined(_MSC_VER)
00842 assert(Lex::RecordKeyword("cdecl", Ignore));
00843 assert(Lex::RecordKeyword("_cdecl", Ignore));
00844 assert(Lex::RecordKeyword("__cdecl", Ignore));
00845
00846 assert(Lex::RecordKeyword("_fastcall", Ignore));
00847 assert(Lex::RecordKeyword("__fastcall", Ignore));
00848
00849 assert(Lex::RecordKeyword("_based", Ignore));
00850 assert(Lex::RecordKeyword("__based", Ignore));
00851
00852 assert(Lex::RecordKeyword("_asm", ASM));
00853 assert(Lex::RecordKeyword("__asm", ASM));
00854
00855 assert(Lex::RecordKeyword("_inline", INLINE));
00856 assert(Lex::RecordKeyword("__inline", INLINE));
00857
00858 assert(Lex::RecordKeyword("_stdcall", Ignore));
00859 assert(Lex::RecordKeyword("__stdcall", Ignore));
00860
00861 assert(Lex::RecordKeyword("__declspec", DECLSPEC));
00862
00863 assert(Lex::RecordKeyword("__int8", CHAR));
00864 assert(Lex::RecordKeyword("__int16", SHORT));
00865 assert(Lex::RecordKeyword("__int32", INT));
00866 assert(Lex::RecordKeyword("__int64", INT64));
00867 #endif
00868 }
00869
00870 int Lex::Screening(char *identifier, int len)
00871 {
00872 struct rw_table *low, *high, *mid;
00873 int c, token;
00874
00875 low = table;
00876 high = &table[sizeof(table) / sizeof(table[0]) - 1];
00877 while(low <= high){
00878 mid = low + (high - low) / 2;
00879 if((c = strncmp(mid->name, identifier, len)) == 0)
00880 if(mid->name[len] == '\0')
00881 return mid->value;
00882 else
00883 high = mid - 1;
00884 else if(c < 0)
00885 low = mid + 1;
00886 else
00887 high = mid - 1;
00888 }
00889
00890 if(user_keywords == nil)
00891 user_keywords = new HashTable;
00892
00893 if(user_keywords->Lookup(identifier, len, (HashValue*)&token))
00894 return token;
00895
00896 return token(Identifier);
00897 }
00898
00899 int Lex::ReadSeparator(char c, uint top)
00900 {
00901 char c1 = file->Get();
00902
00903 token_len = 2;
00904 if(c1 == '='){
00905 switch(c){
00906 case '*' :
00907 case '/' :
00908 case '%' :
00909 case '+' :
00910 case '-' :
00911 case '&' :
00912 case '^' :
00913 case '|' :
00914 return token(AssignOp);
00915 case '=' :
00916 case '!' :
00917 return token(EqualOp);
00918 case '<' :
00919 case '>' :
00920 return token(RelOp);
00921 default :
00922 file->Unget();
00923 token_len = 1;
00924 return SingleCharOp(c);
00925 }
00926 }
00927 else if(c == c1){
00928 switch(c){
00929 case '<' :
00930 case '>' :
00931 if(file->Get() != '='){
00932 file->Unget();
00933 return token(ShiftOp);
00934 }
00935 else{
00936 token_len = 3;
00937 return token(AssignOp);
00938 }
00939 case '|' :
00940 return token(LogOrOp);
00941 case '&' :
00942 return token(LogAndOp);
00943 case '+' :
00944 case '-' :
00945 return token(IncOp);
00946 case ':' :
00947 return token(Scope);
00948 case '.' :
00949 if(file->Get() == '.'){
00950 token_len = 3;
00951 return token(Ellipsis);
00952 }
00953 else
00954 file->Unget();
00955 case '/' :
00956 return ReadComment(c1, top);
00957 default :
00958 file->Unget();
00959 token_len = 1;
00960 return SingleCharOp(c);
00961 }
00962 }
00963 else if(c == '.' && c1 == '*')
00964 return token(PmOp);
00965 else if(c == '-' && c1 == '>')
00966 if(file->Get() == '*'){
00967 token_len = 3;
00968 return token(PmOp);
00969 }
00970 else{
00971 file->Unget();
00972 return token(ArrowOp);
00973 }
00974 else if(c == '/' && c1 == '*')
00975 return ReadComment(c1, top);
00976 else{
00977 file->Unget();
00978 token_len = 1;
00979 return SingleCharOp(c);
00980 }
00981
00982 cerr << "*** An invalid character has been found! ("
00983 << (int)c << ',' << (int)c1 << ")\n";
00984 return token(BadToken);
00985 }
00986
00987 int Lex::SingleCharOp(unsigned char c)
00988 {
00989
00990 static char valid[] = "x xx xxxxxxxx xxxxxx";
00991
00992 if('!' <= c && c <= '?' && valid[c - '!'] == 'x')
00993 return c;
00994 else if(c == '[' || c == ']' || c == '^')
00995 return c;
00996 else if('{' <= c && c <= '~')
00997 return c;
00998 else
00999 return token(BadToken);
01000 }
01001
01002 int Lex::ReadComment(char c, uint top) {
01003 uint len = 0;
01004 if (c == '*')
01005 do {
01006 c = file->Get();
01007 if (c == '*') {
01008 c = file->Get();
01009 if (c == '/') {
01010 len = 1;
01011 break;
01012 }
01013 else
01014 file->Unget();
01015 }
01016 }while(c != '\0');
01017 else
01018 do {
01019 c = file->Get();
01020 }while(c != '\n' && c != '\0');
01021
01022 len += file->GetCurPos() - top;
01023 token_len = int(len);
01024 Leaf* node = new Leaf((char*)file->Read(top), int(len));
01025 comments = Ptree::Snoc(comments, node);
01026 return Ignore;
01027 }
01028
01029 Ptree* Lex::GetComments() {
01030 Ptree* c = comments;
01031 comments = nil;
01032 return c;
01033 }
01034
01035 Ptree* Lex::GetComments2() {
01036 return comments;
01037 }
01038
#ifdef TEST
#include <stdio.h>

// Test driver: lexes standard input and prints every token, one per line,
// as its kind followed by its text in double quotes.  Tokens are fetched via
// look-ahead with an increasing offset until the end-of-input token (0).
// NOTE(review): in TEST builds token(x) produces string addresses cast to
// long, yet the kind travels through an int here and is cast back to char*
// for printing - this presumably only works where pointers fit in an int.
int main()
{
    int i = 0;
    Token token;

    Lex lex(new ProgramFromStdin);
    for(;;){
        int t = lex.LookAhead(i++, token);
        if(t == 0)
            break;
        else if(t < 128)
            printf("%c (%x): ", t, t);
        else
            printf("%-10.10s (%x): ", (char*)t, t);

        putchar('"');
        while(token.len-- > 0)
            putchar(*token.ptr++);

        puts("\"");
    }

    return 0;
}
#endif
01066
01067
01068
01069
01070
01071
01072
01073
01074
01075
01076
01077
01078
01079
01080
01081
01082
01083
01084
01085
01086
01087
01088
01089
01090
01091
01092
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115