00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include <string.h>
00017 #include <ctype.h>
00018 #include "slexer.hpp"
00019
00020 #ifndef EOF
00021 #define EOF (-1)
00022 #endif
00023
// Converter from the text of a numeric token to a number object. Only
// declared here (the definition is not in this file); ProcessToken() calls
// it for tokens that IsNumber() accepts.
00024 extern SReference Charp2LispNumber(const char* p);
00025
// File-scope label object constructed from the text "#<END OF FILE>".
// It is not referenced by any other code visible in this file.
00026 SLabel LexemaEof("#<END OF FILE>");
00027
00028 IntelibSLexAnalyser::IntelibSLexAnalyser()
00029 : specchars(0), special_state(0), state(home),
00030 postponed_char(-1), line(1), error_message(0)
00031 {}
00032
00033 IntelibSLexAnalyser::~IntelibSLexAnalyser()
00034 {
00035 while(specchars) {
00036 SpecChar *tmp = specchars->next;
00037 delete specchars;
00038 specchars = tmp;
00039 }
00040 }
00041
00042 bool IntelibSLexAnalyser::AddDelimiter(const char *prefix,
00043 const SReference &token)
00044 {
00045 SpecChar *spc = AddSpecial(prefix, false);
00046 if(!spc) return false;
00047 delimiter_chars += prefix[0];
00048 spc->token = token;
00049 return true;
00050 }
00051
00052 bool IntelibSLexAnalyser::AddNonDelimiter(const char *prefix,
00053 const SReference &token)
00054 {
00055 SpecChar *spc = AddSpecial(prefix, false);
00056 if(!spc) return false;
00057 spc->token = token;
00058 spc->status = SpecChar::non_delim;
00059 return true;
00060 }
00061
00062 bool IntelibSLexAnalyser::AddTokenStarter(const char *prefix,
00063 SReference (*fun)(const char *))
00064 {
00065 SpecChar *spc = AddSpecial(prefix, true);
00066 if(!spc) return false;
00067
00068 spc->makestring = fun;
00069 return true;
00070 }
00071
00072 bool IntelibSLexAnalyser::AddStringStarter(const char *prefix,
00073 int closer_char,
00074 SReference (*fun)(const char *))
00075 {
00076 SpecChar *spc = AddSpecial(prefix, true);
00077 if(!spc) return false;
00078 delimiter_chars += prefix[0];
00079 spc->status = SpecChar::read_string;
00080 spc->closer[0] = closer_char;
00081 spc->closer[1] = 0;
00082 spc->makestring = fun;
00083 return true;
00084 }
00085
00086 bool IntelibSLexAnalyser::AddCommentStarter(const char *prefix,
00087 const char *closer)
00088 {
00089 SpecChar *spc = AddSpecial(prefix, true);
00090 if(!spc) return false;
00091 delimiter_chars += prefix[0];
00092 spc->status = SpecChar::ignore_until;
00093 for(unsigned int i=0; i<sizeof(spc->closer)-1; i++) {
00094 if((spc->closer[i] = closer[i]) == 0) return true;
00095 }
00096 #if INTELIB_RUNTIME_CHECKS == 1
00097 error_message = "comment closer is too long";
00098 return false;
00099 #else
00100 spc->closer[sizeof(spc->closer)-1] = 0;
00101 #endif
00102 }
00103
00104 IntelibSLexAnalyser::SpecChar*
00105 IntelibSLexAnalyser::DoAddSpecial(SpecChar **p, const char *str, bool ex)
00106 {
00107
00108
00109 if(!*p) {
00110
00111 *p = new SpecChar;
00112 (*p)->ch = str[0];
00113 (*p)->sub = 0;
00114 (*p)->next = 0;
00115 if(str[1]) {
00116 (*p)->status = SpecChar::non_term;
00117 return DoAddSpecial(&((*p)->sub), str+1, ex);
00118 } else {
00119 (*p)->status = ex ? SpecChar::read_rest : SpecChar::term;
00120 return *p;
00121 }
00122 } else {
00123
00124
00125 if((*p)->ch == str[0]) {
00126
00127 if(str[1]) {
00128 if((*p)->status == SpecChar::read_rest) {
00129 error_message =
00130 "Previously-added extendable special lexem is "
00131 "a strict prefix of the new lexem";
00132 return 0;
00133 }
00134 return DoAddSpecial(&((*p)->sub), str+1, ex);
00135 } else {
00136 if((*p)->status != SpecChar::non_term) {
00137
00138 error_message = "duplicate or conflicting special lexems";
00139 return 0;
00140 }
00141 (*p)->status = ex ? SpecChar::read_rest : SpecChar::term;
00142 return *p;
00143 }
00144 } else {
00145
00146 return DoAddSpecial(&((*p)->next), str, ex);
00147 }
00148 }
00149 }
00150
00151 IntelibSLexAnalyser::SpecChar*
00152 IntelibSLexAnalyser::AddSpecial(const char *str, bool extendable)
00153 {
00154 if(!*str) {
00155 error_message = "can't add empty special lexem";
00156 return false;
00157 }
00158 return DoAddSpecial(&specchars, str, extendable);
00159 }
00160
00161 SReference IntelibSLexAnalyser::Get() const
00162 {
00163 return SReference(lex, lexline);
00164 }
00165
00166 void IntelibSLexAnalyser::Drop() {
00167 state = home;
00168 buf = "";
00169 lex = SReference();
00170 error_message = 0;
00171 }
00172
00173 IntelibSLexAnalyser::FeedResult IntelibSLexAnalyser::FeedChar(int c)
00174 {
00175 if(postponed_char != -1) {
00176 int pp = postponed_char;
00177 postponed_char = -1;
00178 FeedResult res = FeedChar(pp);
00179 switch(res) {
00180 case res_eof:
00181 case res_error:
00182 case res_ready:
00183 postponed_char = c;
00184 return res;
00185 case res_empty:
00186 case res_continue:
00187 return FeedChar(c);
00188 default:
00189 return res_error;
00190 }
00191 throw IntelibX_bug();
00192 }
00193 if(c == '\n') line++;
00194 switch(state) {
00195 case home:
00196 return Home(c);
00197 case special:
00198 case special2:
00199 return Special(c);
00200 case special3:
00201 return Special3(c);
00202 case string:
00203 return String(c);
00204 case stringq:
00205 return Stringq(c);
00206 case token:
00207 return Token(c);
00208 case token_force:
00209 buf+=c;
00210 state = token;
00211 return res_continue;
00212 case comment:
00213 #if 0
00214 if(c == EOF) {
00215 state = home;
00216 return res_eof;
00217 }
00218 if(c == '\n')
00219 state = home;
00220 return res_empty;
00221 #endif
00222 return Comment(c);
00223 default:
00224 throw IntelibX_bug();
00225 }
00226 }
00227
00228 IntelibSLexAnalyser::FeedResult IntelibSLexAnalyser::Home(int c)
00229 {
00230 lexline = line;
00231 switch(c) {
00232 case EOF:
00233 return res_eof;
00234 case ' ':
00235 case '\n':
00236 case '\t':
00237 case '\r':
00238 case '\f':
00239 case '\v':
00240 return res_continue;
00241 default:
00242 buf = "";
00243
00244 special_state = &specchars;
00245 FeedResult res = Special(c);
00246 if(res != res_error) {
00247 return res;
00248 }
00249
00250 buf += c;
00251 state = token;
00252 string_finalizer = 0;
00253 return res_continue;
00254 }
00255 }
00256
00257 IntelibSLexAnalyser::FeedResult IntelibSLexAnalyser::Special(int c)
00258 {
00259 if(c == EOF) {
00260 error_message = "eof within or right after a special lexem";
00261 return res_error;
00262 }
00263 for(SpecChar *p = *special_state; p; p = p->next) {
00264 if(p->ch == c) {
00265 buf += c;
00266 if(p->status == SpecChar::term) {
00267 if(p->sub) {
00268 state = special2;
00269 lex = p->token;
00270 special_state = &(p->sub);
00271 return res_continue;
00272 } else {
00273 lex = p->token;
00274 buf = "";
00275 state = home;
00276 return res_ready;
00277 }
00278 }
00279 if(p->status == SpecChar::non_delim) {
00280 state = special3;
00281 lex = p->token;
00282 special_state = &(p->sub);
00283 return res_continue;
00284 }
00285 if(p->status == SpecChar::read_rest) {
00286 state = token_force;
00287 buf = "";
00288 string_finalizer = p->makestring;
00289 return res_continue;
00290 }
00291 if(p->status == SpecChar::read_string) {
00292 state = string;
00293 buf = "";
00294 closer = p->closer;
00295 string_finalizer = p->makestring;
00296 return res_continue;
00297 }
00298 if(p->status == SpecChar::ignore_until) {
00299 state = comment;
00300 buf = "";
00301 closer = p->closer;
00302 comment_closer_index = 0;
00303 return res_continue;
00304 }
00305 if(!p->sub)
00306 throw IntelibX_bug();
00307 state = special;
00308 special_state = &(p->sub);
00309 return res_continue;
00310 }
00311 }
00312 if(state == special2) {
00313
00314
00315 buf = "";
00316 state = home;
00317 postponed_char = c;
00318 return res_ready;
00319 } else if(state == special3) {
00320
00321 state = token;
00322 return res_continue;
00323 } else {
00324 error_message = "unexpected special lexem";
00325 return res_error;
00326 }
00327 }
00328
00329 IntelibSLexAnalyser::FeedResult IntelibSLexAnalyser::Special3(int c)
00330 {
00331 if(c==EOF || IsDelimiterChar(c) || isspace(c))
00332 {
00333 state = home;
00334 buf = "";
00335 if(!isspace(c)) {
00336 postponed_char = c;
00337 }
00338 return res_ready;
00339 } else {
00340 return Special(c);
00341 }
00342 }
00343
00344 IntelibSLexAnalyser::FeedResult IntelibSLexAnalyser::String(int c)
00345 {
00346 switch(c) {
00347 case EOF:
00348 error_message = "eof in string";
00349 return res_error;
00350 case '\\':
00351 state = stringq;
00352 return res_continue;
00353 default:
00354 if(c == closer[0]) {
00355 lex = string_finalizer ?
00356 string_finalizer(buf.c_str()) : SReference(buf);
00357 buf = "";
00358 state = home;
00359 return res_ready;
00360 } else {
00361 buf += c;
00362 return res_continue;
00363 }
00364 }
00365 }
00366
00367 IntelibSLexAnalyser::FeedResult IntelibSLexAnalyser::Stringq(int c)
00368 {
00369 switch(c) {
00370 case EOF:
00371 error_message = "quoted eof";
00372 return res_error;
00373 case 'a': buf += '\a'; break;
00374 case 'b': buf += '\b'; break;
00375 case 'f': buf += '\f'; break;
00376 case 'n': buf += '\n'; break;
00377 case 'r': buf += '\r'; break;
00378 case 't': buf += '\t'; break;
00379 case 'v': buf += '\v'; break;
00380
00381
00382
00383
00384
00385 default:
00386 buf += c;
00387 }
00388 state = string;
00389 return res_continue;
00390 }
00391
00392 IntelibSLexAnalyser::FeedResult IntelibSLexAnalyser::Token(int c)
00393 {
00394 if(c==EOF || IsDelimiterChar(c) || isspace(c))
00395 {
00396 state = home;
00397 if(!isspace(c)) {
00398 postponed_char = c;
00399 }
00400 lex = string_finalizer ?
00401 string_finalizer(buf.c_str()) : ProcessToken(buf.c_str());
00402 buf = "";
00403 if(!lex.GetPtr()) {
00404 return res_error;
00405 }
00406 return res_ready;
00407 } else {
00408 buf += c;
00409 return res_continue;
00410 }
00411 }
00412
00413 IntelibSLexAnalyser::FeedResult IntelibSLexAnalyser::Comment(int c)
00414 {
00415 if(closer[comment_closer_index] == c) {
00416 comment_closer_index++;
00417 if(closer[comment_closer_index] == 0) {
00418
00419 state = home;
00420 return res_empty;
00421 }
00422 } else {
00423 comment_closer_index = 0;
00424 }
00425 return res_continue;
00426 }
00427
// Tells whether `s` looks like a decimal number: an optional leading sign,
// then digits with at most one decimal point, and at least one digit
// overall.
//
//   s        - NUL-terminated text to classify; null yields false
//   sign_ok  - whether a leading '+' or '-' is accepted
//   dot_ok   - whether a decimal point is accepted
//
// Strings without any digit ("", ".", "+.", "-") are not numbers; the
// previous implementation accepted the first three of these.
static bool IsNumber(const char *s, bool sign_ok = true, bool dot_ok = true)
{
    if(!s) {
        return false;
    }
    if(sign_ok && (*s == '-' || *s == '+')) {
        ++s;
    }
    bool seen_digit = false;
    for(; *s; ++s) {
        // isdigit() requires a value representable as unsigned char.
        if(isdigit(static_cast<unsigned char>(*s))) {
            seen_digit = true;
        } else if(dot_ok && *s == '.') {
            dot_ok = false;     // only one decimal point is allowed
        } else {
            return false;
        }
    }
    return seen_digit;
}
00441
00442 SReference IntelibSLexAnalyser::ProcessToken(const char *s)
00443 {
00444 #if 0
00445 static const struct {
00446 const char *name;
00447 char val;
00448 } charnames[] = {
00449 { "NEWLINE", '\n' },
00450 { "SPACE", ' ' },
00451 { "TAB", '\t' },
00452 { "BACKSPACE", '\010' },
00453 { "LINEFEED", '\012' },
00454 { "PAGE", '\014' },
00455 { "RETURN", '\015' },
00456 { "RUBOUT", '\177' },
00457 { 0, 0 }
00458 };
00459 if(s[0] == '.' && s[1] == 0)
00460 return LexemaDot;
00461 if(s[0] == '#') {
00462 if(s[1] == '\\') {
00463 if(s[3] == '\0') {
00464
00465 return SReference(s[2]);
00466 } else {
00467 int i;
00468 for(i = 0; charnames[i].name; i++) {
00469 if(strcasecmp(s+2, charnames[i].name) == 0)
00470 return SReference(charnames[i].val);
00471 }
00472 error_message = "invalid character name";
00473 return SReference();
00474 }
00475 } else {
00476
00477 return SReference(new SExpressionClassicAtom(s));
00478 }
00479 } else
00480 #endif
00481 if(IsNumber(s)) {
00482 return Charp2LispNumber(s);
00483 } else {
00484 return SReference(new SExpressionClassicAtom(s));
00485 }
00486 }
00487
00488 bool IntelibSLexAnalyser::IsDelimiterChar(int ch)
00489 {
00490 for(const char *tmp = delimiter_chars.c_str(); *tmp; tmp++)
00491 if(ch == (int)*tmp) return true;
00492 return false;
00493 }