MODULE; IMPORT Rd, Target, TInt, TFloat, TWord; IMPORT M3ID, M3Scanner, Text; FROM M3Scanner IMPORT TK_Comment, TK_Error, TK_EOF, TK_Ident, TK_Card_const, TK_Long_const, TK_Real_const, TK_Longreal_const, TK_Extended_const, TK_Char_const, TK_Text_const, TK_Begin_pragma, TK_End_pragma, TK_Comma; CONST Backslash = '\134'; (* ASCII / Unicode value *) TYPE SUPER = M3Scanner.Default; REVEAL T = T_ BRANDED "M3Lexer.T" OBJECT saved : BOOLEAN := FALSE; next_token : CARDINAL := TK_Comment; next_start : CARDINAL := 0; next_length : CARDINAL := 0; next_offset : CARDINAL := 0; next_line : CARDINAL := 0; next_column : CARDINAL := 0; next_msg : TEXT := NIL; OVERRIDES next := NextToken; className := ClassName; initFromRd := InitFromRd; initFromBuf := InitFromBuf; END; TYPE Pragma = REF RECORD id : M3ID.T; val : INTEGER; next : Pragma; END; VAR pragmas: Pragma := NIL; CONST CNames = ARRAY [TK_Inline .. TK_Fatal] OF TEXT { "<*INLINE*>", "<*EXTERNAL*>", "<*ASSERT*>", "<*UNUSED*>", "<*OBSOLETE*>", "<*TRACE*>", "<* calling convention *>", "<*FATAL*>" }; PROCEDURE M3Lexer ClassName (t: T; tk: INTEGER): TEXT = BEGIN IF (FIRST (CNames) <= tk) AND (tk <= LAST (CNames)) THEN RETURN CNames [tk]; ELSE RETURN SUPER.className (t, tk); END; END ClassName; PROCEDUREInitFromRd (t: T; rd: Rd.T): T = BEGIN pragmas := NIL; RETURN SUPER.initFromRd (t, rd, skip_comments := TRUE, split_pragmas := TRUE); END InitFromRd; PROCEDUREInitFromBuf (t: T; buf: Buf): T = BEGIN pragmas := NIL; RETURN SUPER.initFromBuf (t, buf, skip_comments := TRUE, split_pragmas := TRUE); END InitFromBuf; PROCEDURENextToken (t: T) = BEGIN IF (t.saved) THEN t.saved := FALSE; t.token := t.next_token; t.start := t.next_start; t.length := t.next_length; t.offset := t.next_offset; t.line := t.next_line; t.column := t.next_column; t.msg := t.next_msg; ELSE REPEAT SUPER.next (t); CASE t.token OF | TK_Ident => FixID (t); | TK_Card_const => FixInt (t, Target.Integer.bytes); | TK_Long_const => FixInt (t, Target.Longint.bytes); | TK_Real_const => FixFloat (t, Target.Precision.Short); | TK_Longreal_const => FixFloat (t, Target.Precision.Long); | TK_Extended_const => FixFloat (t, Target.Precision.Extended); | TK_Char_const => FixChar (t); | TK_Text_const => FixText (t); | TK_Begin_pragma => FixPragma (t); ELSE (* let the token through... *) END; UNTIL (t.token # TK_Comment); END; END NextToken; PROCEDUREFixID (t: T) = BEGIN t.id := M3ID.FromStr (SUBARRAY (t.buffer^, t.offset, t.length)); END FixID; PROCEDUREFixInt (t: T; bytes: CARDINAL) = VAR break := -1; base: INTEGER; BEGIN FOR i := t.offset TO t.offset + t.length - 1 DO IF (t.buffer [i] = '_') THEN (* we have a based integer... *) break := i; EXIT; END; END; IF (break < 0) THEN (* scan a simple integer *) IF NOT TInt.New (SUBARRAY (t.buffer^, t.offset, t.length), bytes, t.int) THEN Err (t, "illegal integer literal"); END; ELSIF NOT TInt.New (SUBARRAY (t.buffer^, t.offset, break - t.offset), bytes, t.int) OR NOT TInt.ToInt (t.int, base) OR (base < 2) OR (16 < base) THEN Err (t, "illegal base for integer literal"); ELSIF NOT TWord.New (SUBARRAY (t.buffer^, break+1, t.offset + t.length - break - 1), base, bytes, t.int) THEN Err (t, "illegal based integer literal"); END; END FixInt; PROCEDUREFixFloat (t: T; prec: Target.Precision) = BEGIN IF NOT TFloat.New (SUBARRAY (t.buffer^, t.offset, t.length), prec, t.float) THEN Err (t, "illegal floating-point literal"); END; END FixFloat; PROCEDUREFixChar (t: T) = VAR ch: CHAR; BEGIN ch := t.buffer [t.offset+1]; IF (t.length = 3) THEN (* ok *) ELSIF ConvertEscape (t, t.offset+1, t.length-2, ch) THEN (* ok *) ELSE Err (t, "illegal character literal"); END; t.char := ORD (ch); END FixChar; PROCEDUREFixText (t: T) = VAR len := 0; cur := t.offset + 1; (* ignore the surrounding quotes *) stop := cur + t.length - 2; (* ignore the surrounding quotes *) txt := ""; ch : CHAR; buf : ARRAY [0..255] OF CHAR; BEGIN WHILE (cur < stop) DO ch := t.buffer [cur]; IF (ch = Backslash) THEN IF (cur+1 < stop) AND ConvertEscape (t, cur, 2, ch) THEN INC (cur); ELSIF (cur+3 < stop) AND ConvertEscape (t, cur, 4, ch) THEN INC (cur, 3); ELSE Err (t, "unrecognized escape sequence in text literal"); RETURN; END; END; IF (len >= NUMBER (buf)) THEN txt := txt & Text.FromChars (buf); len := 0; END; buf [len] := ch; INC (len); INC (cur); END; IF (len > 0) THEN txt := txt & Text.FromChars (SUBARRAY (buf, 0, len)); END; t.text :=txt; END FixText; PROCEDUREConvertEscape (t: T; offs, len: INTEGER; VAR ch: CHAR): BOOLEAN = VAR c0, c1, c2: CHAR; BEGIN IF t.buffer[offs] # Backslash THEN RETURN FALSE; END; IF (len = 2) THEN c0 := t.buffer [offs+1]; IF (c0 = 'n') THEN ch := '\n'; RETURN TRUE; ELSIF (c0 = 't') THEN ch := '\t'; RETURN TRUE; ELSIF (c0 = 'r') THEN ch := '\r'; RETURN TRUE; ELSIF (c0 = 'f') THEN ch := '\f'; RETURN TRUE; ELSIF (c0 = '\'') THEN ch := '\''; RETURN TRUE; ELSIF (c0 = '\"') THEN ch := '\"'; RETURN TRUE; ELSIF (c0 = Backslash) THEN ch := Backslash; RETURN TRUE; END; ELSIF (len = 4) THEN c0 := t.buffer [offs+1]; c1 := t.buffer [offs+2]; c2 := t.buffer [offs+3]; IF ('0' <= c0) AND (c0 <= '7') AND ('0' <= c1) AND (c1 <= '7') AND ('0' <= c2) AND (c2 <= '7') THEN ch := VAL ( (ORD (c0) - ORD ('0')) * 64 + (ORD (c1) - ORD ('0')) * 8 + (ORD (c2) - ORD ('0')), CHAR); RETURN TRUE; END; END; RETURN FALSE; END ConvertEscape; PROCEDUREFixPragma (t: T) = VAR p: Pragma; save_start : CARDINAL; save_length : CARDINAL; save_offset : CARDINAL; save_line : CARDINAL; save_column : CARDINAL; BEGIN (* just in case we need to back up, save the current token *) <*ASSERT t.token = TK_Begin_pragma *> save_start := t.start; save_length := t.length; save_offset := t.offset; save_line := t.line; save_column := t.column; (* get the ID that defines the pragma *) SUPER.next (t); IF t.token # TK_Ident THEN (* oops, it's a badly formed pragma *) t.saved := TRUE; t.next_token := t.token; t.next_start := t.start; t.next_length := t.length; t.next_offset := t.offset; t.next_line := t.line; t.next_column := t.column; t.next_msg := t.msg; t.token := TK_Begin_pragma; t.start := save_start; t.length := save_length; t.offset := save_offset; t.line := save_line; t.column := save_column; t.msg := NIL; RETURN; END; t.id := M3ID.FromStr (SUBARRAY (t.buffer^, t.offset, t.length)); p := LookUpPragma (t.id); IF (p = NIL) AND Target.FindConvention (M3ID.ToText (t.id)) # NIL THEN (* it's a legit calling convention on the current target *) t.token := TK_CallConv; AddPragma (M3ID.ToText (t.id), TK_CallConv); ELSIF (p = NIL) THEN (* unknown pragma, restore the begin_pragma/id token sequence... *) t.saved := TRUE; t.next_token := t.token; t.next_start := t.start; t.next_length := t.length; t.next_offset := t.offset; t.next_line := t.line; t.next_column := t.column; t.next_msg := t.msg; t.token := TK_Begin_pragma; t.start := save_start; t.length := save_length; t.offset := save_offset; t.line := save_line; t.column := save_column; t.msg := NIL; ELSIF (p.val > 0) THEN (* it's a recognized pragma to pass through *) t.token := p.val; ELSIF (p.val = IGNORE_PRAGMA) THEN EatPragma (t); ELSIF (p.val = LINE_PRAGMA) THEN EatPragma (t); (* until we need something better... *) ELSIF (p.val = PRAGMA_PRAGMA) THEN SUPER.next (t); (* PRAGMA *) WHILE (t.token = TK_Ident) OR ((M3Scanner.First_Keyword <= t.token) AND (t.token <= M3Scanner.Last_Keyword)) DO FixID (t); IF LookUpPragma (t.id) = NIL THEN AddPragma (M3ID.ToText (t.id), IGNORE_PRAGMA); END; SUPER.next (t); (* Ident *) IF (t.token # TK_Comma) THEN EXIT END; SUPER.next (t); (* , *) END; IF (t.token # TK_End_pragma) THEN t.token := TK_Error; t.msg := "missing '*>' on <*PRAGMA*> pragma"; ELSE t.token := TK_Comment; (* fetch the next real token in the outer loop *) END; ELSE <*ASSERT FALSE*> END; END FixPragma; PROCEDURELookUpPragma (id: M3ID.T): Pragma = VAR p: Pragma; BEGIN IF (pragmas = NIL) THEN InitPragmas () END; p := pragmas; WHILE (p # NIL) AND (p.id # id) DO p := p.next; END; RETURN p; END LookUpPragma; PROCEDUREEatPragma (t: T) = VAR key := t.id; line := t.line; offs := t.offset; col := t.column; BEGIN LOOP IF (t.token = TK_End_pragma) THEN t.token := TK_Comment; (* so the top-level NextToken() cycles *) EXIT; ELSIF (t.token = TK_Error) THEN EXIT; ELSIF (t.token = TK_EOF) THEN t.token := TK_Error; t.line := line; t.offset := offs; t.column := col; t.msg := "<*" & M3ID.ToText(key) & "*> pragma is missing its closing '*>'"; EXIT; ELSE SUPER.next (t); END; END; END EatPragma; CONST IGNORE_PRAGMA = -1; PRAGMA_PRAGMA = -2; LINE_PRAGMA = -3; PROCEDUREInitPragmas () = BEGIN AddPragma ("TRACE", TK_Trace); AddPragma ("OBSOLETE", TK_Obsolete); AddPragma ("LINE", LINE_PRAGMA); AddPragma ("PRAGMA", PRAGMA_PRAGMA); AddPragma ("NOWARN", IGNORE_PRAGMA); AddPragma ("INLINE", TK_Inline); AddPragma ("UNUSED", TK_Unused); AddPragma ("FATAL", TK_Fatal); AddPragma ("EXTERNAL", TK_External); AddPragma ("ASSERT", TK_Assert); END InitPragmas; PROCEDUREAddPragma (txt: TEXT; val: INTEGER) = BEGIN pragmas := NEW (Pragma, id := M3ID.Add (txt), val := val, next := pragmas); END AddPragma; PROCEDUREErr (t: T; msg: TEXT) = BEGIN t.msg := msg & ": " & t.toText (); t.token := TK_Error; END Err; BEGIN END M3Lexer.