Copyright (C) 1992, Digital Equipment Corporation
All rights reserved.
See the file COPYRIGHT for a full description.
File: M3String.m3
Last modified on Fri Sep 16 14:14:22 PDT 1994 by kalsow
modified on Wed Nov 28 02:23:29 1990 by muller
MODULE M3String;
IMPORT M3Buf, Text, Word, CG, Target;
CONST
NO_UID = -1;
TYPE
Buf = ARRAY OF CHAR;
HashTable = REF ARRAY OF T;
REVEAL
T = BRANDED REF RECORD
prefix : T := NIL;
suffix : T := NIL;
body : TEXT := NIL;
length : INTEGER := 0;
hash : INTEGER := 0;
uid : INTEGER := 0;
next : T := NIL;
END;
(* There are two variants of a String.T:
(body # NIL) => the characters are in body
ELSE => prefix & suffix
*)
CONST
Digits = ARRAY [0..9] OF CHAR {'0','1','2','3','4','5','6','7','8','9'};
VAR
hashMask : INTEGER := 511; (* == 2^9-1 == 9 bits on *)
hashTable : HashTable := NIL;
next_t : T := NIL;
nStrings : INTEGER := 0;
-------------------------------------------------------------- exported ---
PROCEDURE Add (x: TEXT): T =
VAR buf: ARRAY [0..255] OF CHAR; ref: REF ARRAY OF CHAR;
BEGIN
IF (next_t = NIL) THEN next_t := NEW (T) END;
next_t.prefix := NIL;
next_t.suffix := NIL;
next_t.body := x;
next_t.length := Text.Length (x);
next_t.uid := NO_UID;
IF (next_t.length <= NUMBER (buf)) THEN
Text.SetChars (buf, x);
RETURN Intern (SUBARRAY (buf, 0, next_t.length));
ELSE
ref := NEW (REF ARRAY OF CHAR, next_t.length);
Text.SetChars (ref^, x);
RETURN Intern (ref^);
END;
END Add;
PROCEDURE FromStr (READONLY buf: Buf; length: INTEGER): T =
VAR t: T;
BEGIN
IF (next_t = NIL) THEN next_t := NEW (T) END;
next_t.prefix := NIL;
next_t.suffix := NIL;
next_t.body := NIL; (* for now *)
next_t.length := MIN (length, NUMBER (buf));
next_t.uid := NO_UID;
t := Intern (SUBARRAY (buf, 0, next_t.length));
RETURN t;
END FromStr;
PROCEDURE Concat (a, b: T): T =
VAR buf: ARRAY [0..3] OF CHAR;
BEGIN
IF (a = NIL) OR (a.length = 0) THEN RETURN b END;
IF (b = NIL) OR (b.length = 0) THEN RETURN a END;
IF (next_t = NIL) THEN next_t := NEW (T) END;
next_t.prefix := a;
next_t.suffix := b;
next_t.body := NIL;
next_t.length := a.length + b.length;
next_t.uid := NO_UID;
RETURN Intern (buf);
END Concat;
PROCEDURE ToText (t: T): TEXT =
VAR
buf : ARRAY [0..255] OF CHAR;
ref : REF ARRAY OF CHAR;
BEGIN
IF (t = NIL) THEN
RETURN NIL;
ELSIF (t.body # NIL) THEN
(* already done. *)
ELSIF (t.length <= NUMBER (buf)) THEN
Flatten (t, buf, 0);
t.body := Text.FromChars (SUBARRAY (buf, 0, t.length));
ELSE
ref := NEW (REF ARRAY OF CHAR, t.length);
Flatten (t, ref^, 0);
t.body := Text.FromChars (ref^);
END;
RETURN t.body;
END ToText;
PROCEDURE Put (wr: M3Buf.T; t: T) =
BEGIN
IF (t = NIL) THEN
(* done *)
ELSIF (t.body # NIL) THEN
FOR i := 0 TO t.length-1 DO EmitChar (wr, Text.GetChar (t.body, i)) END;
ELSE
Put (wr, t.prefix);
Put (wr, t.suffix);
END;
END Put;
PROCEDURE Init_chars (offset: INTEGER; t: T; is_const: BOOLEAN) =
BEGIN
IF (t = NIL) THEN
(* done *)
ELSIF (t.body # NIL) THEN
CG.Init_chars (offset, t.body, is_const);
ELSE
Init_chars (offset, t.prefix, is_const);
Init_chars (offset + t.prefix.length * Target.Char.size, t.suffix, is_const);
END;
END Init_chars;
PROCEDURE Length (t: T): INTEGER =
BEGIN
IF (t = NIL)
THEN RETURN 0;
ELSE RETURN t.length;
END;
END Length;
PROCEDURE GetUID (t: T): INTEGER =
BEGIN
RETURN t.uid;
END GetUID;
PROCEDURE SetUID (t: T; uid: INTEGER) =
BEGIN
t.uid := uid;
END SetUID;
PROCEDURE Hash (t: T): INTEGER =
BEGIN
IF (t = NIL)
THEN RETURN 953;
ELSE RETURN t.hash;
END;
END Hash;
-------------------------------------------------------------- internal ---
PROCEDURE Intern (READONLY buf: Buf): T =
VAR hash, bucket: INTEGER; t: T;
BEGIN
(* search the hash table *)
next_t.hash := 0;
hash := InternHash (next_t, 0, buf);
bucket := Word.And (hash, hashMask);
t := hashTable[bucket];
WHILE (t # NIL) DO
IF (t.hash = hash) AND Equal (t, next_t, buf) THEN RETURN t; END;
t := t.next;
END;
(* we didn't find the string => add it to the hash table *)
t := next_t;
t.hash := hash;
t.next := hashTable [bucket];
hashTable [bucket] := t;
next_t := NIL; (* since we've used it! *)
IF (t.prefix = NIL) AND (t.body = NIL) THEN
t.body := Text.FromChars (buf);
END;
INC (nStrings);
IF (nStrings > 2 * NUMBER (hashTable^)) THEN ExpandHashTable () END;
RETURN t;
END Intern;
PROCEDURE ExpandHashTable () =
VAR
n_old := NUMBER (hashTable^);
n_new := n_old + n_old;
new := NEW (HashTable, n_new);
newMask := hashMask + hashMask + 1;
t, u : T;
x : INTEGER;
BEGIN
FOR i := 0 TO n_new - 1 DO new[i] := NIL END;
FOR i := 0 TO n_old - 1 DO
t := hashTable [i];
WHILE (t # NIL) DO
u := t.next;
x := Word.And (t.hash, newMask);
t.next := new [x];
new [x] := t;
t := u;
END;
END;
hashMask := newMask;
hashTable := new;
END ExpandHashTable;
PROCEDURE InternHash (t: T; hash: INTEGER; READONLY buf: Buf): INTEGER =
BEGIN
IF (t = NIL) THEN RETURN 0 END;
IF (hash = 0) AND (t.hash # 0) THEN RETURN t.hash END;
IF (t.body # NIL) THEN
FOR i := 0 TO t.length - 1 DO
hash := Word.Plus (Word.Times (2, hash), ORD (Text.GetChar (t.body, i)));
END;
ELSIF (t.prefix # NIL) THEN
(* a concatentation *)
hash := InternHash (t.prefix, hash, buf);
hash := InternHash (t.suffix, hash, buf);
ELSE (* use the buffer *)
FOR i := 0 TO t.length - 1 DO
hash := Word.Plus (Word.Times (2, hash), ORD (buf[i]));
END;
END;
RETURN hash;
END InternHash;
PROCEDURE Equal (a, b: T; READONLY buf: Buf): BOOLEAN =
BEGIN
IF (a.length # b.length) THEN RETURN FALSE END;
FOR i := 0 TO a.length - 1 DO
IF GetCh (a, buf, i) # GetCh (b, buf, i) THEN RETURN FALSE; END;
END;
RETURN TRUE;
END Equal;
PROCEDURE GetCh (t: T; READONLY buf: Buf; i: INTEGER): CHAR =
VAR u: T;
BEGIN
(* walk the tree to find the right segment *)
WHILE (t.prefix # NIL) DO
u := t.prefix;
IF (u.length > i)
THEN t := t.prefix;
ELSE t := t.suffix; DEC (i, u.length);
END;
END;
IF (t.body # NIL)
THEN RETURN Text.GetChar (t.body, i);
ELSE RETURN buf[i];
END;
END GetCh;
PROCEDURE Flatten (t: T; VAR buf: Buf; start: INTEGER) =
BEGIN
IF (t = NIL) THEN
(* done *)
ELSIF (t.body # NIL) THEN
Text.SetChars (SUBARRAY (buf, start, t.length), t.body);
ELSE
WHILE (t # NIL) AND (t.body = NIL) DO
Flatten (t.suffix, buf, start + Length (t.prefix));
t := t.prefix;
END;
Flatten (t, buf, start);
END;
END Flatten;
PROCEDURE EmitChar (wr: M3Buf.T; c: CHAR) =
VAR i: INTEGER;
BEGIN
IF (c < ' ') OR (c = '\"') OR (c = '\'') OR ('~' < c) OR (c = '\\') THEN
i := Word.And (ORD (c), 255);
M3Buf.PutChar (wr, '\\');
M3Buf.PutChar (wr, Digits[i DIV 64]); i := Word.And (i, 63);
M3Buf.PutChar (wr, Digits[i DIV 8]); i := Word.And (i, 7);
M3Buf.PutChar (wr, Digits[i]);
ELSE (* simple graphic character *)
M3Buf.PutChar (wr, c);
END;
END EmitChar;
-------------------------------------------------------- initialization ---
PROCEDURE Initialize () =
BEGIN
<*ASSERT hashTable = NIL*>
hashTable := NEW (HashTable, hashMask+1);
FOR i := 0 TO LAST (hashTable^) DO hashTable[i] := NIL; END;
END Initialize;
PROCEDURE Reset () =
VAR t: T;
BEGIN
FOR i := FIRST (hashTable^) TO LAST (hashTable^) DO
t := hashTable[i];
WHILE (t # NIL) DO t.uid := NO_UID; t := t.next END;
END;
END Reset;
BEGIN
END M3String.