MODULE HTML;

IMPORT Fmt, Text, TextList, Wr;

<*FATAL ANY*>

CONST
  IndentAmount = 2;   (* extra columns of indentation per nesting level in Dump *)

(* Dump writes a human-readable outline of "html" to "wr".

   The output starts with "TITLE: " followed by the title text.  When the
   document has a body, a "BODY" line follows and the body sequence is then
   walked recursively; each element is written on its own line, indented
   by IndentAmount columns per nesting level. *)
PROCEDURE Dump (html: T; wr: Wr.T) =

  (* Write "t" to the output writer. *)
  PROCEDURE Out (t: TEXT) =
    BEGIN
      Wr.PutText(wr, t);
    END Out;

  (* Start a new output line and indent it by "indent" blanks. *)
  PROCEDURE NL (indent: INTEGER) =
    BEGIN
      Wr.PutText(wr, "\n");
      FOR i := 1 TO indent DO
        Wr.PutChar(wr, ' ');
      END;
    END NL;

  (* Print the chain of elements starting at "seq" (linked through "next").
     A "SEQUENCE" header is written at column "indent"; the elements
     themselves are written one level deeper.  Elements that carry a
     nested sequence are printed by a recursive call. *)
  PROCEDURE WalkSequence (seq: Sequence; indent: INTEGER) =
    BEGIN
      NL(indent);
      Out("SEQUENCE");
      (* "indent" is a value parameter, so this only affects this call. *)
      INC(indent, IndentAmount);
      WHILE seq # NIL DO
        NL(indent);
        TYPECASE seq OF
        | NULL => Out("<null>")
        | Word (word) => Out("<word>:" & word.word);
        | Paragraph => Out("<paragraph>");
        | LineBreak => Out("<line break>");
        | HorizontalRule => Out("<horizontal rule>");
        | Glossary (glossary) =>
            VAR gs := glossary.content;
            BEGIN
              NL(indent);
              Out("GLOSSARY");
              INC(indent, IndentAmount);
              WHILE gs # NIL DO
                NL(indent);
                Out("TERM");
                WalkSequence(gs.term, indent);
                NL(indent);
                Out("DEF");
                WalkSequence(gs.definition, indent);
                gs := gs.next;
              END;
              (* Restore the level for the remaining siblings. *)
              DEC(indent, IndentAmount);
            END;
        | List (list) =>
            VAR item := list.content;
            BEGIN
              NL(indent);
              Out("LIST");
              INC(indent, IndentAmount);
              WHILE item # NIL DO
                WalkSequence(item.content, indent);
                item := item.next;
              END;
              (* Restore the level for the remaining siblings. *)
              DEC(indent, IndentAmount);
            END;
        | Preformatted (pre) =>
            Out("<preformatted>");
            WalkSequence(pre.content, indent);
        | Typewriter (format) =>
            Out("<typewriter>");
            WalkSequence(format.content, indent);
        | Boldface (format) =>
            Out("<boldface>");
            WalkSequence(format.content, indent);
        | Italic (format) =>
            Out("<italic>");
            WalkSequence(format.content, indent);
        | Underline (format) =>
            Out("<underline>");
            WalkSequence(format.content, indent);
        | Emphasis (format) =>
            Out("<emphasis>");
            WalkSequence(format.content, indent);
        | Strong (format) =>
            Out("<strong>");
            WalkSequence(format.content, indent);
        | Code (format) =>
            Out("<code>");
            WalkSequence(format.content, indent);
        | Sample (format) =>
            Out("<sample>");
            WalkSequence(format.content, indent);
        | Keyboard (format) =>
            Out("<keyboard>");
            WalkSequence(format.content, indent);
        | Definition (format) =>
            Out("<definition>");
            WalkSequence(format.content, indent);
        | Variable (format) =>
            Out("<variable>");
            WalkSequence(format.content, indent);
        | Citation (format) =>
            Out("<citation>");
            WalkSequence(format.content, indent);
        | Anchor (anchor) =>
            (* An anchor without an href is reported as a name anchor. *)
            IF anchor.href = NIL THEN
              Out("NAME-ANCHOR:");
              WalkSequence(anchor.content, indent);
            ELSE
              Out("ANCHOR:");
              WalkSequence(anchor.content, indent);
            END;
        | Heading (heading) =>
            NL(indent);
            Out("HEADING" & Fmt.Int(heading.level));
            WalkSequence(heading.content, indent);
        | Address (addr) =>
            Out("<address>");
            WalkSequence(addr.content, indent);
        | BlockQuote (quote) =>
            Out("<block quote>");
            WalkSequence(quote.content, indent);
        | Image (image) =>
            Out("<image>");
            VAR alt := image.alternate;
            BEGIN
              (* Substitute a placeholder when no alternate text is set. *)
              IF alt = NIL THEN alt := "<<IMAGE>>" END;
              Out(alt);
            END;
        | Oblet (oblet) => Out("<oblet:" & oblet.source & ">");
        | Table (format) =>
            Out("<table>");
            WalkSequence(format.content, indent);
        | TableRow (format) =>
            Out("<table row>");
            WalkSequence(format.content, indent);
        ELSE
          Out("<????>");
        END;
        seq := seq.next;
      END;
    END WalkSequence;

  BEGIN
    (* Concatenating a NIL TEXT is an error, so a missing title is written
       as an empty one. *)
    IF html.title # NIL THEN
      Out("TITLE: " & html.title);
    ELSE
      Out("TITLE: ");
    END;
    IF html.body # NIL THEN
      (* Put the "BODY" label on its own line instead of appending it
         directly to the title text. *)
      NL(0);
      Out("BODY");
      WalkSequence(html.body, 0);
    END;
  END Dump;

(* GetLinks returns the href of every anchor found in the body of "html",
   in the order the anchors are encountered.  An href that contains a '#'
   character is left out.  Nested content (glossaries, lists, text
   formats, headings, addresses, block quotes, tables and table rows) is
   searched recursively; the content of an anchor itself is not. *)
PROCEDURE GetLinks (html: T): TextList.T =
  VAR links: TextList.T;

  (* Prepend the qualifying hrefs found in "seq" and everything nested in
     it to "links"; the list is therefore built in reverse order. *)
  PROCEDURE LinksInSequence (seq: Sequence) =
    BEGIN
      WHILE seq # NIL DO
        TYPECASE seq OF
        | NULL =>
        | Word =>
        | Paragraph =>
        | LineBreak =>
        | HorizontalRule =>
        | Glossary (glossary) =>
            VAR g := glossary.content;
            BEGIN
              WHILE g # NIL DO
                LinksInSequence(g.term);
                LinksInSequence(g.definition);
                g := g.next;
              END;
            END;
        | List (list) =>
            VAR item := list.content;
            BEGIN
              WHILE item # NIL DO
                LinksInSequence(item.content);
                item := item.next;
              END;
            END;
        | Preformatted (pre) => LinksInSequence(pre.content);
        | Typewriter (format) => LinksInSequence(format.content);
        | Boldface (format) => LinksInSequence(format.content);
        | Italic (format) => LinksInSequence(format.content);
        | Underline (format) => LinksInSequence(format.content);
        | Emphasis (format) => LinksInSequence(format.content);
        | Strong (format) => LinksInSequence(format.content);
        | Code (format) => LinksInSequence(format.content);
        | Sample (format) => LinksInSequence(format.content);
        | Keyboard (format) => LinksInSequence(format.content);
        | Definition (format) => LinksInSequence(format.content);
        | Variable (format) => LinksInSequence(format.content);
        | Citation (format) => LinksInSequence(format.content);
        | Anchor (anchor) =>
            IF anchor.href # NIL THEN
              VAR pos := Text.FindChar(anchor.href, '#', 0);
              BEGIN
                (* Only hrefs without any '#' are collected. *)
                IF pos = -1 THEN
                  links := TextList.Cons(anchor.href, links);
                END
              END
            END;
        | Heading (heading) => LinksInSequence(heading.content);
        | Address (addr) => LinksInSequence(addr.content);
        | BlockQuote (quote) => LinksInSequence(quote.content);
        | Image (<*NOWARN*> image ) =>
            (* links := TextList.Cons(image.source, links); *)
        | Oblet =>
        (* Tables and table rows carry nested content (Dump walks it too),
           so anchors inside them must be collected as well. *)
        | Table (format) => LinksInSequence(format.content);
        | TableRow (format) => LinksInSequence(format.content);
        ELSE
        END;
        seq := seq.next;
      END;
    END LinksInSequence;

  BEGIN
    links := NIL;
    LinksInSequence(html.body);
    (* The list was built by prepending; reverse it to restore the order
       in which the anchors were encountered. *)
    RETURN TextList.ReverseD(links);
  END GetLinks;

BEGIN
END HTML.