webvbt/src/HTML.m3


(* Copyright (C) 1995, Digital Equipment Corporation                         *)
(* All rights reserved.                                                      *)
(* See the file COPYRIGHT for a full description.                            *)
(*                                                                           *)
(* Last modified on Tue Aug 27 13:23:50 PDT 1996 by najork                   *)
(*      modified on Tue Apr  9 16:35:34 PDT 1996 by mhb                      *)

MODULE HTML;

IMPORT Fmt, Text, TextList, Wr;

(* Declare every exception fatal within this module: the compiler does not
   warn about exceptions (e.g. those of "Wr") that the procedures below
   neither handle nor list in a RAISES clause; if one is raised at run time
   it is a checked runtime error. *)
<*FATAL ANY*>

(* Number of blanks added to the indentation for each nesting level that
   "Dump" descends into. *)
CONST IndentAmount = 2;

PROCEDURE Dump (html: T; wr: Wr.T) =
  (* Write a human-readable, indented outline of "html" to "wr": first a
     "TITLE: ..." line, then, if the document has a body, a "BODY" line
     followed by the recursively dumped body sequence.  Intended for
     debugging; nothing is returned. *)

  PROCEDURE Out (t: TEXT) =
    (* Write "t" to "wr" without a line break. *)
    BEGIN
      Wr.PutText(wr, t);
    END Out;

  PROCEDURE NL (indent: INTEGER) =
    (* Start a new line on "wr" and indent it by "indent" blanks. *)
    BEGIN
      Wr.PutText(wr, "\n");
      FOR i := 1 TO indent DO Wr.PutChar(wr, ' '); END;
    END NL;

  PROCEDURE Nested (tag: TEXT; content: Sequence; indent: INTEGER) =
    (* Write "tag" on the current line, then dump the sequence "content"
       below it.  Shared by all elements that merely wrap a sequence. *)
    BEGIN
      Out(tag);
      WalkSequence(content, indent);
    END Nested;

  PROCEDURE WalkSequence (seq: Sequence; indent: INTEGER) =
    (* Write a "SEQUENCE" line at "indent", then one line per element of
       the list starting at "seq", each indented by a further
       "IndentAmount" blanks.  Elements that contain sequences are dumped
       recursively. *)
    BEGIN
      NL(indent);
      Out("SEQUENCE");
      (* "indent" is a value parameter, so this change is local to the
         current call and needs no matching DEC on exit. *)
      INC(indent, IndentAmount);

      WHILE seq # NIL DO
        NL(indent);

        TYPECASE seq OF

        | NULL => Out("<null>")

        | Word (word) => Out("<word>:" & word.word);

        | Paragraph => Out("<paragraph>");

        | LineBreak => Out("<line break>");

        | HorizontalRule => Out("<horizontal rule>");

        | Glossary (glossary) =>
            VAR gs := glossary.content;
            BEGIN
              NL(indent);
              Out("GLOSSARY");
              (* Indent the entries for the duration of this element only;
                 the DEC below restores the level for the next sibling. *)
              INC(indent, IndentAmount);
              WHILE gs # NIL DO
                NL(indent);
                Out("TERM");
                WalkSequence(gs.term, indent);
                NL(indent);
                Out("DEF");
                WalkSequence(gs.definition, indent);
                gs := gs.next;
              END;
              DEC(indent, IndentAmount);
            END;

        | List (list) =>
            VAR item := list.content;
            BEGIN
              NL(indent);
              Out("LIST");
              INC(indent, IndentAmount);
              WHILE item # NIL DO
                WalkSequence(item.content, indent);
                item := item.next;
              END;
              DEC(indent, IndentAmount);
            END;

        | Preformatted (pre) => Nested("<preformatted>", pre.content, indent);

        | Typewriter (format) => Nested("<typewriter>", format.content, indent);

        | Boldface (format) => Nested("<boldface>", format.content, indent);

        | Italic (format) => Nested("<italic>", format.content, indent);

        | Underline (format) => Nested("<underline>", format.content, indent);

        | Emphasis (format) => Nested("<emphasis>", format.content, indent);

        | Strong (format) => Nested("<strong>", format.content, indent);

        | Code (format) => Nested("<code>", format.content, indent);

        | Sample (format) => Nested("<sample>", format.content, indent);

        | Keyboard (format) => Nested("<keyboard>", format.content, indent);

        | Definition (format) => Nested("<definition>", format.content, indent);

        | Variable (format) => Nested("<variable>", format.content, indent);

        | Citation (format) => Nested("<citation>", format.content, indent);

        | Anchor (anchor) =>
            (* An anchor without an href is reported as a name anchor. *)
            IF anchor.href = NIL THEN
              Nested("NAME-ANCHOR:", anchor.content, indent);
            ELSE
              Nested("ANCHOR:", anchor.content, indent);
            END;

        | Heading (heading) =>
            NL(indent);
            Nested("HEADING" & Fmt.Int(heading.level), heading.content, indent);

        | Address (addr) => Nested("<address>", addr.content, indent);

        | BlockQuote (quote) => Nested("<block quote>", quote.content, indent);

        | Image (image) =>
            Out("<image>");
            (* Fall back to a placeholder when no alternate text exists. *)
            IF image.alternate = NIL THEN
              Out("<<IMAGE>>");
            ELSE
              Out(image.alternate);
            END;

        | Oblet (oblet) => Out("<oblet:" & oblet.source & ">");

        | Table (format) => Nested("<table>", format.content, indent);

        | TableRow (format) => Nested("<table row>", format.content, indent);

        ELSE
          (* Element type this dumper does not know about. *)
          Out("<????>");

        END;
        seq := seq.next;
      END;
    END WalkSequence;

  VAR title := html.title;
  BEGIN
    (* NOTE(review): assumes "title" can be NIL (as "image.alternate" and
       "anchor.href" can); "&" on a NIL text would be a checked runtime
       error.  Confirm against the HTML interface. *)
    IF title = NIL THEN title := "" END;
    Out("TITLE: " & title);
    IF html.body # NIL THEN
      (* Break the line before "BODY" so it does not run into the title;
         WalkSequence supplies the line break that follows "BODY". *)
      NL(0);
      Out("BODY");
      WalkSequence(html.body, 0);
    END;
  END Dump;

PROCEDURE GetLinks (html: T): TextList.T =
  (* Return the "href" texts of the anchors found in the body of "html",
     in the order in which they are encountered.  Anchors without an href
     and anchors whose href contains a '#' are not included.  The result
     is NIL if no link is found. *)
  VAR links: TextList.T := NIL;

  PROCEDURE LinksInSequence (seq: Sequence) =
    (* Prepend to "links" the qualifying hrefs found in the list starting
       at "seq", descending into every element that contains sequences.
       The contents of an anchor itself are not searched. *)
    BEGIN
      WHILE seq # NIL DO
        TYPECASE seq OF

        | NULL =>

        | Word =>

        | Paragraph =>

        | LineBreak =>

        | HorizontalRule =>

        | Glossary (glossary) =>
            VAR g := glossary.content;
            BEGIN
              WHILE g # NIL DO
                LinksInSequence(g.term);
                LinksInSequence(g.definition);
                g := g.next;
              END;
            END;

        | List (list) =>
            VAR item := list.content;
            BEGIN
              WHILE item # NIL DO
                LinksInSequence(item.content);
                item := item.next;
              END;
            END;

        | Preformatted (pre) => LinksInSequence(pre.content);

        | Typewriter (format) => LinksInSequence(format.content);

        | Boldface (format) => LinksInSequence(format.content);

        | Italic (format) => LinksInSequence(format.content);

        | Underline (format) => LinksInSequence(format.content);

        | Emphasis (format) => LinksInSequence(format.content);

        | Strong (format) => LinksInSequence(format.content);

        | Code (format) => LinksInSequence(format.content);

        | Sample (format) => LinksInSequence(format.content);

        | Keyboard (format) => LinksInSequence(format.content);

        | Definition (format) => LinksInSequence(format.content);

        | Variable (format) => LinksInSequence(format.content);

        | Citation (format) => LinksInSequence(format.content);

        | Anchor (anchor) =>
            IF anchor.href # NIL THEN
              VAR pos := Text.FindChar(anchor.href, '#', 0);
              BEGIN
                (* Only hrefs without a '#' are collected. *)
                IF pos = -1 THEN
                  links := TextList.Cons(anchor.href, links);
                END
              END
            END;

        | Heading (heading) => LinksInSequence(heading.content);

        | Address (addr) => LinksInSequence(addr.content);

        | BlockQuote (quote) => LinksInSequence(quote.content);

        | Image (<*NOWARN*> image ) =>
          (* links := TextList.Cons(image.source, links); *)
        | Oblet =>

        (* Tables and table rows wrap sequences just like the formatting
           elements above, so anchors inside them must be collected too;
           without these cases they would be dropped by the ELSE branch. *)
        | Table (format) => LinksInSequence(format.content);

        | TableRow (format) => LinksInSequence(format.content);

        ELSE
        END;
        seq := seq.next;
      END;
    END LinksInSequence;
  BEGIN
    LinksInSequence(html.body);
    (* Links were prepended as found; reverse in place to restore the
       order of encounter. *)
    RETURN TextList.ReverseD(links);
  END GetLinks;

BEGIN
  (* The module body is empty: no initialization code runs here. *)
END HTML.

interface HTML is in: