Critical Mass Modula-3: libm3/src/types/Unicode.i3

libm3/src/types/Unicode.i3

---------------------------------------------------------------------------*\ * Copyright (c) 2003-2004 by Dirk Muysers. * For details see file COPYRIGHT * * INTERFACE: Unicode.i3 * PURPOSE: Unicode character set * HISTORY: * DATE PROGRAMMER DESCRIPTION * 30-Nov-2003 Dirk Muysers Initial creation * *---------------------------------------------------------------------------- * $Id: Unicode.i3.html,v 1.3 2010-04-29 17:19:04 wagner Exp $ \*---------------------------------------------------------------------------

INTERFACE Unicode;

---------------------------------------------------------------------------*\ * DESCRIPTION: * * Unicode deals with individual characters that belong to the WIDECHAR type. * It includes constant definitions for the character codes of a few exotic * characters, such as Unicode.NL for new-line. It classifies * characters into groups, like digits or punctuation and provides * functions that map lower-case letters into upper-case and the like. * * Please note that this library is surrogate-blind. It considers * surrogates as ordinary --although illegal-- characters und does not * check their correct pairing. * \*---------------------------------------------------------------------------

CONST
  NUL  = W'\x0000'; SOH  = W'\x0001'; STX  = W'\x0002';  ETX  = W'\x0003';
  EOT  = W'\x0004'; ENQ  = W'\x0005'; ACK  = W'\x0006';  BEL  = W'\x0007';
  BS   = W'\x0008'; HT   = W'\x0009'; NL   = W'\x000A';  VT   = W'\x000B';
  NP   = W'\x000C'; CR   = W'\x000D'; SO   = W'\x000E';  SI   = W'\x000F';
  DLE  = W'\x0010'; DC1  = W'\x0011'; DC2  = W'\x0012';  DC3  = W'\x0013';
  DC4  = W'\x0014'; NAK  = W'\x0015'; SYN  = W'\x0016';  ETB  = W'\x0017';
  CAN  = W'\x0018'; EM   = W'\x0019'; SUB  = W'\x001A';  ESC  = W'\x001B';
  FS   = W'\x001C'; GS   = W'\x001D'; RS   = W'\x001E';  US   = W'\x001F';
  SP   = W'\x0020'; DEL  = W'\x007F';
  PAD  = W'\x0080'; HOP  = W'\x0081'; BPH  = W'\x0082';  NBH  = W'\x0083';
  IND  = W'\x0084'; NEL  = W'\x0085'; SSA  = W'\x0086';  ESA  = W'\x0087';
  HTS  = W'\x0088'; HTJ  = W'\x0089'; VTS  = W'\x008A';  PLD  = W'\x008B';
  PLU  = W'\x008C'; RI   = W'\x008D'; SS2  = W'\x008E';  SS3  = W'\x008F';
  DCS  = W'\x0090'; PU1  = W'\x0091'; PU2  = W'\x0092';  STS  = W'\x0093';
  CCH  = W'\x0094'; MW   = W'\x0095'; SPA  = W'\x0096';  EPA  = W'\x0097';
  SOS  = W'\x0098'; SGCI = W'\x0099'; SCI  = W'\x009A';  CSI  = W'\x009B';
  ST   = W'\x009C'; OSC  = W'\x009D'; PM   = W'\x009E';  APC  = W'\x009F';
  NBSP = W'\x00A0';

TYPE T = WIDECHAR;

---------------------------------------------------------------------------*\ * Predicates \*---------------------------------------------------------------------------

PROCEDURE IsDefined (t: T): BOOLEAN;

Returns TRUE if t has a defined meaning in Unicode.


PROCEDURE IsASCII (t: T): BOOLEAN;

Returns TRUE if t corresponds to an ASCII character.


PROCEDURE IsSpace (t: T): BOOLEAN;

Returns TRUE if t is a Unicode white space character.


PROCEDURE IsLetter (t: T): BOOLEAN;

Returns TRUE if t is a Unicode letter.


PROCEDURE IsDigit (t: T): BOOLEAN;

Returns TRUE if t is a Unicode digit.


PROCEDURE IsLetterOrDigit (t: T): BOOLEAN;

== IsLetter(t) OR IsDigit(t).


PROCEDURE IsUpperCase (t: T): BOOLEAN;

Returns TRUE if t is an upper-case Unicode letter.


PROCEDURE IsLowerCase (t: T): BOOLEAN;

Returns TRUE if t is a lower-case Unicode letter.


PROCEDURE IsTitleCase (t: T): BOOLEAN;

Returns TRUE if t is defined as a Unicode title-case letter.

---------------------------------------------------------------------------*\ * Translations \*---------------------------------------------------------------------------

PROCEDURE ToUpperCase (t: T): T;

Return the upper-case equivalent of t if it has one. Otherwise, return t.


PROCEDURE ToLowerCase (t: T): T;

Return the upper-case equivalent of t if it has one. Otherwise, return t.


PROCEDURE ToTitleCase (t: T): T;

Return the title-case equivalent of t if it has one. Otherwise, return t.


PROCEDURE ToDigitValue (t: T): CARDINAL;

Returns the numeric value of the digit t if IsDigit(t) is TRUE. Otherwise, returns LAST (CARDINAL).


END Unicode.

vi: set ai sw=2 ts=2 tw=80: