%--------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%--------------------------------------------------%
% Copyright (C) 1994-2008, 2011 The University of Melbourne.
% Copyright (C) 2013-2015, 2017-2022, 2024-2025 The Mercury team.
% This file is distributed under the terms specified in COPYING.LIB.
%--------------------------------------------------%
%
% File: char.m.
% Main author: fjh.
% Stability: high.
%
% This module defines predicates and functions that manipulate characters.
%
% Originally we used `character' rather than `char' for the type name
% because `char' was used by NU-Prolog to mean something different.
% But now we use `char' and the use of `character' is discouraged.
%
% All predicates and functions exported by this module that deal with
% Unicode conform to version 13 of the Unicode standard.
%
%--------------------------------------------------%
%--------------------------------------------------%
:- module char.
:- interface.
:- import_module enum.
:- import_module list.
:- import_module pretty_printer.
%--------------------------------------------------%
% A Unicode code point.
%
% `char' is an equivalence type: it is just another name for the
% builtin type `character'. The instance declarations below make
% `character' an instance of the `enum' and `uenum' type classes.
%
:- type char == character.
:- instance enum(character).
:- instance uenum(character).
% to_int(Char) = Int:
% to_int(Char, Int):
%
% `to_int'/1 and `to_int(in, out)' convert a character to its
% corresponding numerical code (integer value).
%
% `to_int(in, in)' tests whether the given integer is the numerical code
% of the given character. This mode is implied by `to_int(in, out)'.
%
% `to_int(out, in)' converts an integer value to a character value.
% It fails for integer values outside of the Unicode range.
%
% Be aware that there is no guarantee that characters can be written to
% files or to the standard output or standard error streams. Files using an
% 8-bit national character set would only be able to represent a subset of
% all possible code points. Currently, the Mercury standard library can
% only read and write UTF-8 text files, so the entire range is supported
% (excluding surrogate and noncharacter code points).
%
% Note that '\0' is not accepted as a Mercury null character literal.
% Instead, a null character can be created using `det_from_int(0)'.
% Null characters are not allowed in Mercury strings in C grades.
%
:- func to_int(char) = int.
:- pred to_int(char, int).
:- mode to_int(in, out) is det.
:- mode to_int(in, in) is semidet. % implied
:- mode to_int(out, in) is semidet.
% from_int(Int, Char):
%
% Converts an integer to its corresponding character, if any.
% Fails if there is no corresponding character.
% A more expressive name for the reverse mode of to_int.
%
:- pred from_int(int::in, char::out) is semidet.
% det_from_int(Int) = Char:
% det_from_int(Int, Char):
%
% Converts an integer to its corresponding character.
% Throws an exception if there isn't one.
%
:- func det_from_int(int) = char.
:- pred det_from_int(int::in, char::out) is det.
% to_uint(Char) = UInt:
%
% Converts a character to its numerical character code (unsigned integer).
%
:- func to_uint(char) = uint.
% from_uint(UInt, Char):
%
% Converts an unsigned integer to its corresponding character, if any.
% Fails if there is no corresponding character.
%
:- pred from_uint(uint::in, char::out) is semidet.
% det_from_uint(UInt) = Char:
%
% Converts an unsigned integer to its corresponding character.
% Throws an exception if there isn't one.
%
:- func det_from_uint(uint) = char.
% Returns the minimum numerical character code.
% Available both as a function and as a predicate.
%
:- func min_char_value = int.
:- pred min_char_value(int::out) is det.
% Returns the maximum numerical character code.
% Available both as a function and as a predicate.
%
:- func max_char_value = int.
:- pred max_char_value(int::out) is det.
%--------------------------------------------------%
% True if-and-only-if the character is a lowercase letter (a-z)
% in the ASCII range.
%
:- pred is_lower(char::in) is semidet.
% True if-and-only-if the character is an uppercase letter (A-Z)
% in the ASCII range.
%
:- pred is_upper(char::in) is semidet.
% Convert a character to lowercase.
% Note that this only converts letters (A-Z) in the ASCII range.
% Available both as a function and as a predicate.
%
:- func to_lower(char) = char.
:- pred to_lower(char::in, char::out) is det.
% Convert a character to uppercase.
% Note that this only converts letters (a-z) in the ASCII range.
% Available both as a function and as a predicate.
%
:- func to_upper(char) = char.
:- pred to_upper(char::in, char::out) is det.
% lower_upper(Lower, Upper) is true if-and-only-if
% Lower is a lowercase letter (a-z) and Upper is the corresponding
% uppercase letter (A-Z) in the ASCII range.
%
% Either argument may be the input: the (in, out) mode maps a lowercase
% letter to its uppercase counterpart, and the (out, in) mode maps an
% uppercase letter to its lowercase counterpart. Both modes fail if the
% input is not a letter of the required case.
%
:- pred lower_upper(char, char).
:- mode lower_upper(in, out) is semidet.
:- mode lower_upper(out, in) is semidet.
%--------------------------------------------------%
% True if-and-only-if the character is in the ASCII range (0-127).
%
:- pred is_ascii(char::in) is semidet.
% True if-and-only-if the character is one of the following whitespace
% characters in the ASCII range:
%
% U+0020 space
% U+0009 character tabulation (horizontal tab)
% U+000A line feed
% U+000B line tabulation (vertical tab)
% U+000C form feed
% U+000D carriage return
%
:- pred is_whitespace(char::in) is semidet.
% True if-and-only-if the character is a letter (A-Z, a-z)
% in the ASCII range.
%
:- pred is_alpha(char::in) is semidet.
% True if-and-only-if the character is a letter (A-Z, a-z) or a digit (0-9)
% in the ASCII range.
%
:- pred is_alnum(char::in) is semidet.
% True if-and-only-if the character is a letter (A-Z, a-z)
% or an underscore (_) in the ASCII range.
%
:- pred is_alpha_or_underscore(char::in) is semidet.
% True if-and-only-if the character is a letter (A-Z, a-z),
% a digit (0-9) or an underscore (_) in the ASCII range.
%
:- pred is_alnum_or_underscore(char::in) is semidet.
%--------------------------------------------------%
% True if-and-only-if the character is a decimal digit (0-9)
% in the ASCII range.
%
:- pred is_digit(char::in) is semidet.
% True if-and-only-if the character is a binary digit (0 or 1)
% in the ASCII range.
%
:- pred is_binary_digit(char::in) is semidet.
% True if-and-only-if the character is an octal digit (0-7)
% in the ASCII range.
%
:- pred is_octal_digit(char::in) is semidet.
% True if-and-only-if the character is a decimal digit (0-9)
% in the ASCII range. Synonym for is_digit/1.
%
:- pred is_decimal_digit(char::in) is semidet.
% True if-and-only-if the character is a hexadecimal digit (0-9, a-f, A-F)
% in the ASCII range.
%
:- pred is_hex_digit(char::in) is semidet.
% is_base_digit(Base, Digit):
%
% True if-and-only-if Digit is a digit in the given Base (0-9, a-z, A-Z).
% Throws an exception if Base < 2 or Base > 36.
%
:- pred is_base_digit(int::in, char::in) is semidet.
%--------------------------------------------------%
% binary_digit_to_int(Char, Int):
%
% True if-and-only-if Char is a binary digit (0-1) representing
% the value Int.
%
:- pred binary_digit_to_int(char::in, int::out) is semidet.
% As binary_digit_to_int/2, but throws an exception instead of failing.
%
:- func det_binary_digit_to_int(char) = int.
% octal_digit_to_int(Char, Int):
%
% True if-and-only-if Char is an octal digit (0-7) representing
% the value Int.
%
:- pred octal_digit_to_int(char::in, int::out) is semidet.
% As octal_digit_to_int/2, but throws an exception instead of failing.
%
:- func det_octal_digit_to_int(char) = int.
% decimal_digit_to_int(Char, Int):
%
% True if-and-only-if Char is a decimal digit (0-9) representing
% the value Int.
%
:- pred decimal_digit_to_int(char::in, int::out) is semidet.
% As decimal_digit_to_int/2, but throws an exception instead of failing.
%
:- func det_decimal_digit_to_int(char) = int.
% hex_digit_to_int(Char, Int):
%
% True if-and-only-if Char is a hexadecimal digit (0-9, a-f or A-F)
% representing the value Int.
%
:- pred hex_digit_to_int(char::in, int::out) is semidet.
% As hex_digit_to_int/2, but throws an exception instead of failing.
%
:- func det_hex_digit_to_int(char) = int.
% base_digit_to_int(Base, Char, Int):
%
% True if-and-only-if Char is a decimal digit (0-9) or a letter (a-z, A-Z)
% representing the value Int (0-35) in the given base.
% Throws an exception if Base < 2 or Base > 36.
%
:- pred base_digit_to_int(int::in, char::in, int::out) is semidet.
% As base_digit_to_int/3, but throws an exception instead of failing.
%
:- func det_base_digit_to_int(int, char) = int.
% A version of base_digit_to_int that does not check whether
% Base is in the range 2 to 36. If it is not, the behavior is undefined.
%
:- pred unsafe_base_digit_to_int(int::in, char::in, int::out) is semidet.
%--------------------------------------------------%
% Convert an integer in the range 0-1 to a binary digit (0 or 1) in the
% ASCII range. Fails if the integer is outside that range.
%
:- pred int_to_binary_digit(int::in, char::out) is semidet.
% As int_to_binary_digit/2, but throws an exception instead of failing.
%
:- func det_int_to_binary_digit(int) = char.
% Convert an integer 0-7 to an octal digit (0-7) in the ASCII range.
% Fails if the integer is outside that range.
%
:- pred int_to_octal_digit(int::in, char::out) is semidet.
% As int_to_octal_digit/2, but throws an exception instead of failing.
%
:- func det_int_to_octal_digit(int) = char.
% Convert an integer 0-9 to a decimal digit (0-9) in the ASCII range.
% Fails if the integer is outside that range.
%
:- pred int_to_decimal_digit(int::in, char::out) is semidet.
% As int_to_decimal_digit/2, but throws an exception instead of failing.
%
:- func det_int_to_decimal_digit(int) = char.
% Convert an integer 0-15 to an uppercase hexadecimal digit (0-9, A-F) in
% the ASCII range. Fails if the integer is outside that range.
%
:- pred int_to_hex_digit(int::in, char::out) is semidet.
% As int_to_hex_digit/2, but throws an exception instead of failing.
%
:- func det_int_to_hex_digit(int) = char.
% base_int_to_digit(Base, Int, Char):
%
% True if-and-only-if Char is a decimal digit (0-9) or an uppercase letter
% (A-Z) representing the value Int (0-35) in the given base.
% Throws an exception if Base < 2 or Base > 36.
%
:- pred base_int_to_digit(int::in, int::in, char::out) is semidet.
% As base_int_to_digit/3, but throws an exception instead of failing.
%
:- func det_base_int_to_digit(int, int) = char.
%--------------------------------------------------%
% to_utf8(Char, CodeUnits):
%
% Encode a Unicode code point in UTF-8.
% The UTF-8 code units are represented using ints.
% Fails for surrogate code points.
%
:- pred to_utf8(char::in, list(int)::out) is semidet.
% As to_utf8/2, but represent UTF-8 code units using uint8s.
%
:- pred to_utf8_uint8(char::in, list(uint8)::out) is semidet.
% to_utf16(Char, CodeUnits):
%
% Encode a Unicode code point in UTF-16 (native endianness).
% The UTF-16 code units are represented using ints.
% Fails for surrogate code points.
%
:- pred to_utf16(char::in, list(int)::out) is semidet.
% As to_utf16/2, but represent UTF-16 code units using uint16s.
%
:- pred to_utf16_uint16(char::in, list(uint16)::out) is semidet.
% True if-and-only-if the character is a Unicode Surrogate code point,
% that is, a code point in General Category `Other,surrogate' (`Cs').
% In UTF-16, a code point with a scalar value greater than 0xffff is
% encoded with a pair of surrogate code points.
%
:- pred is_surrogate(char::in) is semidet.
% True if-and-only-if the character is a Unicode leading surrogate
% code point. A leading surrogate code point is in the inclusive range
% from 0xd800 to 0xdbff.
%
:- pred is_leading_surrogate(char::in) is semidet.
% True if-and-only-if the character is a Unicode trailing surrogate
% code point. A trailing surrogate code point is in the inclusive range
% from 0xdc00 to 0xdfff.
%
:- pred is_trailing_surrogate(char::in) is semidet.
% True if-and-only-if the character is a Unicode Noncharacter code point.
% Sixty-six code points are not used to encode characters.
% These code points should not be used for interchange, but may be used
% internally.
%
:- pred is_noncharacter(char::in) is semidet.
% True if-and-only-if the character is a Unicode Control code point,
% that is, a code point in General Category `Other,control' (`Cc').
%
:- pred is_control(char::in) is semidet.
% True if-and-only-if the character is a Unicode Space Separator
% code point, that is, a code point in General Category
% `Separator,space' (`Zs').
%
:- pred is_space_separator(char::in) is semidet.
% True if-and-only-if the character is a Unicode Line Separator code point,
% that is, a code point in General Category `Separator,line' (`Zl').
%
:- pred is_line_separator(char::in) is semidet.
% True if-and-only-if the character is a Unicode Paragraph Separator
% code point, that is, a code point in General Category
% `Separator,paragraph' (`Zp').
%
:- pred is_paragraph_separator(char::in) is semidet.
% True if-and-only-if the character is a Unicode Private-use code point,
% that is, a code point in General Category `Other,private use' (`Co').
%
:- pred is_private_use(char::in) is semidet.
%--------------------------------------------------%
% Convert a char to a pretty_printer.doc for formatting.
%
% This function is obsolete; use pretty_printer.char_to_doc/1 instead.
%
:- func char_to_doc(char) = pretty_printer.doc.
:- pragma obsolete(func(char_to_doc/1), [pretty_printer.char_to_doc/1]).
%--------------------------------------------------%
% The following predicates have all been deprecated.
%
% is_hex_digit(Char, Int):
%
% Deprecated two-argument form of the hexadecimal digit test, which also
% outputs an integer. Presumably Int is the value of the digit Char, as
% with hex_digit_to_int/2 -- confirm against the implementation.
%
% Use hex_digit_to_int/2 instead.
%
:- pred is_hex_digit(char, int).
:- mode is_hex_digit(in, out) is semidet.
% int_to_hex_char(Int, Char):
%
% Convert an integer 0-15 to a hexadecimal digit (0-9, A-F) in the ASCII
% range.
%
% Deprecated; use int_to_hex_digit/2 instead.
%
:- pred int_to_hex_char(int, char).
:- mode int_to_hex_char(in, out) is semidet.
%--------------------------------------------------%
%
% Computing hashes of chars.
%
% hash(Char) = Hash:
% hash(Char, Hash):
%
% Compute a hash value for a char.
% Available both as a function and as a predicate.
%
:- func hash(char) = int.
:- pred hash(char::in, int::out) is det.
%--------------------------------------------------%
%--------------------------------------------------%