Next: , Previous: pqueue, Up: Top   [Contents]


63 pretty_printer

%--------------------------------------------------%
% vim: ts=4 sw=4 expandtab ft=mercury
%--------------------------------------------------%
% Copyright (C) 2007, 2009-2011 The University of Melbourne
% Copyright (C) 2014-2016, 2018, 2020, 2022-2024 The Mercury team.
% This file is distributed under the terms specified in COPYING.LIB.
%--------------------------------------------------%
%
% File: pretty_printer.m
% Main author: rafe
% Stability: medium
%
% This module defines a doc type for formatting and a pretty printer for
% displaying docs.
%
% The doc type includes data constructors for outputting strings, newlines,
% forming groups, indented blocks, and arbitrary values.
%
% The key feature of the algorithm is this: newlines in a group are ignored if
% the group can fit on the remainder of the current line. (The algorithm is
% similar to those of Oppen and Wadler, although it uses neither coroutines or
% laziness.)
%
% When a newline is printed, indentation is also output according to the
% current indentation level.
%
% The pretty printer includes special support for formatting Mercury style
% terms in a way that respects Mercury's rules for operator precedence and
% bracketing.
%
% The pretty printer takes a parameter specifying a collection of user-defined
% formatting functions for handling certain types rather than using the
% default built-in mechanism. This allows one to, say, format maps as
% sequences of (key -> value) pairs rather than exposing the underlying
% 234-tree structure.
%
% The amount of output produced is controlled via limit parameters.
% Three kinds of limits are supported: the output line width, the maximum
% number of lines to be output, and a limit on the depth for formatting
% arbitrary terms. Output is replaced with ellipsis ("...") when a limit
% has been exceeded.
%
%--------------------------------------------------%

:- module pretty_printer.
:- interface.

:- import_module array.
:- import_module char.
:- import_module deconstruct.
:- import_module io.
:- import_module list.
:- import_module one_or_more.
:- import_module stream.
:- import_module string.
:- import_module string.builder.
:- import_module tree234.
:- import_module type_desc.
:- import_module univ.
:- import_module version_array.

%--------------------------------------------------%

:- type doc
    --->    str(string)
            % Output a literal string. This string should not contain newlines,
            % hard tabs, or other formatting characters other than spaces;
            % if it does, the resulting output will almost certainly look
            % strange.

    ;       nl
            % Output a newline, followed by indentation, if and only if
            % - the enclosing group does not fit on the current line, and
            % - starting a new line adds more space.

    ;       hard_nl
            % Always outputs a newline, followed by indentation.

    ;       docs(list(doc))
            % An embedded sequence of docs.

    ;       format_univ(univ)
            % Use a specialised formatter on the given value if
            % is available for its type. Otherwise, use the generic formatter.

    ;       format_list(list(univ), doc)
            % Pretty print a list of items using the given doc as a separator
            % between each pair of items.

    ;       format_term(string, list(univ))
            % Pretty print a term with zero or more arguments. If the term
            % corresponds to a Mercury operator, it will be printed with
            % appropriate fixity and, if necessary, in parentheses. The term
            % name will be quoted and escaped if necessary.

    ;       format_susp((func) = doc)
            % The argument is a suspended computation that, if evaluated,
            % will produce a doc to print. The evaluation must materialize
            % at least one part of this doc, but other parts may remain
            % in the form of other suspensions. This will produce a final
            % doc in a lazy fashion, if needed. The *point* of producing the
            % doc lazily is that when the formatting limit is reached,
            % then the prettyprinter will just output "...", and will do so
            % *without* evaluating any remaining suspensions. This is useful
            % for formatting large structures without using more resources
            % than required. Note that expanding a suspended computation
            % reduces the formatting limit by one.

    ;       pp_internal(pp_internal).
            % pp_internal docs are used in the implementation, and cannot be
            % exploited by user code.

:- type docs == list(doc).

    % This type is private to the implementation and cannot be exploited
    % by user code.
    %
:- type pp_internal.

%--------------------------------------------------%
%
% Functions for constructing docs.
%

    % indent(IndentString, Docs):
    %
    % Append IndentString to the current indentation while printing Docs.
    % Indentation is printed after each newline that is output.
    %
:- func indent(string, list(doc)) = doc.

    % indent(Docs) = indent("  ", Docs).
    %
    % A convenient abbreviation.
    %
:- func indent(list(doc)) = doc.

    % group(Docs):
    %
    % If Docs can be output on the remainder of the current line by ignoring
    % any nls in Docs, then do so. Otherwise nls in Docs are printed
    % (followed by any indentation). The formatting test is applied recursively
    % for any subgroups in Docs.
    %
:- func group(list(doc)) = doc.

    % format(X) = format_univ(univ(X)):
    %
    % A convenient abbreviation.
    %
:- func format(T) = doc.

    % format_arg(Doc) has the effect of formatting any term in Doc as though
    % it were an argument in a Mercury term, by enclosing it in parentheses
    % if necessary.
    %
:- func format_arg(doc) = doc.

%--------------------------------------------------%
%
% Functions for converting docs to strings and writing them out to streams.
%

    % write_doc_formatted(X, !IO):
    % write_doc_formatted(FileStream, X, !IO):
    %
    % Convert X to a doc using the format function, and then
    % call write_doc on the result.
    %
:- pred write_doc_formatted(T::in, io::di, io::uo) is det.
:- pred write_doc_formatted(io.text_output_stream::in, T::in,
    io::di, io::uo) is det.

    % write_doc(Doc, !IO):
    % write_doc(FileStream, Doc, !IO):
    %
    % Format Doc to io.stdout_stream or FileStream respectively using put_doc,
    % with include_details_cc, the default formatter_map, and the default
    % pp_params.
    %
:- pred write_doc(doc::in, io::di, io::uo) is det.
:- pred write_doc(io.text_output_stream::in, doc::in, io::di, io::uo) is det.

    % put_doc(Stream, Canonicalize, FMap, Params, Doc, !State):
    %
    % Format Doc to Stream. Format format_univ(_) docs using specialised
    % formatters Formatters, and using Params as the pretty printer parameters.
    % The Canonicalize argument controls how put_doc deconstructs values
    % of noncanonical types (see the documentation of the noncanon_handling
    % type for details).
    %
:- pred put_doc(Stream, noncanon_handling, formatter_map, pp_params, doc,
    State, State) <= stream.writer(Stream, string, State).
:- mode put_doc(in, in(canonicalize), in, in, in, di, uo) is det.
:- mode put_doc(in, in(include_details_cc), in, in, in, di, uo) is cc_multi.

:- pragma type_spec_constrained_preds(
    [stream.writer(Stream, string, State)],
    apply_to_superclasses,
    [subst([Stream => io.text_output_stream, State = io.state]),
    subst([Stream => string.builder.handle, State = string.builder.state])]).

%--------------------------------------------------%
%
% Mechanisms for controlling *how* docs are converted to strings.
%

    % The type of generic formatting functions.
    % The first argument is the univ of the value to be formatted.
    % The type of this value will have the form "TC(AT1, AT2, ..., ATn)",
    % where TC is a type constructor, and ATi are its argument types.
    % The second argument of the function will consist of the list of
    % type descriptors describing AT1, AT2, ... ATn.
    %
    % These arguments are intended to be used as shown by this example
    % function, which can be the entry for the type constructor tree234(K, V):
    %
    %     fmt_tree234(Univ, ArgDescs) =
    %         ( if
    %             ArgDescs = [ArgDescA, ArgDescB],
    %             has_type(_ArgA : K, ArgDescA),
    %             has_type(_ArgB : V, ArgDescB),
    %             Value = univ_value(Univ),
    %             dynamic_cast(Value, Tree : tree234(K, V))
    %         then
    %             pretty_printer.tree234_to_doc(Tree)
    %         else
    %             str("internal error: expected a tree234, did not get it")
    %         ).
    %
    % Since the tree234 type constructor has arity two, the caller will pass
    % two type descriptors to fmt_tree234, which will describe the actual types
    % of the keys and values in *this* tree. The two calls to has_type
    % (which is defined in the type_desc module of the Mercury standard
    % library) tell the compiler that the type variables K and V in *this*
    % function should stand for the ground types described by ArgDescA
    % and ArgDescB respectively.
    %
    % After the call to univ_value picks the value out of Univ, the call to
    % dynamic_cast (which is defined in the builtin module of the Mercury
    % standard library) checks whether the type of Value is tree234(K, V),
    % and if it is, (which it should be, since the predicates and functions
    % of this module would not have called fmt_tree234 otherwise), will return
    % Value as Tree. Note that the difference between Value and Tree is that
    %
    % - the compiler does not know the type of Value statically, since that
    %   information comes from Univ, which is available only at runtime, but
    %
    % - the compiler *does* know the type of Tree statically, due to the type
    %   annotation on it. This type, tree234(K, V), does contain type
    %   variables, but its principal type constructor is known, and that is
    %   enough for the code in the then-part of the if-then-else to do its job.
    %
    % Note that the code in the else-part should not matter. If that code
    % is ever executed, that would mean that a predicate or function of
    % this module has called fmt_tree234 with inappropriate data.
    %
:- type formatter == (func(univ, list(type_desc)) = doc).

    % A formatter_map maps type constructors to formatters.
    %
    % If the principal (outermost) type constructor of a value's type
    % has an entry in the formatter_map given to one of the prettyprinting
    % predicates or functions below, then that predicate or function will use
    % the corresponding formatter to format that value.
    %
:- type formatter_map.

    % Formatter maps identify type constructors by
    %
    % - the name of the module that defines the type constructor,
    % - the type constructor's name, and
    % - the type constructor's arity.
    %
    % The three fields contain this info in this order.
    %
:- type formatter_map_entry
    --->    formatter_map_entry(string, string, int).
            % ModuleName.TypeName/TypeArity.

    % Construct a new formatter_map.
    %
:- func new_formatter_map = formatter_map.

    % set_formatter(ModuleName, TypeName, TypeArity, Formatter, !FMap):
    %
    % Update !FMap to use Formatter to format values whose type is
    % ModuleName.TypeName/TypeArity.
    %
:- pred set_formatter(string::in, string::in, int::in, formatter::in,
    formatter_map::in, formatter_map::out) is det.

:- func get_formatter_map_entry_types(formatter_map) =
    list(formatter_map_entry).

%--------------------------------------------------%

    % The func_symbol_limit type controls *how many* of the function symbols
    % stored in the term inside a format_univ, format_list, or format_term doc
    % the write_doc family of functions should include in the resulting string.
    %
    % A limit of linear(N) formats the first N functors before truncating
    % output to "...".
    %
    % A limit of triangular(N) formats a term t(X1, ..., Xn) by applying
    % the following limits:
    %
    % - triangular(N - 1) when formatting X1,
    % - triangular(N - 2) when formatting X2,
    % - ..., and
    % - triangular(N - n) when formatting Xn.
    %
    % The cost of formatting the term t(X1, ..., Xn) as a whole is just one,
    % so a sequence of terms T1, T2, ... is formatted with limits
    % triangular(N), triangular(N - 1), ... respectively. When the limit
    % is exhausted, terms are output as just "...".
    %
:- type func_symbol_limit
    --->    linear(int)
    ;       triangular(int).

    % The pp_params type contains the parameters of the prettyprinting process:
    %
    % - the width of each line,
    % - the maximum number of lines to print, and
    % - the controls for how many function symbols to print.
    %
:- type pp_params
    --->    pp_params(
                pp_line_width   :: int,
                pp_max_lines    :: int,
                pp_limit        :: func_symbol_limit
            ).

%--------------------------------------------------%

    % A user-configurable default set of type-specific formatters and
    % formatting parameters is always attached to the I/O state.
    % The write_doc predicate (in both its arities) uses these settings.
    %
    % The get_default_formatter_map predicate reads the default formatter_map
    % from the current I/O state, while set_default_formatter_map writes
    % the specified formatter_map to the I/O state to become the new default.
    %
    % The initial value of the default formatter_map provides the means
    % to prettyprint the most commonly used types in the Mercury standard
    % library, such as arrays, chars, floats, ints, maps, strings, etc.
    %
    % The default formatter_map may also be updated by users' modules
    % (e.g. in initialisation goals).
    %
    % These defaults are thread local, and therefore changes made by one thread
    % to the default formatter_map will not be visible in another thread.
    %
:- pred get_default_formatter_map(formatter_map::out, io::di, io::uo) is det.
:- pred set_default_formatter_map(formatter_map::in, io::di, io::uo) is det.

    % set_default_formatter(ModuleName, TypeName, TypeArity, Formatter, !IO):
    %
    % Update the default formatter in the I/O state to use Formatter
    % to print values of the type ModuleName.TypeName/TypeArity.
    %
:- pred set_default_formatter(string::in, string::in, int::in, formatter::in,
    io::di, io::uo) is det.

    % Alongside the default formatter_map, the I/O state also always stores
    % a default set of pretty-printing parameters (pp_params) for use by
    % the write_doc predicate (in both its arities).
    %
    % The get_default_params predicate reads the default parameters
    % from the current I/O state, while set_default_params writes the specified
    % parameters to the I/O state to become the new default.
    %
    % The initial default parameters are pp_params(78, 100, triangular(100)).
    %
    % These defaults are thread local, and therefore changes made by one thread
    % to the default pp_params will not be visible in another thread.
    %
:- pred get_default_params(pp_params::out, io::di, io::uo) is det.
:- pred set_default_params(pp_params::in, io::di, io::uo) is det.

%--------------------------------------------------%

    % Convert a char to a doc.
    %
:- func char_to_doc(char) = doc.

    % Convert a string to a doc.
    %
:- func string_to_doc(string) = doc.

    % Convert a float to a doc.
    %
:- func float_to_doc(float) = doc.

    % Convert an int to a doc.
    %
:- func int_to_doc(int) = doc.
:- func int8_to_doc(int8) = doc.
:- func int16_to_doc(int16) = doc.
:- func int32_to_doc(int32) = doc.
:- func int64_to_doc(int64) = doc.

    % Convert a uint to a doc.
    %
:- func uint_to_doc(uint) = doc.
:- func uint8_to_doc(uint8) = doc.
:- func uint16_to_doc(uint16) = doc.
:- func uint32_to_doc(uint32) = doc.
:- func uint64_to_doc(uint64) = doc.

    % Convert an array to a doc.
    %
:- func array_to_doc(array(T)) = doc.

    % Convert a list to a doc.
    %
:- func list_to_doc(list(T)) = doc.

    % Convert a nonempty list to a doc.
    %
:- func one_or_more_to_doc(one_or_more(T)) = doc.

    % Convert a 2-3-4 tree to a doc.
    %
:- func tree234_to_doc(tree234(K, V)) = doc.

    % Convert a version array to a doc.
    %
:- func version_array_to_doc(version_array(T)) = doc.

%--------------------------------------------------%
%--------------------------------------------------%


Next: , Previous: pqueue, Up: Top   [Contents]