%--------------------------------------------------%
% vim: ts=4 sw=4 expandtab ft=mercury
%--------------------------------------------------%
% Copyright (C) 2007, 2009-2011 The University of Melbourne
% Copyright (C) 2014-2016, 2018, 2020, 2022-2025 The Mercury team.
% This file is distributed under the terms specified in COPYING.LIB.
%--------------------------------------------------%
%
% File: pretty_printer.m.
% Main author: rafe
% Stability: high.
%
% This module defines a doc type for formatting and a pretty printer for
% displaying docs.
%
% The doc type includes data constructors for outputting strings, newlines,
% forming groups, indented blocks, and arbitrary values.
%
% The key feature of the algorithm is this: newlines in a group are ignored if
% the group can fit on the remainder of the current line. (The algorithm is
% similar to those of Oppen and Wadler, although it uses neither coroutines or
% laziness.)
%
% When a newline is printed, indentation is also output according to the
% current indentation level.
%
% The pretty printer includes special support for formatting Mercury style
% terms in a way that respects Mercury's rules for operator precedence and
% bracketing.
%
% The pretty printer takes a parameter specifying a collection of user-defined
% formatting functions for handling certain types rather than using the
% default built-in mechanism. This allows one to, say, format maps as
% sequences of (key -> value) pairs rather than exposing the underlying
% 234-tree structure.
%
% The amount of output produced is controlled via limit parameters.
% Three kinds of limits are supported: the output line width, the maximum
% number of lines to be output, and a limit on the depth for formatting
% arbitrary terms. Output is replaced with ellipsis ("...") when a limit
% has been exceeded.
%
%--------------------------------------------------%
:- module pretty_printer.
:- interface.
:- import_module array.
:- import_module char.
:- import_module deconstruct.
:- import_module io.
:- import_module list.
:- import_module one_or_more.
:- import_module stream.
:- import_module string.
:- import_module string.builder.
:- import_module tree234.
:- import_module type_desc.
:- import_module univ.
:- import_module version_array.
%--------------------------------------------------%
:- type doc
---> str(string)
% Output a literal string. This string should not contain newlines,
% hard tabs, or other formatting characters other than spaces;
% if it does, the resulting output will almost certainly look
% strange.
; nl
% Output a newline, followed by indentation, if and only if
% - the enclosing group does not fit on the current line, and
% - starting a new line adds more space.
; hard_nl
% Always outputs a newline, followed by indentation.
; docs(list(doc))
% An embedded sequence of docs.
; format_univ(univ)
% Use a specialised formatter on the given value if
% is available for its type. Otherwise, use the generic formatter.
; format_list(list(univ), doc)
% Pretty print a list of items using the given doc as a separator
% between each pair of items.
; format_term(string, list(univ))
% Pretty print a term with zero or more arguments. If the term
% corresponds to a Mercury operator, it will be printed with
% appropriate fixity and, if necessary, in parentheses. The term
% name will be quoted and escaped if necessary.
; format_susp((func) = doc)
% The argument is a suspended computation that, if evaluated,
% will produce a doc to print. The evaluation must materialize
% at least one part of this doc, but other parts may remain
% in the form of other suspensions. This will produce a final
% doc in a lazy fashion, if needed. The *point* of producing the
% doc lazily is that when the formatting limit is reached,
% then the prettyprinter will just output "...", and will do so
% *without* evaluating any remaining suspensions. This is useful
% for formatting large structures without using more resources
% than required. Note that expanding a suspended computation
% reduces the formatting limit by one.
; pp_internal(pp_internal).
% pp_internal docs are used in the implementation, and cannot be
% exploited by user code.
:- type docs == list(doc).
% This type is private to the implementation and cannot be exploited
% by user code.
%
:- type pp_internal.
%--------------------------------------------------%
%
% Functions for constructing docs.
%
% indent(IndentString, Docs):
%
% Append IndentString to the current indentation while printing Docs.
% Indentation is printed after each newline that is output.
%
:- func indent(string, list(doc)) = doc.
% indent(Docs) = indent(" ", Docs).
%
% A convenient abbreviation.
%
:- func indent(list(doc)) = doc.
% group(Docs):
%
% If Docs can be output on the remainder of the current line by ignoring
% any nls in Docs, then do so. Otherwise nls in Docs are printed
% (followed by any indentation). The formatting test is applied recursively
% for any subgroups in Docs.
%
:- func group(list(doc)) = doc.
% format(X) = format_univ(univ(X)):
%
% A convenient abbreviation.
%
:- func format(T) = doc.
% format_arg(Doc) has the effect of formatting any term in Doc as though
% it were an argument in a Mercury term, by enclosing it in parentheses
% if necessary.
%
:- func format_arg(doc) = doc.
%--------------------------------------------------%
%
% Functions for converting docs to strings and writing them out to streams.
%
% write_doc_formatted(X, !IO):
% write_doc_formatted(FileStream, X, !IO):
%
% Convert X to a doc using the format function, and then
% call write_doc on the result.
%
:- pred write_doc_formatted(T::in, io::di, io::uo) is det.
:- pred write_doc_formatted(io.text_output_stream::in, T::in,
io::di, io::uo) is det.
% write_doc(Doc, !IO):
% write_doc(FileStream, Doc, !IO):
%
% Format Doc to io.stdout_stream or FileStream respectively using put_doc,
% with include_details_cc, the default formatter_map, and the default
% pp_params.
%
:- pred write_doc(doc::in, io::di, io::uo) is det.
:- pred write_doc(io.text_output_stream::in, doc::in, io::di, io::uo) is det.
% put_doc(Stream, Canonicalize, FMap, Params, Doc, !State):
%
% Format Doc to Stream. Format format_univ(_) docs using specialised
% formatters Formatters, and using Params as the pretty printer parameters.
% The Canonicalize argument controls how put_doc deconstructs values
% of noncanonical types (see the documentation of the noncanon_handling
% type for details).
%
:- pred put_doc(Stream, noncanon_handling, formatter_map, pp_params, doc,
State, State) <= stream.writer(Stream, string, State).
:- mode put_doc(in, in(canonicalize), in, in, in, di, uo) is det.
:- mode put_doc(in, in(include_details_cc), in, in, in, di, uo) is cc_multi.
:- pragma type_spec_constrained_preds(
[stream.writer(Stream, string, State)],
apply_to_superclasses,
[subst([Stream => io.text_output_stream, State = io.state]),
subst([Stream => string.builder.handle, State = string.builder.state])]).
%--------------------------------------------------%
%
% Mechanisms for controlling *how* docs are converted to strings.
%
% The type of generic formatting functions.
% The first argument is the univ of the value to be formatted.
% The type of this value will have the form "TC(AT1, AT2, ..., ATn)",
% where TC is a type constructor, and ATi are its argument types.
% The second argument of the function will consist of the list of
% type descriptors describing AT1, AT2, ... ATn.
%
% These arguments are intended to be used as shown by this example
% function, which can be the entry for the type constructor tree234(K, V):
%
% fmt_tree234(Univ, ArgDescs) =
% ( if
% ArgDescs = [ArgDescA, ArgDescB],
% has_type(_ArgA : K, ArgDescA),
% has_type(_ArgB : V, ArgDescB),
% Value = univ_value(Univ),
% dynamic_cast(Value, Tree : tree234(K, V))
% then
% pretty_printer.tree234_to_doc(Tree)
% else
% str("internal error: expected a tree234, did not get it")
% ).
%
% Since the tree234 type constructor has arity two, the caller will pass
% two type descriptors to fmt_tree234, which will describe the actual types
% of the keys and values in *this* tree. The two calls to has_type
% (which is defined in the type_desc module of the Mercury standard
% library) tell the compiler that the type variables K and V in *this*
% function should stand for the ground types described by ArgDescA
% and ArgDescB respectively.
%
% After the call to univ_value picks the value out of Univ, the call to
% dynamic_cast (which is defined in the builtin module of the Mercury
% standard library) checks whether the type of Value is tree234(K, V),
% and if it is, (which it should be, since the predicates and functions
% of this module would not have called fmt_tree234 otherwise), will return
% Value as Tree. Note that the difference between Value and Tree is that
%
% - the compiler does not know the type of Value statically, since that
% information comes from Univ, which is available only at runtime, but
%
% - the compiler *does* know the type of Tree statically, due to the type
% annotation on it. This type, tree234(K, V), does contain type
% variables, but its principal type constructor is known, and that is
% enough for the code in the then-part of the if-then-else to do its job.
%
% Note that the code in the else-part should not matter. If that code
% is ever executed, that would mean that a predicate or function of
% this module has called fmt_tree234 with inappropriate data.
%
:- type formatter == (func(univ, list(type_desc)) = doc).
% A formatter_map maps type constructors to formatters.
%
% If the principal (outermost) type constructor of a value's type
% has an entry in the formatter_map given to one of the prettyprinting
% predicates or functions below, then that predicate or function will use
% the corresponding formatter to format that value.
%
:- type formatter_map.
% Formatter maps identify type constructors by
%
% - the name of the module that defines the type constructor,
% - the type constructor's name, and
% - the type constructor's arity.
%
% The three fields contain this info in this order.
%
:- type formatter_map_entry
---> formatter_map_entry(string, string, int).
% ModuleName.TypeName/TypeArity.
% Construct a new formatter_map.
%
:- func new_formatter_map = formatter_map.
% set_formatter(ModuleName, TypeName, TypeArity, Formatter, !FMap):
%
% Update !FMap to use Formatter to format values whose type is
% ModuleName.TypeName/TypeArity.
%
:- pred set_formatter(string::in, string::in, int::in, formatter::in,
formatter_map::in, formatter_map::out) is det.
:- func get_formatter_map_entry_types(formatter_map) =
list(formatter_map_entry).
%--------------------------------------------------%
% The func_symbol_limit type controls *how many* of the function symbols
% stored in the term inside a format_univ, format_list, or format_term doc
% the write_doc family of functions should include in the resulting string.
%
% A limit of linear(N) formats the first N functors before truncating
% output to "...".
%
% A limit of triangular(N) formats a term t(X1, ..., Xn) by applying
% the following limits:
%
% - triangular(N - 1) when formatting X1,
% - triangular(N - 2) when formatting X2,
% - ..., and
% - triangular(N - n) when formatting Xn.
%
% The cost of formatting the term t(X1, ..., Xn) as a whole is just one,
% so a sequence of terms T1, T2, ... is formatted with limits
% triangular(N), triangular(N - 1), ... respectively. When the limit
% is exhausted, terms are output as just "...".
%
:- type func_symbol_limit
---> linear(int)
; triangular(int).
% The pp_params type contains the parameters of the prettyprinting process:
%
% - the width of each line,
% - the maximum number of lines to print, and
% - the controls for how many function symbols to print.
%
:- type pp_params
---> pp_params(
pp_line_width :: int,
pp_max_lines :: int,
pp_limit :: func_symbol_limit
).
%--------------------------------------------------%
% A user-configurable default set of type-specific formatters and
% formatting parameters is always attached to the I/O state.
% The write_doc predicate (in both its arities) uses these settings.
%
% The get_default_formatter_map predicate reads the default formatter_map
% from the current I/O state, while set_default_formatter_map writes
% the specified formatter_map to the I/O state to become the new default.
%
% The initial value of the default formatter_map provides the means
% to prettyprint the most commonly used types in the Mercury standard
% library, such as arrays, chars, floats, ints, maps, strings, etc.
%
% The default formatter_map may also be updated by users' modules
% (e.g. in initialisation goals).
%
:- pred get_default_formatter_map(formatter_map::out, io::di, io::uo) is det.
:- pred set_default_formatter_map(formatter_map::in, io::di, io::uo) is det.
% set_default_formatter(ModuleName, TypeName, TypeArity, Formatter, !IO):
%
% Update the default formatter in the I/O state to use Formatter
% to print values of the type ModuleName.TypeName/TypeArity.
%
:- pred set_default_formatter(string::in, string::in, int::in, formatter::in,
io::di, io::uo) is det.
% Alongside the default formatter_map, the I/O state also always stores
% a default set of pretty-printing parameters (pp_params) for use by
% the write_doc predicate (in both its arities).
%
% The get_default_params predicate reads the default parameters
% from the current I/O state, while set_default_params writes the specified
% parameters to the I/O state to become the new default.
%
% The initial default parameters are pp_params(78, 100, triangular(100)).
%
:- pred get_default_params(pp_params::out, io::di, io::uo) is det.
:- pred set_default_params(pp_params::in, io::di, io::uo) is det.
%--------------------------------------------------%
% Convert a char to a doc.
%
:- func char_to_doc(char) = doc.
% Convert a string to a doc.
%
:- func string_to_doc(string) = doc.
% Convert a float to a doc.
%
:- func float_to_doc(float) = doc.
% Convert an int to a doc.
%
:- func int_to_doc(int) = doc.
:- func int8_to_doc(int8) = doc.
:- func int16_to_doc(int16) = doc.
:- func int32_to_doc(int32) = doc.
:- func int64_to_doc(int64) = doc.
% Convert a uint to a doc.
%
:- func uint_to_doc(uint) = doc.
:- func uint8_to_doc(uint8) = doc.
:- func uint16_to_doc(uint16) = doc.
:- func uint32_to_doc(uint32) = doc.
:- func uint64_to_doc(uint64) = doc.
% Convert a list to a doc.
%
:- func list_to_doc(list(T)) = doc.
% Convert a nonempty list to a doc.
%
:- func one_or_more_to_doc(one_or_more(T)) = doc.
% Convert a 2-3-4 tree to a doc.
%
:- func tree234_to_doc(tree234(K, V)) = doc.
% Convert an array to a doc.
%
:- func array_to_doc(array(T)) = doc.
% Convert a version array to a doc.
%
:- func version_array_to_doc(version_array(T)) = doc.
%--------------------------------------------------%
%--------------------------------------------------%