Next: , Previous: term_io, Up: Top   [Contents]


102 term_to_xml

%--------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%--------------------------------------------------%
% Copyright (C) 1993-2007, 2010-2011 The University of Melbourne.
% Copyright (C) 2014-2015, 2017-2019, 2021 The Mercury team.
% This file is distributed under the terms specified in COPYING.LIB.
%--------------------------------------------------%
%
% File: term_to_xml.m.
% Main author: maclarty.
% Stability: low.
%
% This module provides two mechanisms for converting Mercury terms
% to XML documents.
%
% Method 1
% --------
% The first method requires a type to be an instance of the xmlable typeclass
% before values of the type can be written as XML.
% Members of the xmlable typeclass must implement a to_xml method which
% maps values of the type to XML elements.
% The XML elements may contain arbitrary children, comments and data.
%
% Method 2
% --------
% The second method is less flexible than the first, but it allows for the
% automatic generation of a DTD.
% Each functor in a term is given a corresponding well-formed element name
% in the XML document according to a mapping. Some predefined mappings are
% provided, but user defined mappings may also be used.
%
% Method 1 vs. Method 2
% ---------------------
%
% Method 2 can automatically generate DTDs, while method 1 cannot.
%
% Method 1 allows values of a specific type to be mapped to arbitrary XML
% elements with arbitrary children and arbitrary attributes.
% With method 2, each functor in a term can be mapped to only one XML element.
% Method 2 also only allows a selected set of attributes.
%
% Method 1 is useful for mapping a specific type to XML, for example
% mapping terms which represent mathematical expressions to MathML.
% Method 2 is useful for mapping terms of *any* type to XML.
%
% In both methods, the XML document can be annotated with a stylesheet
% reference.
%
%--------------------------------------------------%
%--------------------------------------------------%

:- module term_to_xml.
:- interface.

:- import_module deconstruct.
:- import_module list.
:- import_module maybe.
:- import_module stream.
:- import_module type_desc.

%--------------------------------------------------%
%
% Method 1 interface.
%

    % Instances of this typeclass can be converted to XML.
    %
:- typeclass xmlable(T) where [
    func to_xml(T::in) = (xml::out(xml_doc)) is det
].

    % Values of this type represent either a full XML document
    % or a portion of one.
    %
:- type xml
    --->    elem(
                % An XML element with a name, list of attributes
                % and a list of children.
                element_name    :: string,
                attributes      :: list(attr),
                children        :: list(xml)
            )

    ;       data(string)
            % Textual data. `<', `>', `&', `'' and `"' characters
            % will be replaced by `&lt;', `&gt;', `&amp;', `&apos;'
            % and `&quot;' respectively.

    ;       cdata(string)
            % Data to be enclosed in `<![CDATA[' and `]]>' tags.
            % The string may not contain "]]>" as a substring.
            % If it does, then the generated XML will be invalid.

    ;       comment(string)
            % An XML comment. The comment should not include
            % the `<!--' and `-->'. Any occurrences of the substring "--"
            % will be replaced by " - ", since "--" is not allowed
            % in XML comments.

    ;       entity(string)
            % An entity reference. The string will have `&' prepended
            % and `;' appended before being output.

    ;       raw(string).
            % Raw XML data. The data will be written out verbatim.

    % An XML document must have an element at the top level.
    % The following inst is used to enforce this restriction.
    %
:- inst xml_doc for xml/0
    --->    elem(
                ground, % element_name
                ground, % attributes
                ground  % children
            ).

    % An element attribute, mapping a name to a value.
    %
:- type attr
    --->    attr(string, string).

    % Values of this type specify the DOCTYPE of an XML document when
    % the DOCTYPE is defined by an external DTD.
    %
:- type doctype
    --->    public(string)                  % Formal Public Identifier (FPI)
    ;       public_system(string, string)   % FPI, URL
    ;       system(string).                 % URL

    % Values of this type specify whether a DTD should be included in
    % a generated XML document, and if so, how.
    %
:- type maybe_dtd
    --->    embed_dtd
            % Generate and embed the entire DTD in the document
            % (only available for method 2).

    ;       external_dtd(doctype)
            % Included a reference to an external DTD.

    ;       no_dtd.
            % Do not include any DOCTYPE information.

:- inst non_embedded_dtd for maybe_dtd/0
    --->    external_dtd(ground)
    ;       no_dtd.

    % Values of this type indicate whether a stylesheet reference should be
    % included in a generated XML document.
    %
:- type maybe_stylesheet
    --->    with_stylesheet(
                stylesheet_type :: string, % For example "text/xsl"
                stylesheet_href :: string
            )
    ;       no_stylesheet.

    % write_xml_doc(Stream, Term, !State):
    %
    % Output Term as an XML document to the given stream.
    % Term must be an instance of the xmlable typeclass.
    %
:- pred write_xml_doc(Stream::in, T::in, State::di, State::uo)
    is det <= (xmlable(T), stream.writer(Stream, string, State)).

    % write_xml_doc_style_dtd(Stream, Term, MaybeStyleSheet, MaybeDTD, !State):
    %
    % Write Term to the given stream as an XML document.
    % MaybeStyleSheet and MaybeDTD specify whether or not a stylesheet
    % reference and/or a DTD should be included.
    % Using this predicate, only external DTDs can be included, i.e.
    % a DTD cannot be automatically generated and embedded
    % (that feature is available only for method 2 -- see below).
    %
:- pred write_xml_doc_style_dtd(Stream::in, T::in, maybe_stylesheet::in,
    maybe_dtd::in(non_embedded_dtd), State::di, State::uo) is det
    <= (xmlable(T), stream.writer(Stream, string, State)).

    % write_xml_header(Stream, MaybeEncoding, !State):
    %
    % Write an XML header (i.e. `<?xml version="1.0"?>) to the
    % current file output stream.
    % If MaybeEncoding is yes(Encoding), then include `encoding="Encoding"'
    % in the header.
    %
:- pred write_xml_header(Stream::in, maybe(string)::in,
    State::di, State::uo) is det <= stream.writer(Stream, string, State).

    % write_xml_element(Stream, Indent, Term, !State):
    %
    % Write Term out as XML to the given stream, using Indent as the
    % indentation level (each indentation level is one tab character).
    % No `<?xml ... ?>' header will be written.
    % This is useful for generating large XML documents piecemeal.
    %
:- pred write_xml_element(Stream::in, int::in, T::in,
    State::di, State::uo) is det
    <= (xmlable(T), stream.writer(Stream, string, State)).

%--------------------------------------------------%
%
% Method 2 interface.
%

    % Values of this type specify which mapping from functors to elements
    % to use when generating XML. The role of a mapping is twofold:
    %   1. To map functors to elements, and
    %   2. To map functors to a set of attributes that should be
    %      generated for the corresponding element.
    %
    % We provide two predefined mappings:
    %
    %   1. simple: The functors `[]', `[|]' and `{}' are mapped to the elements
    %   `List', `Nil' and `Tuple' respectively. Arrays are assigned the
    %   `Array' element. The builtin types are assigned the elements `Int',
    %   `Int8', `Int16', `Int32' `Int64', `UInt', `UInt8', `UInt16, `UInt32',
    %   `UInt64', `String', `Float' and `Char'. All other functors are assigned
    %   elements with the same name as the functor provided the functor name is
    %   well formed and does not start with a capital letter. Otherwise, a
    %   mangled version of the functor name is used.
    %
    %   All elements except those corresponding to builtin types will have
    %   their `functor', `arity', `type' and `field' (if there is a field name)
    %   attributes set. Elements corresponding to builtin types will just have
    %   their `type' and possibly their `field' attributes set.
    %
    %   The `simple' mapping is designed to be easy to read and use, but
    %   may result in the same element being assigned to different functors.
    %
    %   2. unique: Here we use the same mapping as `simple' except we append
    %   the functor arity for discriminated unions and a mangled version
    %   of the type name for every element. The same attributes as the
    %   `simple' scheme are provided. The advantage of this scheme is that
    %   it maps each functor to a unique element. This means that it will
    %   always be possible to generate a DTD using this mapping so long as
    %   there is only one top level functor and no unsupported types
    %   can appear in terms of the type.
    %
    % A custom mapping can be provided using the `custom' functor. See the
    % documentation for the element_pred type below for more information.
    %
:- type element_mapping
    --->    simple
    ;       unique
    ;       custom(element_pred).

:- inst element_mapping for element_mapping/0
    --->    simple
    ;       unique
    ;       custom(element_pred).

    % Deterministic procedures with the following signature can be used as
    % custom functor to element mappings. The inputs to the procedure are
    % a type and some information about a functor for that type if the type
    % is a discriminated union. The output should be a well formed XML element
    % name and a list of attributes that should be set for that element.
    % See the types `maybe_functor_info' and `attr_from_source' below.
    %
:- type element_pred == (pred(type_desc, maybe_functor_info, string,
    list(attr_from_source))).

:- inst element_pred == (pred(in, in, out, out) is det).

    % Values of this type are passed to custom functor-to-element mapping
    % predicates to tell the predicate which functor to generate
    % an element name for if the type is a discriminated union.
    % If the type is not a discriminated union, then non_du is passed
    % to the predicate when requesting an element for the type.
    %
:- type maybe_functor_info
    --->    du_functor(
                % The functor's name and arity.
                functor_name    :: string,
                functor_arity   :: int
            )

    ;       non_du.
            % The type is not a discriminated union.

    % Values of this type specify attributes that should be set from
    % a particular source. The attribute_name field specifies the name
    % of the attribute in the generated XML and the attribute_source
    % field indicates where the attribute's value should come from.
    %
:- type attr_from_source
    --->    attr_from_source(
                attr_name   :: string,
                attr_source :: attr_source
            ).

    % Possible attribute sources.
    %
:- type attr_source
    --->    functor
            % The original functor name as returned by
            % deconstruct.deconstruct/5.

    ;       field_name
            % The field name, if the functor appears in a named field.
            % (If the field is not named, this attribute is omitted.)

    ;       type_name
            % The fully qualified type name the functor is for.

    ;       arity.
            % The arity of the functor as returned by
            % deconstruct.deconstruct/5.

    % To support third parties generating XML which is compatible with the
    % XML generated using method 2, a DTD for a Mercury type can also be
    % generated. A DTD for a given type and functor-to-element mapping may
    % be generated provided the following conditions hold:
    %
    %   1. If the type is a discriminated union, then there must be only one
    %   top-level functor for the type. This is because the top level functor
    %   will be used to generate the document type name.
    %
    %   2. The functor-to-element mapping must map each functor to a
    %   unique element name for every functor that could appear in
    %   terms of the type.
    %
    %   3. Only types whose terms consist of discriminated unions,
    %   arrays and the builtin types `int', `string', `character' and
    %   `float' can be used to automatically generate DTDs.
    %   Existential types are not supported either.
    %
    % The generated DTD is also a good reference when creating a stylesheet
    % as it contains comments describing the mapping from functors to elements.
    %
    % Values of the following type indicate whether a DTD was successfully
    % generated or not.
    %
:- type dtd_generation_result
    --->    ok

    ;       multiple_functors_for_root
            % The root type is a discriminated union with multiple functors.

    ;       duplicate_elements(
                % The functor-to-element mapping maps different functors
                % to the same element. The arguments identify the duplicate
                % element and a list of the types whose functors map
                % to that element.
                duplicate_element   :: string,
                duplicate_types     :: list(type_desc)
            )

    ;       unsupported_dtd_type(type_desc)
            % At the moment we only support generation of DTDs for types
            % made up of discriminated unions, arrays, strings, ints,
            % characters and floats. If a component type is not supported,
            % then it is returned as the argument of this functor.

    ;       type_not_ground(pseudo_type_desc).
            % If one of the arguments of a functor is existentially typed,
            % then the pseudo_type_desc for the existentially quantified
            % argument is returned as the argument of this functor.
            % Since the values of existentially typed arguments can be of
            % any type (provided any typeclass constraints are satisfied),
            % it is not generally possible to generate DTD rules for functors
            % with existentially typed arguments.

    % write_xml_doc_general(Stream, Term, ElementMapping,
    %   MaybeStyleSheet, MaybeDTD, DTDResult, !State):
    %
    % Write Term to the given stream as an XML document using ElementMapping
    % as the scheme to map functors to elements. MaybeStyleSheet and MaybeDTD
    % specify whether or not a stylesheet reference and/or a DTD should be
    % included. Any non-canonical terms will be canonicalized. If an embedded
    % DTD is requested, but it is not possible to generate a DTD for Term
    % using ElementMapping, then a value other than `ok' is returned in
    % DTDResult and nothing is written out. See the dtd_generation_result type
    % for a list of the other possible values of DTDResult and their meanings.
    %
:- pred write_xml_doc_general(Stream::in, T::in,
    element_mapping::in(element_mapping), maybe_stylesheet::in,
    maybe_dtd::in, dtd_generation_result::out, State::di, State::uo) is det
    <= stream.writer(Stream, string, State).

    % write_xml_doc_general_cc(Stream, Term, ElementMapping, MaybeStyleSheet,
    %    MaybeDTD, DTDResult, !State):
    %
    % Write Term to the current file output stream as an XML document using
    % ElementMapping as the scheme to map functors to elements.
    % MaybeStyleSheet and MaybeDTD specify whether or not a stylesheet
    % reference and/or a DTD should be included. Any non-canonical terms
    % will be written out in full. If an embedded DTD is requested, but
    % it is not possible to generate a DTD for Term using ElementMapping,
    % then a value other than `ok' is returned in DTDResult and nothing is
    % written out. See the dtd_generation_result type for a list of the
    % other possible values of DTDResult and their meanings.
    %
:- pred write_xml_doc_general_cc(Stream::in, T::in,
    element_mapping::in(element_mapping), maybe_stylesheet::in,
    maybe_dtd::in, dtd_generation_result::out, State::di, State::uo)
    is cc_multi <= stream.writer(Stream, string, State).

    % write_xml_element_general(Stream, NonCanon, MakeElement, IndentLevel,
    %   Term, !State):
    %
    % Write XML elements for the given term and all its descendents, using
    % IndentLevel as the initial indentation level (each indentation level
    % is one tab character) and using the MakeElement predicate to map
    % functors to elements. No <?xml ... ?> header will be written.
    % Non-canonical terms will be handled according to the value of NonCanon.
    % See the deconstruct module in the standard library for more information
    % on this argument.
    %
:- pred write_xml_element_general(Stream, deconstruct.noncanon_handling,
    element_mapping, int, T, State, State)
    <= stream.writer(Stream, string, State).
:- mode write_xml_element_general(in, in(do_not_allow), in(element_mapping),
    in, in, di, uo) is det.
:- mode write_xml_element_general(in, in(canonicalize), in(element_mapping),
    in, in, di, uo) is det.
:- mode write_xml_element_general(in, in(include_details_cc),
    in(element_mapping), in, in, di, uo) is cc_multi.
:- mode write_xml_element_general(in, in, in(element_mapping),
    in, in, di, uo) is cc_multi.

%--------------------------------------------------%

    % can_generate_dtd(ElementMapping, Type) = Result:
    %
    % Check if a DTD can be generated for the given Type using the
    % functor-to-element mapping scheme ElementMapping. Return `ok' if it
    % is possible to generate a DTD. See the documentation of the
    % dtd_generation_result type for the meaning of the return value when
    % it is not `ok'.
    %
:- func can_generate_dtd(element_mapping::in(element_mapping),
    type_desc::in) = (dtd_generation_result::out) is det.

    % write_dtd(Stream, Term, ElementMapping, DTDResult, !State):
    %
    % Write a DTD for the given term to the current file output stream using
    % ElementMapping to map functors to elements. If a DTD cannot be generated
    % for Term using ElementMapping, then a value other than `ok' is returned
    % in DTDResult and nothing is written. See the dtd_generation_result type
    % for a list of the other possible values of DTDResult and their meanings.
    %
:- pred write_dtd(Stream::in, T::unused,
    element_mapping::in(element_mapping), dtd_generation_result::out,
    State::di, State::uo) is det
    <= stream.writer(Stream, string, State).

    % write_dtd_for_type(Stream, Type, ElementMapping, DTDResult, !State):
    %
    % Write a DTD for the given type to the given stream. If a DTD cannot
    % be generated for Type using ElementMapping then a value other than `ok'
    % is returned in DTDResult and nothing is written. See the
    % dtd_generation_result type for a list of the other possible values
    % of DTDResult and their meanings.
    %
:- pred write_dtd_from_type(Stream::in, type_desc::in,
    element_mapping::in(element_mapping), dtd_generation_result::out,
    State::di, State::uo) is det <= stream.writer(Stream, string, State).

%--------------------------------------------------%
%--------------------------------------------------%


Next: , Previous: term_io, Up: Top   [Contents]