Next: , Previous: solutions, Up: Top   [Contents]


85 sparse_bitset

%--------------------------------------------------%
% vim: ts=4 sw=4 et ft=mercury
%--------------------------------------------------%
% Copyright (C) 2000-2007, 2011-2012 The University of Melbourne.
% Copyright (C) 2014-2024 The Mercury team.
% This file is distributed under the terms specified in COPYING.LIB.
%--------------------------------------------------%
%
% File: sparse_bitset.m.
% Author: stayl.
% Stability: medium.
%
% This module provides an abstract data type for storing sets of items
% that can each be represented by non-negative integers.
% If the integers being stored are closely grouped, a sparse_bitset
% will be much more compact than either the list-of-elements representations
% provided by set.m, set_ordlist.m, and set_unordlist.m, or the
% tree-of-elements representations provided by set_bbbtree.m, set_tree234.
% or set_ctree234.m.
%
% A sparse bitset is represented as a sorted list, with each element
% of this list containing two unsigned integers: Offset and Bits.
% Offset will always be a multiple of uint.ubits_per_uint, and
% the bits of Bits describe which of the elements of the range
% Offset .. (Offset + ubits_per_uint - 1) are in the set.
% The value of Bits must not be zero; any operation that would clear
% all the bits in Bits must also delete the whole list element.
% As one goes from the head towards the tail of the list, the offsets of
% the list elements must strictly increase.
%
% The values of Offset in the list need not be *contiguous* multiples
% of ubits_per_uint, hence the name *sparse* bitset.
%
% A sparse_bitset is suitable for storing sets of integers which
% can be represented using only a few Offset/Bits pairs.
% In the worst case, where the integers stored are not closely grouped,
% a sparse_bitset will take more memory than an ordinary set, but
% the operations should not be too much slower.
%
% In the asymptotic complexities of the operations below,
% `rep_size(Set)' is the number of Offset/Bits pairs needed to represent Set,
% and `card(Set)' is the cardinality of Set (i.e. its number of elements).
%
%--------------------------------------------------%
%
% There are three other modules in the Mercury standard library that
% represent sets using similar data structures. They are
%
% - the tree_bitset module,
% - the fat_sparse_bitset module, and
% - the fatter_sparse_bitset module.
%
% The comment at the top of tree_bitset.m explains
%
% - how its data structure differ from the data structure described above,
%   which is the base on top of which they each impose their own variations,
%   and
%
% - what objective those differences are intended to achieve.
%
% The comment at the top of fatter_sparse_bitset.m does the same
% for the representations used in fat_sparse_bitset.m as well as
% fatter_sparse_bitset.m.
%
%--------------------------------------------------%

:- module sparse_bitset.
:- interface.

:- import_module enum.
:- import_module list.
:- import_module term.

:- use_module set.

%--------------------------------------------------%

:- type sparse_bitset(T). % <= uenum(T).

%--------------------------------------------------%
%
% Initial creation of sets.
%

    % Return an empty set.
    %
:- func init = sparse_bitset(T).
:- pred init(sparse_bitset(T)::out) is det.

    % Note: set.m contains the reverse mode of this predicate, but it is
    % difficult to implement both modes using the representation in this
    % module.
    %
:- pred singleton_set(sparse_bitset(T)::out, T::in) is det <= uenum(T).

    % make_singleton_set(Item) returns a set containing just the single Item.
    %
:- func make_singleton_set(T) = sparse_bitset(T) <= uenum(T).

%--------------------------------------------------%
%
% Emptiness and singleton-ness tests.
%

:- pred is_empty(sparse_bitset(T)::in) is semidet.

:- pred is_non_empty(sparse_bitset(T)::in) is semidet.

    % Is the given set a singleton, and if yes, what is the element?
    %
:- pred is_singleton(sparse_bitset(T)::in, T::out) is semidet <= uenum(T).

%--------------------------------------------------%
%
% Membership tests.
%

    % member(Item, Set) is true iff Item is a member of Set.
    % Takes O(rep_size(Set)) time.
    %
:- pred member(T, sparse_bitset(T)) <= uenum(T).
:- mode member(in, in) is semidet.
:- mode member(out, in) is nondet.

    % contains(Set, Item) is true iff Item is a member of Set.
    % Takes O(rep_size(Set)) time.
    %
:- pred contains(sparse_bitset(T)::in, T::in) is semidet <= uenum(T).

%--------------------------------------------------%
%
% Insertions and deletions.
%

    % insert(Set, Item) returns the union of Set and the set containing
    % only Item. Takes O(rep_size(Set)) time and space.
    %
:- func insert(sparse_bitset(T), T) = sparse_bitset(T) <= uenum(T).
:- pred insert(T::in, sparse_bitset(T)::in, sparse_bitset(T)::out)
    is det <= uenum(T).

    % insert_new(Item, Set0, Set) returns the union of Set0 and the set
    % containing only Item if Set0 does not already contain Item; if it does,
    % it fails. Takes O(rep_size(Set)) time and space.
    %
:- pred insert_new(T::in, sparse_bitset(T)::in, sparse_bitset(T)::out)
    is semidet <= uenum(T).

    % insert_list(Set, Item) returns the union of Set and the set containing
    % only the members of Item. Same as `union(Set, list_to_set(Item))',
    % but may be more efficient.
    %
:- func insert_list(sparse_bitset(T), list(T)) = sparse_bitset(T) <= uenum(T).
:- pred insert_list(list(T)::in, sparse_bitset(T)::in, sparse_bitset(T)::out)
    is det <= uenum(T).

%--------------------------------------------------%

    % delete(Set, Item) returns the difference of Set and the set containing
    % only Item. Takes O(rep_size(Set)) time and space.
    %
:- func delete(sparse_bitset(T), T) = sparse_bitset(T) <= uenum(T).
:- pred delete(T::in, sparse_bitset(T)::in, sparse_bitset(T)::out)
    is det <= uenum(T).

    % delete_list(Set, Item) returns the difference of Set and the set
    % containing only the members of Item. Same as
    % `difference(Set, list_to_set(Item))', but may be more efficient.
    %
:- func delete_list(sparse_bitset(T), list(T)) = sparse_bitset(T) <= uenum(T).
:- pred delete_list(list(T)::in, sparse_bitset(T)::in, sparse_bitset(T)::out)
    is det <= uenum(T).

    % remove(Item, Set0, Set) returns in Set the difference of Set0
    % and the set containing only Item, failing if Set0 does not contain Item.
    % Takes O(rep_size(Set)) time and space.
    %
:- pred remove(T::in, sparse_bitset(T)::in, sparse_bitset(T)::out)
    is semidet <= uenum(T).

    % remove_list(Item, Set0, Set) returns in Set the difference of Set0
    % and the set containing all the elements of Item, failing if any element
    % of Item is not in Set0. Same as `subset(list_to_set(Item), Set0),
    % difference(Set0, list_to_set(Item), Set)', but may be more efficient.
    %
:- pred remove_list(list(T)::in, sparse_bitset(T)::in, sparse_bitset(T)::out)
    is semidet <= uenum(T).

    % remove_leq(Set, Item) returns Set with all elements less than or equal
    % to Item removed. In other words, it returns the set containing all the
    % elements of Set whose enum forms are greater than the enum form of Item.
    %
:- func remove_leq(sparse_bitset(T), T) = sparse_bitset(T) <= uenum(T).
:- pred remove_leq(T::in, sparse_bitset(T)::in, sparse_bitset(T)::out)
    is det <= uenum(T).

    % remove_gt(Set, Item) returns Set with all elements greater than Item
    % removed. In other words, it returns the set containing all the elements
    % of Set whose enum forms are less than or equal to the enum form of Item.
    %
:- func remove_gt(sparse_bitset(T), T) = sparse_bitset(T) <= uenum(T).
:- pred remove_gt(T::in, sparse_bitset(T)::in, sparse_bitset(T)::out)
    is det <= uenum(T).

    % remove_least(Set0, Item, Set) is true iff Item is the element
    % whose enum form is the smallest in Set0, and Set is the set
    % which contains all the elements of Set0 except Item. Takes O(1) time
    % and space.
    %
:- pred remove_least(T::out, sparse_bitset(T)::in, sparse_bitset(T)::out)
    is semidet <= uenum(T).

%--------------------------------------------------%
%
% Comparisons between sets.
%

    % equal(SetA, SetB) is true iff SetA and SetB contain the same elements.
    % Takes O(min(rep_size(SetA), rep_size(SetB))) time.
    %
:- pred equal(sparse_bitset(T)::in, sparse_bitset(T)::in) is semidet.

    % subset(Subset, Set) is true iff Subset is a subset of Set.
    % Same as `intersect(Set, Subset, Subset)', but may be more efficient.
    %
:- pred subset(sparse_bitset(T)::in, sparse_bitset(T)::in) is semidet.

    % superset(Superset, Set) is true iff Superset is a superset of Set.
    % Same as `intersect(Superset, Set, Set)', but may be more efficient.
    %
:- pred superset(sparse_bitset(T)::in, sparse_bitset(T)::in) is semidet.

%--------------------------------------------------%
%
% Operations on two or more sets.
%

    % union(SetA, SetB) returns the union of SetA and SetB.
    % The efficiency of the union operation is not sensitive to the argument
    % ordering. Takes O(rep_size(SetA) + rep_size(SetB)) time and space.
    %
:- func union(sparse_bitset(T), sparse_bitset(T)) = sparse_bitset(T).
:- pred union(sparse_bitset(T)::in, sparse_bitset(T)::in,
    sparse_bitset(T)::out) is det.

    % union_list(Sets, Set) returns the union of all the sets in Sets.
    %
:- func union_list(list(sparse_bitset(T))) = sparse_bitset(T).
:- pred union_list(list(sparse_bitset(T))::in, sparse_bitset(T)::out) is det.

    % intersect(SetA, SetB) returns the intersection of SetA and SetB.
    % The efficiency of the intersection operation is not sensitive to the
    % argument ordering. Takes O(rep_size(SetA) + rep_size(SetB)) time and
    % O(min(rep_size(SetA)), rep_size(SetB)) space.
    %
:- func intersect(sparse_bitset(T), sparse_bitset(T)) = sparse_bitset(T).
:- pred intersect(sparse_bitset(T)::in, sparse_bitset(T)::in,
    sparse_bitset(T)::out) is det.

    % intersect_list(Sets, Set) returns the intersection of all the sets
    % in Sets.
    %
:- func intersect_list(list(sparse_bitset(T))) = sparse_bitset(T).
:- pred intersect_list(list(sparse_bitset(T))::in, sparse_bitset(T)::out)
    is det.

    % difference(SetA, SetB) returns the set containing all the elements
    % of SetA except those that occur in SetB. Takes
    % O(rep_size(SetA) + rep_size(SetB)) time and O(rep_size(SetA)) space.
    %
:- func difference(sparse_bitset(T), sparse_bitset(T)) = sparse_bitset(T).
:- pred difference(sparse_bitset(T)::in, sparse_bitset(T)::in,
    sparse_bitset(T)::out) is det.

%--------------------------------------------------%
%
% Operations that divide a set into two parts.
%

    % divide(Pred, Set, InPart, OutPart):
    % InPart consists of those elements of Set for which Pred succeeds;
    % OutPart consists of those elements of Set for which Pred fails.
    %
:- pred divide(pred(T)::in(pred(in) is semidet), sparse_bitset(T)::in,
    sparse_bitset(T)::out, sparse_bitset(T)::out) is det <= uenum(T).

    % divide_by_set(DivideBySet, Set, InPart, OutPart):
    % InPart consists of those elements of Set which are also in DivideBySet;
    % OutPart consists of those elements of Set which are not in DivideBySet.
    %
:- pred divide_by_set(sparse_bitset(T)::in, sparse_bitset(T)::in,
    sparse_bitset(T)::out, sparse_bitset(T)::out) is det <= uenum(T).

%--------------------------------------------------%
%
% Converting lists to sets.
%

    % list_to_set(List) returns a set containing only the members of List.
    % In the worst case, this will take O(length(List)^2) time and space.
    % If the elements of the list are closely grouped, it will be closer
    % to O(length(List)).
    %
:- func list_to_set(list(T)) = sparse_bitset(T) <= uenum(T).
:- pred list_to_set(list(T)::in, sparse_bitset(T)::out) is det <= uenum(T).

    % sorted_list_to_set(List) returns a set containing only the members
    % of List. List must be sorted *on the enum values of the items*.
    % If the to_uint method of uenum(T) preserves order, then this is
    % equivalent to requiring that List be sorted according to type T's
    % comparison operation.
    %
    % This operation takes O(length(List)) time and space.
    %
:- func sorted_list_to_set(list(T)) = sparse_bitset(T) <= uenum(T).
:- pred sorted_list_to_set(list(T)::in, sparse_bitset(T)::out)
    is det <= uenum(T).

%--------------------------------------------------%
%
% Converting sets to lists.
%

    % to_sorted_list(Set) returns a list containing all the members of Set,
    % in sorted order. Takes O(card(Set)) time and space.
    %
:- func to_sorted_list(sparse_bitset(T)) = list(T) <= uenum(T).
:- pred to_sorted_list(sparse_bitset(T)::in, list(T)::out) is det <= uenum(T).

%--------------------------------------------------%
%
% Converting between different kinds of sets.
%

    % from_set(Set) returns a bitset containing only the members of Set.
    % Takes O(card(Set)) time and space.
    %
:- func from_set(set.set(T)) = sparse_bitset(T) <= uenum(T).

    % to_set(Set) returns a set.set containing all the members of Set.
    % Takes O(card(Set)) time and space.
    %
:- func to_set(sparse_bitset(T)) = set.set(T) <= uenum(T).

%--------------------------------------------------%
%
% Counting.
%

    % count(Set) returns the number of elements in Set.
    % Takes O(card(Set)) time.
    %
:- func count(sparse_bitset(T)) = int <= uenum(T).

%--------------------------------------------------%
%
% Standard higher order functions on collections.
%

    % all_true(Pred, Set) succeeds iff Pred(Element) succeeds
    % for all the elements of Set.
    %
:- pred all_true(pred(T)::in(pred(in) is semidet), sparse_bitset(T)::in)
    is semidet <= uenum(T).

    % filter(Pred, Set) returns the elements of Set for which Pred succeeds.
    %
:- func filter(pred(T), sparse_bitset(T)) = sparse_bitset(T) <= uenum(T).
:- mode filter(in(pred(in) is semidet), in) = out is det.

    % filter(Pred, Set, TrueSet, FalseSet) returns the elements of Set
    % for which Pred succeeds, and those for which it fails.
    %
:- pred filter(pred(T), sparse_bitset(T), sparse_bitset(T), sparse_bitset(T))
    <= uenum(T).
:- mode filter(in(pred(in) is semidet), in, out, out) is det.

    % foldl(Func, Set, Start) calls Func with each element of Set
    % (in sorted order) and an accumulator (with the initial value of Start),
    % and returns the final value. Takes O(card(Set)) time.
    %
:- func foldl(func(T, U) = U, sparse_bitset(T), U) = U <= uenum(T).

:- pred foldl(pred(T, U, U), sparse_bitset(T), U, U) <= uenum(T).
:- mode foldl(in(pred(in, in, out) is det), in, in, out) is det.
:- mode foldl(in(pred(in, mdi, muo) is det), in, mdi, muo) is det.
:- mode foldl(in(pred(in, di, uo) is det), in, di, uo) is det.
:- mode foldl(in(pred(in, in, out) is semidet), in, in, out) is semidet.
:- mode foldl(in(pred(in, mdi, muo) is semidet), in, mdi, muo) is semidet.
:- mode foldl(in(pred(in, di, uo) is semidet), in, di, uo) is semidet.
:- mode foldl(in(pred(in, in, out) is nondet), in, in, out) is nondet.
:- mode foldl(in(pred(in, in, out) is cc_multi), in, in, out) is cc_multi.
:- mode foldl(in(pred(in, di, uo) is cc_multi), in, di, uo) is cc_multi.

:- pred foldl2(pred(T, U, U, V, V), sparse_bitset(T), U, U, V, V) <= uenum(T).
:- mode foldl2(in(pred(in, in, out, in, out) is det),
    in, in, out, in, out) is det.
:- mode foldl2(in(pred(in, in, out, mdi, muo) is det),
    in, in, out, mdi, muo) is det.
:- mode foldl2(in(pred(in, in, out, di, uo) is det),
    in, in, out, di, uo) is det.
:- mode foldl2(in(pred(in, di, uo, di, uo) is det),
    in, di, uo, di, uo) is det.
:- mode foldl2(in(pred(in, in, out, in, out) is semidet),
    in, in, out, in, out) is semidet.
:- mode foldl2(in(pred(in, in, out, mdi, muo) is semidet),
    in, in, out, mdi, muo) is semidet.
:- mode foldl2(in(pred(in, in, out, di, uo) is semidet),
    in, in, out, di, uo) is semidet.
:- mode foldl2(in(pred(in, in, out, in, out) is nondet),
    in, in, out, in, out) is nondet.
:- mode foldl2(in(pred(in, in, out, in, out) is cc_multi),
    in, in, out, in, out) is cc_multi.
:- mode foldl2(in(pred(in, in, out, di, uo) is cc_multi),
    in, in, out, di, uo) is cc_multi.
:- mode foldl2(in(pred(in, di, uo, di, uo) is cc_multi),
    in, di, uo, di, uo) is cc_multi.

    % foldr(Func, Set, Start) calls Func with each element of Set
    % (in reverse sorted order) and an accumulator (with the initial value
    % of Start), and returns the final value. Takes O(card(Set)) time.
    %
:- func foldr(func(T, U) = U, sparse_bitset(T), U) = U <= uenum(T).

:- pred foldr(pred(T, U, U), sparse_bitset(T), U, U) <= uenum(T).
:- mode foldr(in(pred(in, in, out) is det), in, in, out) is det.
:- mode foldr(in(pred(in, mdi, muo) is det), in, mdi, muo) is det.
:- mode foldr(in(pred(in, di, uo) is det), in, di, uo) is det.
:- mode foldr(in(pred(in, in, out) is semidet), in, in, out) is semidet.
:- mode foldr(in(pred(in, mdi, muo) is semidet), in, mdi, muo) is semidet.
:- mode foldr(in(pred(in, di, uo) is semidet), in, di, uo) is semidet.
:- mode foldr(in(pred(in, in, out) is nondet), in, in, out) is nondet.
:- mode foldr(in(pred(in, in, out) is cc_multi), in, in, out) is cc_multi.
:- mode foldr(in(pred(in, di, uo) is cc_multi), in, di, uo) is cc_multi.

:- pred foldr2(pred(T, U, U, V, V), sparse_bitset(T), U, U, V, V) <= uenum(T).
:- mode foldr2(in(pred(in, in, out, in, out) is det),
    in, in, out, in, out) is det.
:- mode foldr2(in(pred(in, in, out, mdi, muo) is det),
    in, in, out, mdi, muo) is det.
:- mode foldr2(in(pred(in, in, out, di, uo) is det),
    in, in, out, di, uo) is det.
:- mode foldr2(in(pred(in, di, uo, di, uo) is det),
    in, di, uo, di, uo) is det.
:- mode foldr2(in(pred(in, in, out, in, out) is semidet),
    in, in, out, in, out) is semidet.
:- mode foldr2(in(pred(in, in, out, mdi, muo) is semidet),
    in, in, out, mdi, muo) is semidet.
:- mode foldr2(in(pred(in, in, out, di, uo) is semidet),
    in, in, out, di, uo) is semidet.
:- mode foldr2(in(pred(in, in, out, in, out) is nondet),
    in, in, out, in, out) is nondet.
:- mode foldr2(in(pred(in, di, uo, di, uo) is cc_multi),
    in, di, uo, di, uo) is cc_multi.
:- mode foldr2(in(pred(in, in, out, di, uo) is cc_multi),
    in, in, out, di, uo) is cc_multi.
:- mode foldr2(in(pred(in, in, out, in, out) is cc_multi),
    in, in, out, in, out) is cc_multi.

%--------------------------------------------------%
%--------------------------------------------------%


Next: , Previous: solutions, Up: Top   [Contents]