%--------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%--------------------------------------------------%
% Copyright (C) 2006, 2009-2012 The University of Melbourne.
% Copyright (C) 2014-2018, 2021-2022, 2024-2025 The Mercury team.
% This file is distributed under the terms specified in COPYING.LIB.
%--------------------------------------------------%
%
% File: tree_bitset.m.
% Author: zs, based on sparse_bitset.m by stayl.
% Stability: high.
%
% This module provides an abstract data type for storing sets of items
% that can each be represented by unsigned integers.
%
% The tree_bitset representation is a variant of the representation used by
% sparse_bitset.m, which is a list of Offset/Bits pairs, with the Bits
% indicating which of the ubits_per_uint integers starting at Offset
% are in the set.
%
% The problem that the tree_bitset module is intended to solve is that
% some operations, such as union and intersection, which are implemented
% as a joint traversal of the lists representing the two input operands,
% have bad worst-case complexities. For example, an operation to compute
% the intersection of the set 0-1,000,000 and the set 2,000,000-3,000,000
% has to traverse 1,000,000/wordsize pairs in the first operand before
% finding that the intersection is empty.
%
% This module addresses this problem by replacing the single global list
% of offset/bits pairs with a tree structure. The leaves of this tree
% are also offset/bits pairs. Each interior node in the first layer above
% the leaves has reachable from it up to 32 such pairs; each node in the
% layer above that can reach up to 32*32 pairs, and so on. This means that
% operations such as difference can, by skipping one interior node, skip
% a large number of offset/bits pairs.
%
% This is why the operations provided by this module for contains, union,
% intersection and difference can be expected to have lower asymptotic
% complexities (often logarithmic in the number of elements in the sets,
% rather than linear) than the sparse_bitset module. The price for this
% is a representation that requires more memory, has higher constant factors,
% and an additional factor representing the tree in the complexity of the
% operations that construct tree_bitsets. However, since the depth of the tree
% has a small upper bound for all sets of a practical size, we will fold this
% into the "higher constant factors" in the descriptions of the complexity
% of the individual operations below.
%
% All this means that using a tree_bitset in preference to a sparse_bitset
% is likely to be a good idea only when the sizes of the sets to be manipulated
% are quite big, or when worst-case performance is important.
%
%--------------------------------------------------%
:- module tree_bitset.
:- interface.
:- import_module enum.
:- import_module list.
:- import_module term.
:- use_module set.
%--------------------------------------------------%
:- type tree_bitset(T). % <= uenum(T).
%--------------------------------------------------%
%
% Initial creation of sets.
%
% Return an empty set.
%
:- func init = tree_bitset(T).
% make_singleton_set(Elem) returns a set containing just the single
% element Elem.
%
:- func make_singleton_set(T) = tree_bitset(T) <= uenum(T).
%--------------------------------------------------%
%
% Emptiness and singleton-ness tests.
%
:- pred is_empty(tree_bitset(T)::in) is semidet.
:- pred is_non_empty(tree_bitset(T)::in) is semidet.
% Is the given set a singleton, and if yes, what is the element?
%
:- pred is_singleton(tree_bitset(T)::in, T::out) is semidet <= uenum(T).
%--------------------------------------------------%
%
% Membership tests.
%
% member(X, Set) is true if-and-only-if X is a member of Set.
% Takes O(card(Set)) time for the semidet mode.
%
:- pred member(T, tree_bitset(T)) <= uenum(T).
:- mode member(in, in) is semidet.
:- mode member(out, in) is nondet.
% contains(Set, X) is true if-and-only-if X is a member of Set.
% Takes O(log(card(Set))) time.
%
:- pred contains(tree_bitset(T)::in, T::in) is semidet <= uenum(T).
:- pred nondet_member(tree_bitset(T)::in, T::out) is nondet <= uenum(T).
%--------------------------------------------------%
%
% Insertions and deletions.
%
% insert(Set, X) returns the union of Set and the set containing
% only X. Takes O(log(card(Set))) time and space.
%
:- func insert(tree_bitset(T), T) = tree_bitset(T) <= uenum(T).
:- pred insert(T::in, tree_bitset(T)::in, tree_bitset(T)::out)
is det <= uenum(T).
% insert_new(X, Set0, Set) returns the union of Set and the set
% containing only X is Set0 does not contain 'X'; if it does, it fails.
% Takes O(log(card(Set))) time and space.
%
:- pred insert_new(T::in, tree_bitset(T)::in, tree_bitset(T)::out)
is semidet <= uenum(T).
% insert_list(Set, X) returns the union of Set and the set containing
% only the members of X. Same as `union(Set, list_to_set(X))', but may be
% more efficient.
%
:- func insert_list(tree_bitset(T), list(T)) = tree_bitset(T) <= uenum(T).
:- pred insert_list(list(T)::in, tree_bitset(T)::in, tree_bitset(T)::out)
is det <= uenum(T).
%--------------------------------------------------%
% delete(Set, X) returns the difference of Set and the set containing
% only X. Takes O(card(Set)) time and space.
%
:- func delete(tree_bitset(T), T) = tree_bitset(T) <= uenum(T).
:- pred delete(T::in, tree_bitset(T)::in, tree_bitset(T)::out)
is det <= uenum(T).
% delete_list(Set, X) returns the difference of Set and the set
% containing only the members of X. Same as
% `difference(Set, list_to_set(X))', but may be more efficient.
%
:- func delete_list(tree_bitset(T), list(T)) = tree_bitset(T) <= uenum(T).
:- pred delete_list(list(T)::in, tree_bitset(T)::in, tree_bitset(T)::out)
is det <= uenum(T).
% remove(X, Set0, Set) returns in Set the difference of Set0
% and the set containing only X, failing if Set0 does not contain X.
% Takes O(log(card(Set))) time and space.
%
:- pred remove(T::in, tree_bitset(T)::in, tree_bitset(T)::out)
is semidet <= uenum(T).
% remove_list(X, Set0, Set) returns in Set the difference of Set0
% and the set containing all the elements of X, failing if any element
% of X is not in Set0. Same as `subset(list_to_set(X), Set0),
% difference(Set0, list_to_set(X), Set)', but may be more efficient.
%
:- pred remove_list(list(T)::in, tree_bitset(T)::in, tree_bitset(T)::out)
is semidet <= uenum(T).
% remove_leq(Set, X) returns Set with all elements less than or equal
% to X removed. In other words, it returns the set containing all the
% elements of Set which are greater than X. Takes O(log(card(Set)))
% time and space.
%
:- func remove_leq(tree_bitset(T), T) = tree_bitset(T) <= uenum(T).
:- pred remove_leq(T::in, tree_bitset(T)::in, tree_bitset(T)::out) is det
<= uenum(T).
% remove_gt(Set, X) returns Set with all elements greater than X
% removed. In other words, it returns the set containing all the elements
% of Set which are less than or equal to X. Takes O(log(card(Set)))
% time and space.
%
:- func remove_gt(tree_bitset(T), T) = tree_bitset(T) <= uenum(T).
:- pred remove_gt(T::in, tree_bitset(T)::in, tree_bitset(T)::out) is det
<= uenum(T).
% remove_least(Set0, X, Set) is true if-and-only-if
% X is the least element in Set0, and Set is the set which contains
% all the elements of Set0 except X. Takes O(1) time and space.
%
:- pred remove_least(T::out, tree_bitset(T)::in, tree_bitset(T)::out)
is semidet <= uenum(T).
%--------------------------------------------------%
%
% Comparisons between sets.
%
% equal(SetA, SetB) is true if-and-only-if SetA and SetB contain the same
% elements. Takes O(min(card(SetA), card(SetB))) time.
%
:- pred equal(tree_bitset(T)::in, tree_bitset(T)::in) is semidet <= uenum(T).
% subset(Subset, Set) is true if-and-only-if Subset is a subset of Set.
% Same as `intersect(Set, Subset, Subset)', but may be more efficient.
%
:- pred subset(tree_bitset(T)::in, tree_bitset(T)::in) is semidet.
% superset(Superset, Set) is true if-and-only-if
% Superset is a superset of Set.
% Same as `intersect(Superset, Set, Set)', but may be more efficient.
%
:- pred superset(tree_bitset(T)::in, tree_bitset(T)::in) is semidet.
%--------------------------------------------------%
%
% Operations on two or more sets.
%
% union(SetA, SetB) returns the union of SetA and SetB. The
% efficiency of the union operation is not sensitive to the argument
% ordering. Takes somewhere between O(log(card(SetA)) + log(card(SetB)))
% and O(card(SetA) + card(SetB)) time and space.
%
:- func union(tree_bitset(T), tree_bitset(T)) = tree_bitset(T).
:- pred union(tree_bitset(T)::in, tree_bitset(T)::in, tree_bitset(T)::out)
is det.
% union_list(Sets, Set) returns the union of all the sets in Sets.
%
:- func union_list(list(tree_bitset(T))) = tree_bitset(T).
:- pred union_list(list(tree_bitset(T))::in, tree_bitset(T)::out) is det.
% intersect(SetA, SetB) returns the intersection of SetA and SetB.
% The efficiency of the intersection operation is not sensitive to the
% argument ordering. Takes somewhere between
% O(log(card(SetA)) + log(card(SetB))) and O(card(SetA) + card(SetB)) time,
% and O(min(card(SetA)), card(SetB)) space.
%
:- func intersect(tree_bitset(T), tree_bitset(T)) = tree_bitset(T).
:- pred intersect(tree_bitset(T)::in, tree_bitset(T)::in, tree_bitset(T)::out)
is det.
% intersect_list(Sets, Set) returns the intersection of all the sets
% in Sets.
%
:- func intersect_list(list(tree_bitset(T))) = tree_bitset(T).
:- pred intersect_list(list(tree_bitset(T))::in, tree_bitset(T)::out) is det.
% difference(SetA, SetB) returns the set containing all the elements
% of SetA except those that occur in SetB. Takes somewhere between
% O(log(card(SetA)) + log(card(SetB))) and O(card(SetA) + card(SetB)) time,
% and O(card(SetA)) space.
%
:- func difference(tree_bitset(T), tree_bitset(T)) = tree_bitset(T).
:- pred difference(tree_bitset(T)::in, tree_bitset(T)::in, tree_bitset(T)::out)
is det.
%--------------------------------------------------%
%
% Operations that divide a set into two parts.
%
% divide(Pred, Set, InPart, OutPart):
% InPart consists of those elements of Set for which Pred succeeds;
% OutPart consists of those elements of Set for which Pred fails.
%
:- pred divide(pred(T)::in(pred(in) is semidet), tree_bitset(T)::in,
tree_bitset(T)::out, tree_bitset(T)::out) is det <= uenum(T).
% divide_by_set(DivideBySet, Set, InPart, OutPart):
% InPart consists of those elements of Set which are also in DivideBySet;
% OutPart consists of those elements of Set which are not in DivideBySet.
%
:- pred divide_by_set(tree_bitset(T)::in, tree_bitset(T)::in,
tree_bitset(T)::out, tree_bitset(T)::out) is det <= uenum(T).
%--------------------------------------------------%
%
% Converting lists to sets.
%
% list_to_set(List) returns a set containing only the members of List.
% Takes O(length(List)) time and space.
%
:- func list_to_set(list(T)) = tree_bitset(T) <= uenum(T).
:- pred list_to_set(list(T)::in, tree_bitset(T)::out) is det <= uenum(T).
% A synonym for list_to_set/1.
%
:- func from_list(list(T)) = tree_bitset(T) <= uenum(T).
% sorted_list_to_set(List) returns a set containing only the members
% of List. List must be sorted *on the enum values of the items*.
% If the to_uint method of uenum(T) preserves order, then this is
% equivalent to requiring that List be sorted according to type T's
% comparison operation.
%
% This operation takes O(length(List)) time and space.
%
:- func sorted_list_to_set(list(T)) = tree_bitset(T) <= uenum(T).
:- pred sorted_list_to_set(list(T)::in, tree_bitset(T)::out) is det
<= uenum(T).
% A synonym for sorted_list_to_set/1.
%
:- func from_sorted_list(list(T)) = tree_bitset(T) <= uenum(T).
%--------------------------------------------------%
%
% Converting sets to lists.
%
% to_sorted_list(Set)' returns a list containing all the members of Set,
% in sorted order. Takes O(card(Set)) time and space.
%
:- func to_sorted_list(tree_bitset(T)) = list(T) <= uenum(T).
:- pred to_sorted_list(tree_bitset(T)::in, list(T)::out) is det <= uenum(T).
%--------------------------------------------------%
%
% Converting between different kinds of sets.
%
% from_set(Set) returns a bitset containing only the members of Set.
% Takes O(card(Set)) time and space.
%
:- func from_set(set.set(T)) = tree_bitset(T) <= uenum(T).
% to_sorted_list(Set) returns a set.set containing all the members
% of Set, in sorted order. Takes O(card(Set)) time and space.
%
:- func to_set(tree_bitset(T)) = set.set(T) <= uenum(T).
%--------------------------------------------------%
%
% Counting.
%
% count(Set) returns the number of elements in Set.
% Takes O(card(Set)) time.
%
:- func count(tree_bitset(T)) = int <= uenum(T).
%--------------------------------------------------%
%
% Standard higher order functions on collections.
%
% all_true(Pred, Set) succeeds if-and-only-if
% Pred(Element) succeeds for all the elements of Set.
%
:- pred all_true(pred(T)::in(pred(in) is semidet), tree_bitset(T)::in)
is semidet <= uenum(T).
% filter(Pred, Set) returns the elements of Set for which Pred succeeds.
%
:- func filter(pred(T)::in(pred(in) is semidet), tree_bitset(T)::in)
= (tree_bitset(T)::out) is det <= uenum(T).
% filter(Pred, Set, TrueSet, FalseSet) returns the elements of Set
% for which Pred succeeds, and those for which it fails.
%
:- pred filter(pred(T)::in(pred(in) is semidet),
tree_bitset(T)::in, tree_bitset(T)::out, tree_bitset(T)::out) is det
<= uenum(T).
% foldl(Func, Set, Start) calls Func with each element of Set
% (in sorted order) and an accumulator (with the initial value of Start),
% and returns the final value. Takes O(card(Set)) time.
%
:- func foldl(func(T, A) = A, tree_bitset(T), A) = A <= uenum(T).
:- pred foldl(pred(T, A, A), tree_bitset(T), A, A) <= uenum(T).
:- mode foldl(in(pred(in, in, out) is det), in, in, out) is det.
:- mode foldl(in(pred(in, mdi, muo) is det), in, mdi, muo) is det.
:- mode foldl(in(pred(in, di, uo) is det), in, di, uo) is det.
:- mode foldl(in(pred(in, in, out) is semidet), in, in, out) is semidet.
:- mode foldl(in(pred(in, mdi, muo) is semidet), in, mdi, muo) is semidet.
:- mode foldl(in(pred(in, di, uo) is semidet), in, di, uo) is semidet.
:- mode foldl(in(pred(in, in, out) is nondet), in, in, out) is nondet.
:- mode foldl(in(pred(in, mdi, muo) is nondet), in, mdi, muo) is nondet.
:- mode foldl(in(pred(in, di, uo) is cc_multi), in, di, uo) is cc_multi.
:- mode foldl(in(pred(in, in, out) is cc_multi), in, in, out) is cc_multi.
:- pred foldl2(pred(T, A, A, B, B), tree_bitset(T), A, A, B, B) <= uenum(T).
:- mode foldl2(in(pred(in, di, uo, di, uo) is det),
in, di, uo, di, uo) is det.
:- mode foldl2(in(pred(in, in, out, di, uo) is det),
in, in, out, di, uo) is det.
:- mode foldl2(in(pred(in, in, out, in, out) is det),
in, in, out, in, out) is det.
:- mode foldl2(in(pred(in, in, out, in, out) is semidet),
in, in, out, in, out) is semidet.
:- mode foldl2(in(pred(in, in, out, in, out) is nondet),
in, in, out, in, out) is nondet.
:- mode foldl2(in(pred(in, di, uo, di, uo) is cc_multi),
in, di, uo, di, uo) is cc_multi.
:- mode foldl2(in(pred(in, in, out, di, uo) is cc_multi),
in, in, out, di, uo) is cc_multi.
:- mode foldl2(in(pred(in, in, out, in, out) is cc_multi),
in, in, out, in, out) is cc_multi.
% foldr(Func, Set, Start) calls Func with each element of Set
% (in reverse sorted order) and an accumulator (with the initial value
% of Start), and returns the final value. Takes O(card(Set)) time.
%
:- func foldr(func(T, A) = A, tree_bitset(T), A) = A <= uenum(T).
:- pred foldr(pred(T, A, A), tree_bitset(T), A, A) <= uenum(T).
:- mode foldr(in(pred(in, di, uo) is det), in, di, uo) is det.
:- mode foldr(in(pred(in, in, out) is det), in, in, out) is det.
:- mode foldr(in(pred(in, in, out) is semidet), in, in, out) is semidet.
:- mode foldr(in(pred(in, in, out) is nondet), in, in, out) is nondet.
:- mode foldr(in(pred(in, di, uo) is cc_multi), in, di, uo) is cc_multi.
:- mode foldr(in(pred(in, in, out) is cc_multi), in, in, out) is cc_multi.
:- pred foldr2(pred(T, A, A, B, B), tree_bitset(T), A, A, B, B) <= uenum(T).
:- mode foldr2(in(pred(in, di, uo, di, uo) is det),
in, di, uo, di, uo) is det.
:- mode foldr2(in(pred(in, in, out, di, uo) is det),
in, in, out, di, uo) is det.
:- mode foldr2(in(pred(in, in, out, in, out) is det),
in, in, out, in, out) is det.
:- mode foldr2(in(pred(in, in, out, in, out) is semidet),
in, in, out, in, out) is semidet.
:- mode foldr2(in(pred(in, in, out, in, out) is nondet),
in, in, out, in, out) is nondet.
:- mode foldr2(in(pred(in, di, uo, di, uo) is cc_multi),
in, di, uo, di, uo) is cc_multi.
:- mode foldr2(in(pred(in, in, out, di, uo) is cc_multi),
in, in, out, di, uo) is cc_multi.
:- mode foldr2(in(pred(in, in, out, in, out) is cc_multi),
in, in, out, in, out) is cc_multi.
%--------------------------------------------------%