%--------------------------------------------------% % vim: ft=mercury ts=4 sw=4 et %--------------------------------------------------% % Copyright (C) 2006, 2009-2012 The University of Melbourne. % Copyright (C) 2014-2018, 2021-2022, 2024 The Mercury team. % This file is distributed under the terms specified in COPYING.LIB. %--------------------------------------------------% % % File: tree_bitset.m. % Author: zs, based on sparse_bitset.m by stayl. % Stability: medium. % % This module provides an abstract data type for storing sets of items % that can each be represented by non-negative integers. % % The tree_bitset representation is a variant of the representation used by % sparse_bitset.m, which is a list of Offset/Bits pairs, with the Bits % indicating which of the ubits_per_uint integers starting at Offset % are in the set. % % The problem that the tree_bitset module is intended to solve is that % some operations, such as union and intersection, which are implemented % as a joint traversal of the lists representing the two input operands, % have bad worst-case complexities. For example, an operation to compute % the intersection of the set 0-1,000,000 and the set 2,000,000-3,000,000 % has to traverse 1,000,000/wordsize pairs in the first operand before % finding that the intersection is empty. % % This module addresses this problem by replacing the single global list % of offset/bits pairs with a tree structure. The leaves of this tree % are also offset/bits pairs. Each interior node in the first layer above % the leaves has reachable from it up to 32 such pairs; each node in the % layer above that can reach up to 32*32 pairs, and so on. This means that % operations such as difference can, by skipping one interior node, skip % a large number of offset/bits pairs. % % This is why the operations provided by this module for contains, union, % intersection and difference can be expected to have lower asymptotic % complexities (often logarithmic in the number of elements in the sets, % rather than linear) than the sparse_bitset module. The price for this % is a representation that requires more memory, has higher constant factors, % and an additional factor representing the tree in the complexity of the % operations that construct tree_bitsets. However, since the depth of the tree % has a small upper bound for all sets of a practical size, we will fold this % into the "higher constant factors" in the descriptions of the complexity % of the individual operations below. % % All this means that using a tree_bitset in preference to a sparse_bitset % is likely to be a good idea only when the sizes of the sets to be manipulated % are quite big, or when worst-case performance is important. % %--------------------------------------------------% :- module tree_bitset. :- interface. :- import_module enum. :- import_module list. :- import_module term. :- use_module set. %--------------------------------------------------% :- type tree_bitset(T). % <= uenum(T). %--------------------------------------------------% % % Initial creation of sets. % % Return an empty set. % :- func init = tree_bitset(T). % make_singleton_set(Elem) returns a set containing just the single % element Elem. % :- func make_singleton_set(T) = tree_bitset(T) <= uenum(T). %--------------------------------------------------% % % Emptiness and singleton-ness tests. % :- pred is_empty(tree_bitset(T)::in) is semidet. :- pred is_non_empty(tree_bitset(T)::in) is semidet. % Is the given set a singleton, and if yes, what is the element? % :- pred is_singleton(tree_bitset(T)::in, T::out) is semidet <= uenum(T). %--------------------------------------------------% % % Membership tests. % % member(X, Set) is true iff X is a member of Set. % Takes O(card(Set)) time for the semidet mode. % :- pred member(T, tree_bitset(T)) <= uenum(T). :- mode member(in, in) is semidet. :- mode member(out, in) is nondet. % contains(Set, X) is true iff X is a member of Set. % Takes O(log(card(Set))) time. % :- pred contains(tree_bitset(T)::in, T::in) is semidet <= uenum(T). %--------------------------------------------------% % % Insertions and deletions. % % insert(Set, X) returns the union of Set and the set containing % only X. Takes O(log(card(Set))) time and space. % :- func insert(tree_bitset(T), T) = tree_bitset(T) <= uenum(T). :- pred insert(T::in, tree_bitset(T)::in, tree_bitset(T)::out) is det <= uenum(T). % insert_new(X, Set0, Set) returns the union of Set and the set % containing only X is Set0 does not contain 'X'; if it does, it fails. % Takes O(log(card(Set))) time and space. % :- pred insert_new(T::in, tree_bitset(T)::in, tree_bitset(T)::out) is semidet <= uenum(T). % insert_list(Set, X) returns the union of Set and the set containing % only the members of X. Same as `union(Set, list_to_set(X))', but may be % more efficient. % :- func insert_list(tree_bitset(T), list(T)) = tree_bitset(T) <= uenum(T). :- pred insert_list(list(T)::in, tree_bitset(T)::in, tree_bitset(T)::out) is det <= uenum(T). %--------------------------------------------------% % delete(Set, X) returns the difference of Set and the set containing % only X. Takes O(card(Set)) time and space. % :- func delete(tree_bitset(T), T) = tree_bitset(T) <= uenum(T). :- pred delete(T::in, tree_bitset(T)::in, tree_bitset(T)::out) is det <= uenum(T). % delete_list(Set, X) returns the difference of Set and the set % containing only the members of X. Same as % `difference(Set, list_to_set(X))', but may be more efficient. % :- func delete_list(tree_bitset(T), list(T)) = tree_bitset(T) <= uenum(T). :- pred delete_list(list(T)::in, tree_bitset(T)::in, tree_bitset(T)::out) is det <= uenum(T). % remove(X, Set0, Set) returns in Set the difference of Set0 % and the set containing only X, failing if Set0 does not contain X. % Takes O(log(card(Set))) time and space. % :- pred remove(T::in, tree_bitset(T)::in, tree_bitset(T)::out) is semidet <= uenum(T). % remove_list(X, Set0, Set) returns in Set the difference of Set0 % and the set containing all the elements of X, failing if any element % of X is not in Set0. Same as `subset(list_to_set(X), Set0), % difference(Set0, list_to_set(X), Set)', but may be more efficient. % :- pred remove_list(list(T)::in, tree_bitset(T)::in, tree_bitset(T)::out) is semidet <= uenum(T). % remove_leq(Set, X) returns Set with all elements less than or equal % to X removed. In other words, it returns the set containing all the % elements of Set which are greater than X. Takes O(log(card(Set))) % time and space. % :- func remove_leq(tree_bitset(T), T) = tree_bitset(T) <= uenum(T). :- pred remove_leq(T::in, tree_bitset(T)::in, tree_bitset(T)::out) is det <= uenum(T). % remove_gt(Set, X) returns Set with all elements greater than X % removed. In other words, it returns the set containing all the elements % of Set which are less than or equal to X. Takes O(log(card(Set))) % time and space. % :- func remove_gt(tree_bitset(T), T) = tree_bitset(T) <= uenum(T). :- pred remove_gt(T::in, tree_bitset(T)::in, tree_bitset(T)::out) is det <= uenum(T). % remove_least(Set0, X, Set) is true iff X is the least element in % Set0, and Set is the set which contains all the elements of Set0 % except X. Takes O(1) time and space. % :- pred remove_least(T::out, tree_bitset(T)::in, tree_bitset(T)::out) is semidet <= uenum(T). %--------------------------------------------------% % % Comparisons between sets. % % equal(SetA, SetB) is true iff SetA and SetB contain the same % elements. Takes O(min(card(SetA), card(SetB))) time. % :- pred equal(tree_bitset(T)::in, tree_bitset(T)::in) is semidet <= uenum(T). % subset(Subset, Set) is true iff Subset is a subset of Set. % Same as `intersect(Set, Subset, Subset)', but may be more efficient. % :- pred subset(tree_bitset(T)::in, tree_bitset(T)::in) is semidet. % superset(Superset, Set) is true iff Superset is a superset of Set. % Same as `intersect(Superset, Set, Set)', but may be more efficient. % :- pred superset(tree_bitset(T)::in, tree_bitset(T)::in) is semidet. %--------------------------------------------------% % % Operations on two or more sets. % % union(SetA, SetB) returns the union of SetA and SetB. The % efficiency of the union operation is not sensitive to the argument % ordering. Takes somewhere between O(log(card(SetA)) + log(card(SetB))) % and O(card(SetA) + card(SetB)) time and space. % :- func union(tree_bitset(T), tree_bitset(T)) = tree_bitset(T). :- pred union(tree_bitset(T)::in, tree_bitset(T)::in, tree_bitset(T)::out) is det. % union_list(Sets, Set) returns the union of all the sets in Sets. % :- func union_list(list(tree_bitset(T))) = tree_bitset(T). :- pred union_list(list(tree_bitset(T))::in, tree_bitset(T)::out) is det. % intersect(SetA, SetB) returns the intersection of SetA and SetB. % The efficiency of the intersection operation is not sensitive to the % argument ordering. Takes somewhere between % O(log(card(SetA)) + log(card(SetB))) and O(card(SetA) + card(SetB)) time, % and O(min(card(SetA)), card(SetB)) space. % :- func intersect(tree_bitset(T), tree_bitset(T)) = tree_bitset(T). :- pred intersect(tree_bitset(T)::in, tree_bitset(T)::in, tree_bitset(T)::out) is det. % intersect_list(Sets, Set) returns the intersection of all the sets % in Sets. % :- func intersect_list(list(tree_bitset(T))) = tree_bitset(T). :- pred intersect_list(list(tree_bitset(T))::in, tree_bitset(T)::out) is det. % difference(SetA, SetB) returns the set containing all the elements % of SetA except those that occur in SetB. Takes somewhere between % O(log(card(SetA)) + log(card(SetB))) and O(card(SetA) + card(SetB)) time, % and O(card(SetA)) space. % :- func difference(tree_bitset(T), tree_bitset(T)) = tree_bitset(T). :- pred difference(tree_bitset(T)::in, tree_bitset(T)::in, tree_bitset(T)::out) is det. %--------------------------------------------------% % % Operations that divide a set into two parts. % % divide(Pred, Set, InPart, OutPart): % InPart consists of those elements of Set for which Pred succeeds; % OutPart consists of those elements of Set for which Pred fails. % :- pred divide(pred(T)::in(pred(in) is semidet), tree_bitset(T)::in, tree_bitset(T)::out, tree_bitset(T)::out) is det <= uenum(T). % divide_by_set(DivideBySet, Set, InPart, OutPart): % InPart consists of those elements of Set which are also in DivideBySet; % OutPart consists of those elements of Set which are not in DivideBySet. % :- pred divide_by_set(tree_bitset(T)::in, tree_bitset(T)::in, tree_bitset(T)::out, tree_bitset(T)::out) is det <= uenum(T). %--------------------------------------------------% % % Converting lists to sets. % % list_to_set(List) returns a set containing only the members of List. % Takes O(length(List)) time and space. % :- func list_to_set(list(T)) = tree_bitset(T) <= uenum(T). :- pred list_to_set(list(T)::in, tree_bitset(T)::out) is det <= uenum(T). % sorted_list_to_set(List) returns a set containing only the members % of List. List must be sorted *on the enum values of the items*. % If the to_uint method of uenum(T) preserves order, then this is % equivalent to requiring that List be sorted according to type T's % comparison operation. % % This operation takes O(length(List)) time and space. % :- func sorted_list_to_set(list(T)) = tree_bitset(T) <= uenum(T). :- pred sorted_list_to_set(list(T)::in, tree_bitset(T)::out) is det <= uenum(T). %--------------------------------------------------% % % Converting sets to lists. % % to_sorted_list(Set)' returns a list containing all the members of Set, % in sorted order. Takes O(card(Set)) time and space. % :- func to_sorted_list(tree_bitset(T)) = list(T) <= uenum(T). :- pred to_sorted_list(tree_bitset(T)::in, list(T)::out) is det <= uenum(T). %--------------------------------------------------% % % Converting between different kinds of sets. % % from_set(Set) returns a bitset containing only the members of Set. % Takes O(card(Set)) time and space. % :- func from_set(set.set(T)) = tree_bitset(T) <= uenum(T). % to_sorted_list(Set) returns a set.set containing all the members % of Set, in sorted order. Takes O(card(Set)) time and space. % :- func to_set(tree_bitset(T)) = set.set(T) <= uenum(T). %--------------------------------------------------% % % Counting. % % count(Set) returns the number of elements in Set. % Takes O(card(Set)) time. % :- func count(tree_bitset(T)) = int <= uenum(T). %--------------------------------------------------% % % Standard higher order functions on collections. % % all_true(Pred, Set) succeeds iff Pred(Element) succeeds % for all the elements of Set. % :- pred all_true(pred(T)::in(pred(in) is semidet), tree_bitset(T)::in) is semidet <= uenum(T). % filter(Pred, Set) returns the elements of Set for which Pred succeeds. % :- func filter(pred(T)::in(pred(in) is semidet), tree_bitset(T)::in) = (tree_bitset(T)::out) is det <= uenum(T). % filter(Pred, Set, TrueSet, FalseSet) returns the elements of Set % for which Pred succeeds, and those for which it fails. % :- pred filter(pred(T)::in(pred(in) is semidet), tree_bitset(T)::in, tree_bitset(T)::out, tree_bitset(T)::out) is det <= uenum(T). % foldl(Func, Set, Start) calls Func with each element of Set % (in sorted order) and an accumulator (with the initial value of Start), % and returns the final value. Takes O(card(Set)) time. % :- func foldl(func(T, A) = A, tree_bitset(T), A) = A <= uenum(T). :- pred foldl(pred(T, A, A), tree_bitset(T), A, A) <= uenum(T). :- mode foldl(in(pred(in, in, out) is det), in, in, out) is det. :- mode foldl(in(pred(in, mdi, muo) is det), in, mdi, muo) is det. :- mode foldl(in(pred(in, di, uo) is det), in, di, uo) is det. :- mode foldl(in(pred(in, in, out) is semidet), in, in, out) is semidet. :- mode foldl(in(pred(in, mdi, muo) is semidet), in, mdi, muo) is semidet. :- mode foldl(in(pred(in, di, uo) is semidet), in, di, uo) is semidet. :- mode foldl(in(pred(in, in, out) is nondet), in, in, out) is nondet. :- mode foldl(in(pred(in, mdi, muo) is nondet), in, mdi, muo) is nondet. :- mode foldl(in(pred(in, di, uo) is cc_multi), in, di, uo) is cc_multi. :- mode foldl(in(pred(in, in, out) is cc_multi), in, in, out) is cc_multi. :- pred foldl2(pred(T, A, A, B, B), tree_bitset(T), A, A, B, B) <= uenum(T). :- mode foldl2(in(pred(in, di, uo, di, uo) is det), in, di, uo, di, uo) is det. :- mode foldl2(in(pred(in, in, out, di, uo) is det), in, in, out, di, uo) is det. :- mode foldl2(in(pred(in, in, out, in, out) is det), in, in, out, in, out) is det. :- mode foldl2(in(pred(in, in, out, in, out) is semidet), in, in, out, in, out) is semidet. :- mode foldl2(in(pred(in, in, out, in, out) is nondet), in, in, out, in, out) is nondet. :- mode foldl2(in(pred(in, di, uo, di, uo) is cc_multi), in, di, uo, di, uo) is cc_multi. :- mode foldl2(in(pred(in, in, out, di, uo) is cc_multi), in, in, out, di, uo) is cc_multi. :- mode foldl2(in(pred(in, in, out, in, out) is cc_multi), in, in, out, in, out) is cc_multi. % foldr(Func, Set, Start) calls Func with each element of Set % (in reverse sorted order) and an accumulator (with the initial value % of Start), and returns the final value. Takes O(card(Set)) time. % :- func foldr(func(T, A) = A, tree_bitset(T), A) = A <= uenum(T). :- pred foldr(pred(T, A, A), tree_bitset(T), A, A) <= uenum(T). :- mode foldr(in(pred(in, di, uo) is det), in, di, uo) is det. :- mode foldr(in(pred(in, in, out) is det), in, in, out) is det. :- mode foldr(in(pred(in, in, out) is semidet), in, in, out) is semidet. :- mode foldr(in(pred(in, in, out) is nondet), in, in, out) is nondet. :- mode foldr(in(pred(in, di, uo) is cc_multi), in, di, uo) is cc_multi. :- mode foldr(in(pred(in, in, out) is cc_multi), in, in, out) is cc_multi. :- pred foldr2(pred(T, A, A, B, B), tree_bitset(T), A, A, B, B) <= uenum(T). :- mode foldr2(in(pred(in, di, uo, di, uo) is det), in, di, uo, di, uo) is det. :- mode foldr2(in(pred(in, in, out, di, uo) is det), in, in, out, di, uo) is det. :- mode foldr2(in(pred(in, in, out, in, out) is det), in, in, out, in, out) is det. :- mode foldr2(in(pred(in, in, out, in, out) is semidet), in, in, out, in, out) is semidet. :- mode foldr2(in(pred(in, in, out, in, out) is nondet), in, in, out, in, out) is nondet. :- mode foldr2(in(pred(in, di, uo, di, uo) is cc_multi), in, di, uo, di, uo) is cc_multi. :- mode foldr2(in(pred(in, in, out, di, uo) is cc_multi), in, in, out, di, uo) is cc_multi. :- mode foldr2(in(pred(in, in, out, in, out) is cc_multi), in, in, out, in, out) is cc_multi. %--------------------------------------------------%