In article <199512081407.QAA05660@lk-hp-2.hut.fi>, Jarkko Hietaniemi (jhi@snakemail.hut.fi) wrote:
> > How do you calculate the minimum, maximum and norm of a set in Perl
> > efficiently?
> This one is easy. You don't. :-) Please do not talk about 'sets'
> because the concept is by no means well-defined
Cantor is rotating in his tomb... ;-)
> (as this very
> discussion serves to show). "The minima, maxima and norms" of a "set"
> even less so.
This is not true for Analysis, where infimum, supremum, minimum and maximum
are well-defined for sets... (of reals)
And in computer science the norm of a set (of integers) is also well-defined...
;-)
> ++jhi;
Sorry! You don't win a point this time! ;-) ;-) ;-)
Well, but you're right: The concept of sets is not well-defined in the sense
that there are many possible ways of REALIZING sets.
For instance, have you already seen Kruskal's algorithm in "Efficient
Algorithms"?
It uses sets that are stored as TREES in an array!
I show you the Perl program for it:
-------------------- cut here -------------------- cut here --------------------
#!perl -w
# Copyright (c) 1995 by Steffen Beyer. All rights reserved.
# This package is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
################################################################################
# Module implementing Kruskal's algorithm for minimal spanning trees in graphs #
################################################################################
# "require" this module in order to use it.
$number_of_edges = 0;
$number_of_vortices = 0;
# Input: A set of vortices which constitute a graph (some cities on a map,
# for example), a set of edges (i.e., roads) between the vortices of the
# (non-directed and connected) graph (i.e., the edges can be traveled in
# either direction, and a path must exist between any two vortices), and
# the cost of each edge (for instance, the geographical distance).
# Output: A set of edges forming a spanning tree (i.e., a set of edges linking
# all vortices, so that a path exists between any two vortices) which is free
# of circles (because it's a tree) and which is minimal in terms of the cost
# function defined on the set of edges.
# See Aho, Hopcroft, Ullman, "The Design and Analysis of Computer Algorithms"
# for more details on the algorithm.
# Remove the '#'s from the beginning of the following two lines and run
# this script as a stand-alone program (or call &example; after requiring
# this module) to see an example:
#&example;
#exit;
sub example
{
my($costs) = 0;
my($k);
print "\n";
print "+++ Kruskal's Algorithm for Minimal Spanning Trees in Graphs +++";
print "\n";
&define_vortices(2,3,5,7,11,13,17,19,23,29,31);
print "\nVortices:\n\n";
for ( $k = 1; $k <= $#VS; ++$k )
{
if (defined $VS[$k]) { print "$k\n"; }
}
&define_edges( 2,13,3, 3,13,2, 5,13,1, 3,5,2, 3,29,21, 23,29,3,
23,31,2, 5,31,15, 5,7,10, 2,11,2, 7,11,2, 7,19,5, 11,19,2,
7,31,4, 3,17,3, 17,23,3, 7,17,3 );
print "\nEdges:\n\n";
for ( $k = 1; $k <= $#E; ++$k )
{
print ${$E[$k]}{'from'}, " <-> ", ${$E[$k]}{'to'}, " = ",
${$E[$k]}{'cost'}, "\n";
}
&kruskal;
print "\nEdges in minimal spanning tree:\n\n";
for ( $k = 1; $k <= $#T; ++$k )
{
print ${$T[$k]}{'from'}, " <-> ", ${$T[$k]}{'to'}, " = ",
${$T[$k]}{'cost'}, "\n";
$costs += ${$T[$k]}{'cost'};
}
print "\nTotal costs: $costs\n\n";
}
sub define_vortices
{
undef @VS;
$number_of_vortices = 0;
foreach (@_)
{
($_ > 0) || die "vortex number is not positive!\n";
$VS[$_] = -1;
++$number_of_vortices;
}
}
sub define_edges
{
undef @E;
$number_of_edges = 0;
while (@_)
{
$from = shift || die "missing 'from' vortex number!\n";
$to = shift || die "missing 'to' vortex number!\n";
$cost = shift || die "missing edge 'cost' value!\n";
defined $VS[$from] || die "vortex '$from' not previously defined!\n";
defined $VS[$to] || die "vortex '$to' not previously defined!\n";
($from != $to) || die "vortices 'from' and 'to' are the same!\n";
$E[++$number_of_edges] =
{ 'from' => $from, 'to' => $to, 'cost' => $cost };
}
}
sub heapify # complexity: O(ld n)
{
my($i,$n) = @_;
my($i2,$i21,$j,$swap);
while ($i < $n)
{
$j = $i;
$i2 = $i * 2;
$i21 = $i2 + 1;
if ($i2 <= $n)
{
if (${$E[$i]}{'cost'} > ${$E[$i2]}{'cost'})
{
$j = $i2;
if ($i21 <= $n)
{
if (${$E[$i2]}{'cost'} > ${$E[$i21]}{'cost'}) { $j = $i21; }
}
}
else
{
if ($i21 <= $n)
{
if (${$E[$i]}{'cost'} > ${$E[$i21]}{'cost'}) { $j = $i21; }
}
}
}
if ($i != $j)
{
$swap = $E[$i];
$E[$i] = $E[$j];
$E[$j] = $swap;
$i = $j;
}
else { $i = $n; }
}
}
sub makeheap # complexity: O(n ld n)
{
my($n) = @_;
my($k);
for ( $k = $n - 1; $k > 0; --$k ) { &heapify($k, $n); }
}
# The following subroutine isn't used by this algorithm, it is only included
# here for the sake of completeness:
sub heapsort # complexity: O(n ld n)
{
my($n) = @_;
my($k,$swap);
for ( $k = $n - 1; $k > 0; --$k ) { &heapify($k, $n); }
for ( $k = $n; $k > 1; --$k )
{
$swap = $E[1];
$E[1] = $E[$k];
$E[$k] = $swap;
&heapify(1, $k - 1);
}
}
# (Disjoint!) sets are stored as trees in an array in this algorithm. Each
# element of some set (a cell in the array) contains a pointer to (the number
# of) another element, up to the root element that does not point anywhere,
# but contains the (negative) number of elements the set contains. The number
# of the root element is also used as an identifier for the set.
#
# Example:
#
# i : | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
# -------------+-----+-----+-----+-----+-----+-----+-----+-----+
# parent[i] : | -4 | -3 | 1 | 2 | 1 | -1 | 3 | 4 |
#
# This array contains the three sets S1, S2 and S6:
#
# 1 2 6
# / \ |
# 3 5 4
# / |
# 7 8
#
# "find" returns the number of the root element (= the identifier of the set)
# of the tree in which the given element is contained:
#
# find(a) := i so that a in Si
#
# It also reduces the height of that tree by changing all the pointers from
# the given element up to the root element to point DIRECTLY to the root
# element.
#
# Example:
#
# find(7) returns "1" and modifies the array as follows:
#
# i : | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
# -------------+-----+-----+-----+-----+-----+-----+-----+-----+
# parent[i] : | -4 | -3 | 1 | 2 | 1 | -1 | 1 | 4 |
#
# 1 2 6
# /|\ |
# 3 5 7 4
# |
# 8
#
# "union" takes the identifiers of two sets (= the numbers of their respective
# root elements) and merges the two sets by appending one of the two trees to
# the other. It always appends the SMALLER set to the LARGER one (to keep the
# height of the resulting tree as small as possible) and updates the number of
# elements contained in the resulting set which is stored in the root element's
# cell of the array.
#
# Example:
#
# union(2,6) does the following:
#
# i : | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
# -------------+-----+-----+-----+-----+-----+-----+-----+-----+
# parent[i] : | -4 | -4 | 1 | 2 | 1 | 2 | 1 | 4 |
#
# 1 2
# /|\ / \
# 3 5 7 4 6
# |
# 8
# complexity for O(n) "find" operations: O( G(n) n )
#
# complexity for one "union" operation: O(1)
#
# complexity for O(n) ( "find" + "union" ) operations: O( G(n) n )
#
# where G(n) := min{ j | F(j) >= n }
#
# and F(j) := 1 for j = 0
# F(j) := 2 ^ F(j-1) for j > 0
#
# also, G(n) <= ld n for all n
sub find
{
my($i) = @_;
my($j,$k,$t);
$j = $i;
while ($VS[$j] > 0) { $j = $VS[$j]; } # find root element (= set identifier)
$k = $i;
while ($k != $j) # height compression of the tree
{
$t = $VS[$k];
$VS[$k] = $j;
$k = $t;
}
return($j);
}
sub union
{
my($i,$j) = @_;
my($x);
$x = $VS[$i] + $VS[$j]; # calculate number of elements in resulting set
if ($VS[$i] > $VS[$j]) # which of the two sets contains more elements?
{
$VS[$i] = $j; # merge them
$VS[$j] = $x; # update number of elements
}
else
{
$VS[$j] = $i; # merge them
$VS[$i] = $x; # update number of elements
}
}
sub kruskal # complexity: O(n ld n) ( where n := |{ Edges }| )
{
my($n) = $number_of_edges;
my($v) = $number_of_vortices;
my($i,$j,$swap);
my($t) = 0;
undef @T;
&makeheap($number_of_edges); # complexity: O(n ld n)
while (($v > 1) && ($n > 0))
{
$swap = $E[1];
$E[1] = $E[$n];
$E[$n] = $swap;
&heapify(1, $n - 1); # complexity: n O(ld n) = O(n ld n)
$i = find(${$E[$n]}{'from'}); # complexity: n ( 2 find + 1 union ) =
$j = find(${$E[$n]}{'to'}); # O( G(n) n ) <= O(n ld n)
if ($i != $j)
{
union($i,$j);
$T[++$t] = $E[$n];
--$v;
}
--$n;
}
return(@T);
}
1;
__END__
-------------------- cut here -------------------- cut here --------------------
Yours,
--
Steffen Beyer
mailto:sb@sdm.de |s |d &|m | software design & management GmbH&Co.KG
phone: +49 89 63812-244 | | | | Thomas-Dehler-Str. 27
fax: +49 89 63812-150 | | | | 81737 Munich, Germany.