#### $Id: agnes.q,v 1.8 2002/03/04 10:44:45 maechler Exp maechler $
agnes <- function(x, diss = inherits(x, "dist"), metric = "euclidean",
                  stand = FALSE, method = "average")
{
    if(diss) {
        ## check type of input vector
        if(is.na(min(x)))
            stop("NA-values in the dissimilarity matrix not allowed." )
        if(data.class(x) != "dissimilarity") {
            if(!is.numeric(x) || is.na(sizeDiss(x)))
                stop("x is not of class dissimilarity and can not be converted to this class." )
            ## convert input vector to class "dissimilarity"
            class(x) <- "dissimilarity"
            attr(x, "Size") <- sizeDiss(x)
            attr(x, "Metric") <- "unspecified"
        }
        n <- attr(x, "Size")
        dv <- x[lower.to.upper.tri.inds(n)]
        ## prepare arguments for the Fortran call
        dv <- c(0., dv)# "double"
        jp <- 1
        valmd <- double(1)
        jtmd <- integer(1)
        ndyst <- 0
        x2 <- double(n)
        jdyss <- 1
    }
    else {
        ## check input matrix and standardize, if necessary
	x <- data.matrix(x)
	if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.")
        x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x
        ndyst <- if(metric == "manhattan") 2 else 1
        n <- nrow(x2)
        jp <- ncol(x2)
        jtmd <- ifelse(is.na(rep(1, n) %*% x2), -1, 1)
	valmisdat <- min(x2, na.rm=TRUE) - 0.5 #(double) VALue for MISsing DATa
        x2[is.na(x2)] <- valmisdat
        valmd <- rep(valmisdat, jp)
        dv <- double(1 + (n * (n - 1))/2)
        jdyss <- 0
    }
    meth <-
        switch(method,
               average = 1, # default
               single =  2,
               complete= 3,
               ward =	 4,
               weighted= 5)
    ## call Fortran routine
    storage.mode(x2) <- "double"
    storage.mode(valmd) <- "double"
    storage.mode(jtmd) <- "integer"
    res <- .Fortran("twins",
                    as.integer(n),
                    as.integer(jp),
                    x2,
                    dv,
                    dis = double(1 + (n * (n - 1))/2),
                    ok = as.integer(jdyss),
                    valmd,
                    jtmd,
                    as.integer(ndyst),
                    as.integer(1),# jalg = 1 <==> AGNES
                    as.integer(meth),
                    integer(n),
                    ner = integer(n),
                    ban = double(n),
                    ac = as.double(0),
                    merge = matrix(0:0, n - 1, 2), # integer
                    PACKAGE = "cluster")
    if(!diss) {
        ##give warning if some dissimilarities are missing.
        if(res$ok == -1)
            stop("No clustering performed, NA-values in the dissimilarity matrix.\n" )
        ## adapt Fortran output to S:
        ##convert lower matrix, read by rows, to upper matrix, read by rows.
        disv <- res$dis[-1]
        disv[disv == -1] <- NA
        disv <- disv[upper.to.lower.tri.inds(n)]
        class(disv) <- "dissimilarity"
        attr(disv, "Size") <- nrow(x)
        attr(disv, "Metric") <- metric
        attr(disv, "Labels") <- dimnames(x)[[1]]
        ##add labels to Fortran output
        if(length(dimnames(x)[[1]]) != 0)
            order.lab <- dimnames(x)[[1]][res$ner]
    }
    else {
        disv <- x
        ##add labels to Fortran output
        if(length(attr(x, "Labels")) != 0)
            order.lab <- attr(x, "Labels")[res$ner]
    }
    clustering <- list(order = res$ner, height = res$ban[-1], ac = res$ac,
                       merge = res$merge, diss = disv, call = match.call())
    if(exists("order.lab"))
        clustering$order.lab <- order.lab
    if(!diss) {
        x2[x2 == valmisdat] <- NA
        clustering$data <- x2
    }
    class(clustering) <- c("agnes", "twins")
    clustering
}

summary.agnes <- function(object, ...)
{
    class(object) <- "summary.agnes"
    object
}

print.agnes <- function(x, ...)
{
    cat("Merge:\n")
    print(x$merge, ...)
    cat("Order of objects:\n")
    print(if(length(x$order.lab) != 0) x$order.lab else x$order,
          quote = FALSE, ...)
    cat("Height:\n")
    print(x$height, ...)
    cat("Agglomerative coefficient:\n")
    print(x$ac, ...)
    cat("\nAvailable components:\n")
    print(names(x), ...)
    invisible(x)
}

print.summary.agnes <- function(x, ...)
{
    cat("Merge:\n")
    print(x$merge, ...)
    cat("Order of objects:\n")
    print(if(length(x$order.lab) != 0) x$order.lab else x$order,
          quote = FALSE, ...)
    cat("Height:\n")
    print(x$height, ...)
    cat("Agglomerative coefficient:\n")
    print(x$ac, ...)
    cat("\n")
    ## only extra  compared to print.agnes:
    print(x$diss, ...)
    cat("\nAvailable components:\n")
    print(names(x), ...)
    invisible(x)
}

### $Id: clara.q,v 1.10 2002/03/04 10:44:45 maechler Exp maechler $

#### CLARA := Clustering LARge Applications
####
#### Note that the algorithm is O(n), but O(ns^2) where ns == sampsize

### FIXME :
##  should not necessarily keep data in result, because "large" !
##  OTOH, data is used for clusplot.partition()

## Note:  ./plotpart.q	is also working with clara() objects


clara <- function(x, k, metric = "euclidean", stand = FALSE,
		  samples = 5, sampsize = 40 + 2 * k)
{
    ## check type of input matrix and values of input numbers
    x <- data.matrix(x)
    if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.")
    n <- nrow(x)
    if((k <- as.integer(k)) < 1 || k > n - 1)
	stop("The number of cluster should be at least 1 and at most n-1." )
    if((sampsize <- as.integer(sampsize)) < k)
	stop(paste(c("'sampsize' should be at least", k,
		     "(number of clusters)"), collapse = " "))
    if(n < sampsize)
	stop(paste(c("Number of objects is", n,
		     ", should be at least", sampsize, "(sampsize)"),
		   collapse = " "))
    namx <- dimnames(x)[[1]]
    ## standardize, if necessary
    x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x
    ## put info about metric, size and NAs in arguments for the Fortran call
    jp <- ncol(x2)
    jtmd  <- ifelse(is.na(rep(1, n) %*% x2), -1, 1)
    mdata <- is.na(min(x2))

    ## FIXME: The following will go wrong as soon as  min(x2) < -5e15
    valmisdat <- min(x2, na.rm=TRUE) - 0.5 #(double) VALue for MISsing DATa
    x2[is.na(x2)] <- valmisdat

    x3 <- as.double(as.vector(t(x2)))# transposing LARGE x ..not efficient ....
    ## call Fortran routine
    res <- .Fortran("clara",
		    as.integer(n),
		    as.integer(jp),
		    as.integer(k),
		    clu = x3,# transpose (x [n * jp] )
		    nran  = as.integer(samples),
		    nsam  = sampsize,
		    dis	  = double(1 + (sampsize * (sampsize - 1))/2),
		    mdata = as.integer(mdata),
		    valmd = rep(as.double(valmisdat), jp),
		    jtmd  = as.integer(jtmd),
		    ndyst = as.integer(if(metric == "manhattan") 2 else 1),
		    integer(sampsize),
		    integer(sampsize),
		    sample = integer(sampsize),# = nbest
		    integer(k),
		    med = integer(k),# = nrx
		    double(k),
		    double(k),
		    double(k),
		    avdis  = double(k),# = ttbes
		    maxdis = double(k),# = rdbes
		    ratdis = double(k),# = rabes
		    size  = integer(k),# = mtt
		    obj	  = double(1),
		    avsil = double(k),
		    ttsil = double(1),
		    silinf = matrix(0, sampsize, 4),
		    jstop = as.integer(0),
		    double(sampsize),
		    double(sampsize),
		    double(sampsize),
		    integer(sampsize),
		    integer(sampsize),
		    integer(sampsize),
		    integer(sampsize),
		    integer(sampsize),
		    integer(sampsize),
		    PACKAGE = "cluster")
    ## give a warning when errors occured
    if(res$jstop == 1)
	stop("For each sample at least one object was found which could not be assigned to a cluster (because of missing values).")
    if(res$jstop == 2)
	stop("Each of the random samples contains objects between which no distance can be computed.")
    sildim <- res$silinf[, 4]
    ## adapt Fortran output to S:
    ## convert lower matrix, read by rows, to upper matrix, read by rows.
    disv <- res$dis[-1]
    disv[disv == -1] <- NA
    disv <- disv[upper.to.lower.tri.inds(sampsize)]
    class(disv) <- "dissimilarity"
    attr(disv, "Size") <- sampsize
    attr(disv, "Metric") <- metric
    attr(disv, "Labels") <- namx[res$sample]
    ## add labels to Fortran output
    res$med <- x[res$med, ]
    res$clu <- matrix(res$clu, nrow = n, ncol = jp, byrow = TRUE)[, 1]
    if(length(namx) != 0) {
	sildim <- namx[sildim]
	res$sample <- namx[res$sample]
	names(res$clu) <- namx
    }
    ## add dimnames to Fortran output
    clusinf <- cbind(size = res$size, "max_diss" = res$maxdis,
		     "av_diss" = res$avdis, isolation = res$ratdis)

    clustering <- list(sample = res$sample, medoids = res$med,
                       clustering = res$clu, objective = res$obj,
                       clusinfo = clusinf, diss = disv, call = match.call())
    if(k != 1) {
	dimnames(res$silinf) <- list(sildim,
				     c("cluster", "neighbor", "sil_width", ""))
	clustering <- c(clustering,
                        list(silinfo = list(width = res$silinf[, -4],
                             clus.avg.widths = res$avsil[1:k],
                             avg.width = res$ttsil)))
    }
    x2[x2 == valmisdat] <- NA
    clustering$data <- x2
    class(clustering) <- c("clara", "partition")
    clustering
}

print.clara <- function(x, ...)
{
    cat("Best sample:\n");		print(x$sample, quote = FALSE, ...)
    cat("Medoids:\n");			print(x$medoids, ...)
    cat("Clustering vector:\n");	print(x$clustering, ...)
    cat("Objective function:\n");	print(x$objective, ...)
    cat("\nAvailable components:\n");	print(names(x), ...)
    invisible(x)
}

summary.clara <- function(object, ...)
{
    class(object) <- "summary.clara"
    object
}

print.summary.clara <- function(x, ...)
{
    cat("Best sample:\n");		print(x$sample, quote = FALSE, ...)
    cat("Medoids:\n");			print(x$medoids, ...)
    cat("Clustering vector:\n");	print(x$clustering, ...)
    cat("Objective function:\n");	print(x$objective, ...)
    cat("\nNumerical information per cluster:\n")
    print(x$clusinfo, ...)
    if(length(x$silinfo) != 0) {
	cat("\nSilhouette plot information for best sample:\n")
	print(x$silinfo[[1]], ...)
	cat("Average silhouette width per cluster:\n")
	print(x$silinfo[[2]], ...)
	cat("Average silhouette width of best sample:\n")
	print(x$silinfo[[3]], ...)
    }
    cat("\n")
    print(x$diss, ...)
    cat("\nAvailable components:\n");	print(names(x), ...)
    invisible(x)
}

#### $Id: daisy.q,v 1.6 2002/03/04 10:44:45 maechler Exp maechler $
daisy <-
function(x, metric = c("euclidean","manhattan"), stand = FALSE, type = list())
{
    ## check type of input matrix
    if(is.null(dim(x)) || !(is.data.frame(x) || is.numeric(x)))
        stop("x is not a dataframe or a numeric matrix.")
    if(!is.null(tA <- type$asymm) &&
       !all(sapply(lapply(as.data.frame(x[,tA]),
                          function(y) levels(as.factor(y))), length) == 2))
        stop("asymmetric binary variable has more than 2 levels.")
    ## transform variables and construct `type' vector
    type2 <- sapply(x, data.class)
    x <- data.matrix(x)
    n <- nrow(x)
    if(length(type) > 0) {
        if(!is.list(type)) stop("invalid `type'; must be named list")
        tT <- type$ ordratio
        tL <- type$ logratio
        x[, names(type2[tT])] <- codes(as.ordered(x[, names(type2[tT])]))
        x[, names(type2[tL])] <- log10(           x[, names(type2[tL])])
        type2[type$asymm] <- "A"
        type2[tT] 	  <- "T" # was "O" (till 2000-12-14) accidentally !
    }
    type2[type2 == "numeric"] <- "I"
    type2[type2 == "ordered"] <- "O"
    type2[type2 == "factor"] <- "N"
    ## standardize, if necessary
    if(all(type2 == "I")) {
        if(stand)
            x <- scale(x, scale = apply(x, 2,
                          function(y)
                          mean(abs(y - mean(y, na.rm = TRUE)), na.rm = TRUE)))
        jdat <- 2
        metric <- match.arg(metric)
        ndyst <- if(metric == "manhattan") 2 else 1
    }
    else { ## mixed case
        if(!missing(metric))
            warning("`metric' is not used with mixed variables")
        colmin   <- apply(x, 2, min, na.rm = TRUE)
        colrange <- apply(x, 2, max, na.rm = TRUE) - colmin
        x <- scale(x, center = colmin, scale = colrange)
        jdat <- 1
        ndyst <- 0
    }
    ## 	type2 <- paste(type2, collapse = "")
    ## put info about NAs in arguments for the Fortran call
    jtmd <- ifelse(is.na(rep(1, n) %*% x), -1, 1)
    valmisdat <- min(x, na.rm = TRUE) - 0.5
    x[is.na(x)] <- valmisdat
    valmd <- rep(valmisdat, ncol(x))
    ## call Fortran routine
    storage.mode(x) <- "double"
    storage.mode(valmd) <- "double"
    storage.mode(jtmd) <- "integer"
    type3 <- as.integer(match(type2, c('A','S','N','O','I','T')))
    res <- .Fortran("daisy",
                    as.integer(n),
                    as.integer(ncol(x)),
                    x,
                    valmd,
                    jtmd,
                    as.integer(jdat),
                    type3,
                    as.integer(ndyst),
                    dis = double(1 + (n * (n - 1))/2),
                    PACKAGE = "cluster")
    ## adapt Fortran output to S:
    ## convert lower matrix, read by rows, to upper matrix, read by rows.
    disv <- res$dis[-1]
    disv[disv == -1] <- NA
    full <- matrix(0, n, n)
    full[!lower.tri(full, diag = TRUE)] <- disv
    disv <- t(full)[lower.tri(full)]
    ## give warning if some dissimilarities are missimg
    if(is.na(min(disv))) attr(disv, "NA.message") <-
        "NA-values in the dissimilarity matrix !"
    ## construct S object -- "dist" methods are *there* !
    class(disv) <- c("dissimilarity", "dist")
    attr(disv, "Labels") <- dimnames(x)[[1]]
    attr(disv, "Size") <- n
    attr(disv, "Metric") <- ifelse(ndyst == 0, "mixed", metric)
    disv
}

print.dissimilarity <- function(x, ...)
{
    cat("Dissimilarities :\n")
    print(as.vector(x), ...)
    cat("\n")
    if(!is.null(attr(x, "na.message")))
        cat("Warning : ", attr(x, "NA.message"), "\n")
    cat("Metric : ", attr(x, "Metric"), "\n")
    cat("Number of objects : ", attr(x, "Size"), "\n")
    invisible(x)
}

summary.dissimilarity <- function(object, ...)
{
    cat(length(object), "dissimilarities, summarized :\n")
    print(sx <- summary(as.vector(object), ...))
    cat("\n")
    if(!is.null(attr(object, "na.message")))
        cat("Warning : ", attr(object, "NA.message"), "\n")
    cat("Metric : ", attr(object, "Metric"), "\n",
        "Number of objects : ", attr(object, "Size"), "\n", sep="")
    invisible(sx)
}
### $Id: diana.q,v 1.9 2002/03/04 10:44:45 maechler Exp maechler $

diana <- function(x, diss = inherits(x, "dist"),
                  metric = "euclidean", stand = FALSE)
{
    if(diss) {
	## check type of input vector
	if(is.na(min(x)))
	    stop("NA-values in the dissimilarity matrix not allowed.")
	if(data.class(x) != "dissimilarity") {
	    if(!is.numeric(x) || is.na(sizeDiss(x)))
		stop("x is not of class dissimilarity and can not be converted to it.")
	    ## convert input vector to class "dissimilarity"
	    class(x) <- "dissimilarity"
	    attr(x, "Size") <- sizeDiss(x)
	    attr(x, "Metric") <- "unspecified"
	}
	n <- as.integer(attr(x, "Size"))
	dv <- x[lower.to.upper.tri.inds(n)]
	## prepare arguments for the Fortran call
	dv <- c(0., dv)# double
	jp <- as.integer(1)
	valmd <- double(1)
	jtmd <- integer(1)
	ndyst <- 0
	x2 <- double(n)
    }
    else {
        ## check input matrix and standardize, if necessary
	x <- data.matrix(x)
	if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.")
        x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x
	ndyst <- if(metric == "manhattan") 2 else 1
	n <- nrow(x2)
	jp <- ncol(x2)
	jtmd <- as.integer(ifelse(is.na(rep(1, n) %*% x2), -1, 1))
	valmisdat <- min(x2, na.rm=TRUE) - 0.5 #(double) VALue for MISsing DATa
	x2[is.na(x2)] <- valmisdat
	valmd <- rep(valmisdat, jp)
	dv <- double(1 + (n * (n - 1))/2)
    }
    res <- .Fortran("twins",
		    n,
		    jp,
		    x2,
		    dv,
		    dis = double(1 + (n * (n - 1))/2),
		    ok = as.integer(diss), # = jdyss
		    valmd,
		    jtmd,
		    as.integer(ndyst),
                    as.integer(2),# jalg = 2 <==> DIANA
		    as.integer(0),# ~ method
		    integer(n),
		    ner = integer(n),
		    ban = double(n),
		    dc = as.double(0),
                    merge = matrix(0:0, n - 1, 2), # integer
		    PACKAGE = "cluster")
    if(!diss) {
	## give warning if some dissimilarities are missing.
	if(res$ok == -1)
	    stop("No clustering performed, NA's in dissimilarity matrix.\n")
	## adapt Fortran output to S:
	## convert lower matrix, read by rows, to upper matrix, read by rows.
	disv <- res$dis[-1]
	disv[disv == -1] <- NA
	disv <- disv[upper.to.lower.tri.inds(n)]
	class(disv) <- "dissimilarity"
	attr(disv, "Size") <- nrow(x)
	attr(disv, "Metric") <- metric
	attr(disv, "Labels") <- dimnames(x)[[1]]
	## add labels to Fortran output
	if(length(dimnames(x)[[1]]) != 0)
	    order.lab <- dimnames(x)[[1]][res$ner]
    }
    else {
	disv <- x
	## add labels to Fortran output
	if(length(attr(x, "Labels")) != 0)
	    order.lab <- attr(x, "Labels")[res$ner]
    }
    clustering <- list(order = res$ner, height = res$ban[-1], dc = res$dc,
		       merge = res$merge, diss = disv, call = match.call())
    if(exists("order.lab"))
	clustering$order.lab <- order.lab
    if(!diss) {
	x2[x2 == valmisdat] <- NA
	clustering$data <- x2
    }
    class(clustering) <- c("diana", "twins")
    clustering
}

print.diana <- function(x, ...)
{
    cat("Merge:\n")
    print(x$merge, ...)
    cat("Order of objects:\n")
    print(if (length(x$order.lab) != 0) x$order.lab else x$order,
	  quote = FALSE, ...)
    cat("Height:\n")
    print(x$height, ...)
    cat("Divisive coefficient:\n")
    print(x$dc, ...)
    cat("\nAvailable components:\n")
    print(names(x), ...)
    invisible(x)
}

summary.diana <- function(object, ...)
{
    class(object) <- "summary.diana"
    object
}

print.summary.diana <- function(x, ...)
{
    cat("Merge:\n")
    print(x$merge, ...)
    cat("Order of objects:\n")
    print(if (length(x$order.lab) != 0) x$order.lab else x$order,
	  quote = FALSE, ...)
    cat("Height:\n")
    print(x$height, ...)
    cat("Divisive coefficient:\n")
    print(x$dc, ...)
    cat("\n")
    print(x$diss, ...)
    cat("\nAvailable components:\n")
    print(names(x), ...)
    invisible(x)
}
#### ellipsoidhull : Find (and optionally draw)
#### -----------   the smallest ellipsoid containining a set of points
####
#### Just making the algorithms in clusplot() available more generally
#### ( --> ./plotpart.q )

### Author: Martin Maechler, Date: 21 Jan 2002, 15:41


ellipsoidhull <-
    function(x, tol = 0.01, maxit = 5000, ret.wt = FALSE, ret.sqdist = FALSE)
{
    if(!is.matrix(x) || !is.numeric(x))
        stop("`x' must be numeric  n x p matrix")
    if(any(ina <- is.na(x))) {
        warning("omitting NAs")
        x <- na.omit(x)
    }
    n <- nrow(x)
    if(n == 0) stop("no points without missing values")
    p <- ncol(x)

    res <- .Fortran("spannel",
                    n,
                    ndep= p,
                    dat = cbind(1., x),
                    sqdist = double(n),
                    l1 = double((p+1) ^ 2),
                    double(p),
                    double(p),
                    prob = double(n),
                    double(p+1),
                    eps = as.double(tol),
                    maxit = as.integer(maxit),
                    ierr = as.integer(0),
                    PACKAGE = "cluster")
    if(res$ierr != 0)
        cat("Error in Fortran routine computing the spanning ellipsoid,",
            "\n probably collinear data\n", sep="")
    if(res$maxit == maxit)
        warning("possibly not converged in", maxit, "iterations")

    cov <- cov.wt(x, res$prob)
    ## cov.wt() in R has extra wt[] scaling; revert here
    res <- list(loc = cov$center,
                cov = cov$cov * (1 - sum(cov$wt^2)),
                d2  = weighted.mean(res$sqdist, res$prob),
                sqdist = if(ret.sqdist) res$sqdist,
                wt  = if(ret.wt) cov$wt,
                tol = tol,
                eps = max(res$sqdist) - p,
                it  = res$maxit)

    class(res) <- "ellipsoid"
    res
}

print.ellipsoid <- function(x, digits = max(1, getOption("digits") - 2), ...)
{
    d <- length(x$loc)
    cat("`ellipsoid' in", d, "dimensions:\n center = (",
        format(x$loc, digits=digits),
        "); squared ave.radius d^2 = ", format(x$d2, digits=digits),
        "\n and shape matrix =\n")
    print(x$cov, digits = digits, ...)
    cat("  hence,",if(d==2)"area" else "volume"," = ",
        format(volume(x), digits=digits),"\n")
    invisible(x)
}

volume <- function(object) UseMethod("volume")
volume.ellipsoid <- function(object) {
    A <- object$cov
    pi * object$d2 * sqrt(det(A))
}

## For p = 2 :
##   Return (x[i],y[i]) points, i = 1:n, on boundary of ellipse, given
##   by 2 x 2 matrix A[], origin `loc' and d(xy, loc) ^2 = `d2'
ellipsoidPoints <- function(A, d2, loc, n = 201)
{
    if(length(d <- dim(A)) != 2 || (p <- d[1]) != d[2])
        stop("`A' must be p x p  cov-matrix defining an ellipsoid")
    if(p == 2) {
        detA <- A[1, 1] * A[2, 2] - A[1, 2]^2
        yl2 <- A[2, 2] * d2
        y <- seq( - sqrt(yl2), sqrt(yl2), leng = n)
        sqrt.discr <- sqrt(detA * pmax(0, yl2 - y^2))/A[2, 2]
        sqrt.discr[c(1, n)] <- 0
        b <- loc[1] + A[1, 2]/A[2, 2] * y
        y <- loc[2] + y
        return(rbind(cbind(    b - sqrt.discr,      y),
                     cbind(rev(b + sqrt.discr), rev(y))))
    } else { ## p >= 3
        detA <- det(A)
        ##-- need something like polar coordinates
        stop("ellipsoidPoints() not yet implemented for p >= 3 dim.")
    }
}

predict.ellipsoid <- function(object, n.out = 201, ...)
    ellipsoidPoints(object$cov, d2 = object$d2, loc= object$loc, n = n.out)
#### $Id: fanny.q,v 1.8 2002/03/04 10:44:45 maechler Exp maechler $
fanny <- function(x, k, diss = inherits(x, "dist"),
                  metric = "euclidean", stand = FALSE)
{
    if(diss) {
	## check type of input vector
	if(is.na(min(x)))
	    stop("NA-values in the dissimilarity matrix not allowed.")
	if(data.class(x) != "dissimilarity") {
	    if(!is.numeric(x) || is.na(sizeDiss(x)))
		stop("x is not of class dissimilarity and can not be converted to this class." )
	    ## convert input vector to class "dissimilarity"
	    class(x) <- "dissimilarity"
	    attr(x, "Size") <- sizeDiss(x)
	    attr(x, "Metric") <- "unspecified"
	}
	## prepare arguments for the Fortran call
	n <- attr(x, "Size")
	dv <- as.double(c(x, 0))
	jp <- 1
	valmd <- double(1)
	jtmd <- integer(1)
	ndyst <- 0
	x2 <- double(n)
	jdyss <- 1
    }
    else {
        ## check input matrix and standardize, if necessary
	x <- data.matrix(x)
	if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.")
        x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x
	## put info about metric, size and NAs in arguments for the Fortran call
	ndyst <- if(metric == "manhattan") 2 else 1
	n <- nrow(x2)
	jp <- ncol(x2)
	jtmd <- ifelse(is.na(rep(1, n) %*% x2), -1, 1)
	valmisdat <- min(x2, na.rm=TRUE) - 0.5 #(double) VALue for MISsing DATa
	x2[is.na(x2)] <- valmisdat
	valmd <- rep(valmisdat, jp)
	jdyss <- 0
	dv <- double(1 + (n * (n - 1))/2)
    }
    if((k <- as.integer(k)) < 1 || k > n%/%2 - 1)
        stop("`k' (number of clusters) must be in {1,2, .., n/2 -1}")
    ## call Fortran routine
    storage.mode(x2) <- "double"
    storage.mode(jtmd) <- "integer"
    res <- .Fortran("fanny",
		    as.integer(n),
		    as.integer(jp),
		    k,
		    x2,
		    dis = dv,
		    ok = as.integer(jdyss),
		    valmd,
		    jtmd,
		    as.integer(ndyst),
		    integer(n),
		    integer(n),
		    integer(n),
		    double(n),
		    p = matrix(0., n, k),
		    matrix(0., n, k),
		    avsil = double(k),
		    integer(k),
		    double(k),
		    double(k),
		    double(n),
		    ttsil = as.double(0),
		    eda = as.double(0),
		    edb = as.double(0),
		    obj = double(2),
		    clu = integer(n),
		    silinf = matrix(0., n, 4),
		    as.double(1e-15),
		    PACKAGE = "cluster")
    sildim <- res$silinf[, 4]
    if(diss) {
	disv <- x
	## add labels to Fortran output
	if(length(attr(x, "Labels")) != 0) {
	    sildim <- attr(x, "Labels")[sildim]
	    dimnames(res$p) <- list(attr(x, "Labels"), NULL)
	    names(res$clu) <- attr(x, "Labels")
	}
    }
    else {
	## give warning if some dissimilarities are missing.
	if(res$ok == -1)
	    stop("No clustering performed, NA-values in the dissimilarity matrix.")
	disv <- res$dis[ - (1 + (n * (n - 1))/2)]
	disv[disv == -1] <- NA
	class(disv) <- "dissimilarity"
	attr(disv, "Size") <- nrow(x)
	attr(disv, "Metric") <- metric
	attr(disv, "Labels") <- dimnames(x)[[1]]
	## add labels to Fortran output
	if(length(dimnames(x)[[1]]) != 0) {
	    sildim <- dimnames(x)[[1]][sildim]
	    dimnames(res$p) <- list(dimnames(x)[[1]], NULL)
	    names(res$clu) <- dimnames(x)[[1]]
	}
    }
    ## add dimnames to Fortran output
    names(res$obj) <- c("iterations", "objective")
    res$coeff <- c(res$eda, res$edb)
    names(res$coeff) <- c("dunn_coeff", "normalized")

    clustering <- list(membership = res$p, coeff = res$coeff,
                       clustering = res$clu, objective = res$obj,
                       diss = disv, call = match.call())
    if(k != 1) {
	dimnames(res$silinf) <- list(sildim,
				     c("cluster", "neighbor", "sil_width", ""))
	clustering <- c(clustering,
                        list(silinfo = list(widths = res$silinf[, -4],
                             clus.avg.widths = res$avsil[1:k],
                             avg.width = res$ttsil)))
    }
    if(!diss) {
	x2[x2 == valmisdat] <- NA
	clustering$data <- x2
    }
    class(clustering) <- c("fanny", "partition")
    clustering
}

print.fanny <- function(x, ...)
{
    print(x$objective, ...)
    cat("Membership coefficients:\n")
    print(x$membership, ...)
    cat("Coefficients:\n")
    print(x$coeff, ...)
    cat("Closest hard clustering:\n")
    print(x$clustering, ...)
    cat("\nAvailable components:\n")
    print(names(x), ...)
    invisible(x)
}

summary.fanny <- function(object, ...)
{
    class(object) <- "summary.fanny"
    object
}

print.summary.fanny <- function(x, ...)
{
    print(x$objective, ...)
    cat("Membership coefficients:\n")
    print(x$membership, ...)
    cat("Coefficients:\n")
    print(x$coeff, ...)
    cat("Closest hard clustering:\n")
    print(x$clustering, ...)
    if(length(x$silinfo) != 0) {
	cat("\nSilhouette plot information:\n")
	print(x$silinfo[[1]], ...)
	cat("Average silhouette width per cluster:\n")
	print(x$silinfo[[2]], ...)
	cat("Average silhouette width of total data set:\n")
	print(x$silinfo[[3]], ...)
    }
    cat("\n")
    print(x$diss, ...)
    cat("\nAvailable components:\n")
    print(names(x), ...)
    invisible(x)
}

#### Cluster - Internal Utilities
#### ============================ (new by Martin Mchler)

## This was size(); seems slightly useful in general
sizeDiss <- function(d)
{
    ## find `n' for d == dissimilarity-like(<n obs.>), i.e. length(d)= n(n-1)/2
    discr <- 1 + 8 * length(d)
    sqrtdiscr <- round(sqrt(discr))
    if(sqrtdiscr^2 == discr) (1 + sqrtdiscr)/2 else NA
}

## used in  ./agnes.q, ./clara.q,  ./diana.q  und ./pam.q :

lower.to.upper.tri.inds <- function(n)
{
    n1 <- as.integer(n - 1)
    if(n1 < 1) stop("`n' must be >= 2")
    else if(n1 == 1) 1:1
    else rep(1:n1, 1:n1) +
        c(0, unlist(lapply(2:n1, function(k) cumsum(c(0, (n - 2):(n - k))))))
}

upper.to.lower.tri.inds <- function(n)
{
    if((n2 <- as.integer(n - 2)) < 0) stop("`n' must be >= 2")
    rep(1 + cumsum(0:n2), (n - 1):1) +
	unlist(lapply(0:n2, function(k) cumsum(k:n2)))
}

#### consider to *not* export these when I will use a name space :

meanabsdev <- function(y) mean(abs(y - mean(y, na.rm = TRUE)), na.rm = TRUE)
mona <- function(x)
{
    levs <- function(y) levels(as.factor(y))

    ## check type of input matrix
    if(!is.matrix(x) && !is.data.frame(x))
        stop("x must be a matrix or data frame.")
    if(!all(sapply(lapply(as.data.frame(x), levs), length) == 2))
        stop(message = "All variables must be binary (factor with 2 levels).")
    n <- nrow(x)
    jp <- ncol(x)
    ##change levels of input matrix
    x2 <- apply(as.matrix(x), 2, factor)
    x2[x2 == "1"] <- "0"
    x2[x2 == "2"] <- "1"
    x2[is.na(x2)] <- "2"
    ##	x2 <- paste(x2, collapse = "")
    ##	storage.mode(x2) <- "character"
    ## call Fortran routine
    storage.mode(x2) <- "integer"
    res <- .Fortran("mona",
                    as.integer(n),
                    as.integer(jp),
                    x2 = x2,
                    error = as.integer(0),
                    nban = integer(n),
                    ner = integer(n),
                    integer(n),
                    lava = integer(n),
                    integer(jp),
                    PACKAGE = "cluster")
    ## give a warning when errors occured
    if(res$error != 0) {
        ch <- "No clustering performed,"
        switch(res$error,
               ## 1 :
               stop(paste(ch,"an object was found with all values missing.")),
               ## 2 :
               stop(paste(ch,"a variable was found with at least 50% missing values.")),
               ## 3 :
               stop(paste(ch,"a variable was found with all non missing values identical.")),
               ## 4 :
               stop(paste(ch,"all variables have at least one missing value."))
               )
    }
    res$x2 <- matrix(as.numeric(substring(res$x2,
                                          1:nchar(res$x2), 1:nchar(res$x2))),
                     n, jp)
    dimnames(res$x2) <- dnx <- dimnames(x)
    ## add labels to Fortran output
    if(length(dnx[[2]]) != 0) {
        lava <- as.character(res$lava)
        lava[lava != "0"] <- dnx[[2]][res$lava]
        lava[lava == "0"] <- "NULL"
        res$lava <- lava
    }
    ## construct "mona" object
    clustering <- list(data = res$x2, order = res$ner,
                       variable = res$lava[ -1 ], step = res$nban[-1],
                       call = match.call())
    if(length(dnx[[1]]) != 0)
        clustering$order.lab <- dnx[[1]][res$ner]
    class(clustering) <- "mona"
    clustering
}

print.mona <- function(x, ...)
{
    cat("Revised data:\n")
    print(x$data, quote = FALSE, ...)
    cat("Order of objects:\n")
    print(if (length(x$order.lab) != 0) x$order.lab else x$order,
          quote = FALSE, ...)
    cat("Variable used:\n")
    print(x$variable, quote = FALSE, ...)
    cat("Separation step:\n")
    print(x$step, ...)
    cat("\nAvailable components:\n")
    print(names(x), ...)
    invisible(x)
}

summary.mona <- function(object, ...)
{
    class(object) <- "summary.mona"
    object
}

print.summary.mona <- function(x, ...)
{
    print.mona(x, ...)
    invisible(x)
}

#### PAM : Partitioning Around Medoids
#### --- $Id: pam.q,v 1.11 2002/03/04 10:44:45 maechler Exp maechler $
pam <- function(x, k, diss = inherits(x, "dist"),
                metric = "euclidean", stand = FALSE)
{
    if(diss) {
	## check type of input vector
	if(is.na(min(x)))
	    stop("NA-values in the dissimilarity matrix not allowed.")
	if(data.class(x) != "dissimilarity") {
	    if(!is.numeric(x) || is.na(sizeDiss(x)))
		stop("x is not of class dissimilarity and can not be converted to this class." )
	    ## convert input vector to class "dissimilarity"
	    class(x) <- "dissimilarity"
	    attr(x, "Size") <- sizeDiss(x)
	    attr(x, "Metric") <- "unspecified"
	}
	## adapt S dissimilarities to Fortran:
	## convert upper matrix, read by rows, to lower matrix, read by rows.
	n <- attr(x, "Size")
	dv <- x[lower.to.upper.tri.inds(n)]
	## prepare arguments for the Fortran call
	dv <- c(0, dv)
	jp <- 1
	valmd <- double(1)
	jtmd <- integer(1)
	ndyst <- 0
	x2 <- double(n)
    }
    else {
        ## check input matrix and standardize, if necessary
	x <- data.matrix(x)
	if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.")
        x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x
	## put info about metric, size and NAs in arguments for the Fortran call
	ndyst <- if(metric == "manhattan") 2 else 1
	n <- nrow(x2)
	jp <- ncol(x2)
	jtmd <- as.integer(ifelse(is.na(rep(1, n) %*% x2), -1, 1))
	valmisdat <- min(x2, na.rm=TRUE) - 0.5 #(double) VALue for MISsing DATa
	x2[is.na(x2)] <- valmisdat
	valmd <- rep(valmisdat, jp)
	dv <- double(1 + (n * (n - 1))/2)
    }
    if((k <- as.integer(k)) < 1 || k >= n)
        stop("The number of clusters `k' must be in {1,2, .., n-1}.")
    ## call Fortran routine
    storage.mode(dv) <- "double"
    storage.mode(x2) <- "double"
    storage.mode(valmd) <- "double"
    res <- .Fortran("pam",
		    as.integer(n),
		    as.integer(jp),
		    k,
		    x = x2,
		    dys = dv,
                    jdyss = as.integer(diss),# 0/1
		    valmd,
		    jtmd,
		    as.integer(ndyst),
		    integer(n),
		    integer(n),
		    integer(n),
		    double(n),
		    double(n),
		    avsil = double(n),
		    double(n),
		    ttsil = as.double(0),
		    med = integer(k),
		    obj = double(2),
		    clu = integer(n),
		    clusinf = matrix(0., k, 5),
		    silinf = matrix(0., n, 4),
		    isol = integer(k),
		    PACKAGE = "cluster")
    sildim <- res$silinf[, 4]
    if(diss) {
	disv <- x
	## add labels to Fortran output
	if(length(attr(x, "Labels")) != 0) {
	    sildim <- attr(x, "Labels")[sildim]
	    names(res$clu) <- attr(x, "Labels")
	    res$med <- attr(x, "Labels")[res$med]
	}
    }
    else {
	## give warning if some dissimilarities are missing.
	if(res$jdyss == -1)
	    stop("No clustering performed, NA-values in the dissimilarity matrix.")
	## adapt Fortran output to S:
	## convert lower matrix, read by rows, to upper matrix, read by rows.
	disv <- res$dys[-1]
	disv[disv == -1] <- NA
	disv <- disv[upper.to.lower.tri.inds(n)]
	class(disv) <- "dissimilarity"
	attr(disv, "Size") <- nrow(x)
	attr(disv, "Metric") <- metric
	attr(disv, "Labels") <- dimnames(x)[[1]]
	## add labels to Fortran output
	res$med <- x[res$med,  ]
	if(length((dimnames(x)[[1]])) != 0) {
	    sildim <- dimnames(x)[[1]][sildim]
	    names(res$clu) <- dimnames(x)[[1]]
	}
    }
    ## add dimnames to Fortran output
    names(res$obj) <- c("build", "swap")
    res$isol <- factor(res$isol, levels = c(0, 1, 2),
		       labels = c("no", "L", "L*"))
    names(res$isol) <- 1:k
    dimnames(res$clusinf) <- list(NULL, c("size", "max_diss", "av_diss",
					  "diameter", "separation"))
    ## construct S object
    clustering <-
        list(medoids = res$med, clustering = res$clu,
             objective = res$obj, isolation = res$isol,
             clusinfo = res$clusinf,
             silinfo = if(k != 1) {
                 dimnames(res$silinf) <-
                     list(sildim, c("cluster", "neighbor", "sil_width", ""))
                 list(widths = res$silinf[, -4],
                      clus.avg.widths = res$avsil[1:k],
                      avg.width = res$ttsil)
             },
             diss = disv,
             call = match.call())
    if(!diss) {
	x2[x2 == valmisdat] <- NA
	clustering$data <- x2
    }
    class(clustering) <- c("pam", "partition")
    clustering
}

print.pam <- function(x, ...)
{
    cat("Medoids:\n")
    print(x$medoids, ...)
    cat("Clustering vector:\n")
    print(x$clustering, ...)
    cat("Objective function:\n")
    print(x$objective, ...)
    cat("\nAvailable components:\n")
    print(names(x), ...)
    invisible(x)
}

summary.pam <- function(object, ...)
{
    class(object) <- "summary.pam"
    object
}

print.summary.pam <- function(x, ...)
{
    cat("Medoids:\n")
    print(x$medoids, ...)
    cat("Clustering vector:\n")
    print(x$clustering, ...)
    cat("Objective function:\n")
    print(x$objective, ...)
    cat("\nNumerical information per cluster:\n")
    print(x$clusinfo, ...)
    cat("\nIsolated clusters:\n")
    cat("L-clusters: ")
    print(names(x$isolation[x$isolation == "L"]), quote = FALSE, ...)
    cat("L*-clusters: ")
    print(names(x$isolation[x$isolation == "L*"]), quote = FALSE, ...)
    if(length(x$silinfo) != 0) {
	cat("\nSilhouette plot information:\n")
	print(x$silinfo[[1]], ...)
	cat("Average silhouette width per cluster:\n")
	print(x$silinfo[[2]], ...)
	cat("Average silhouette width of total data set:\n")
	print(x$silinfo[[3]], ...)
    }
    cat("\n")
    print(x$diss, ...)
    cat("\nAvailable components:\n")
    print(names(x), ...)
    invisible(x)
}

### $Id: plothier.q,v 1.9 2002/03/04 10:45:09 maechler Exp maechler $

pltree <- function(x, ...) UseMethod("pltree")

pltree.twins <- function(x, main = paste("Dendrogram of ", deparse(call)), ...)
{
    call <- x$call
    labels <- NULL
    if(length(x$order.lab) != 0) {
        names(x$order) <- names(x$order.lab) <- 1:length(x$order)
        labels <- x$order.lab[names(sort(x$order))]
    }
    x <- list(order = x$order, height = sort(x$height), merge = x$merge)

    if( sapply(R.version[c("major","minor")], as.numeric) %*% c(10,1) >= 12 ) {
        if(is.null(labels))
             plot.hclust(x,                  main = main, ylab = "Height", ...)
        else plot.hclust(x, labels = labels, main = main, ylab = "Height", ...)
    }
    else { ## R <= 1.1
        if(is.null(labels))
             plot.hclust(x,                , ylab = "Height", ...)
        else plot.hclust(x, labels = labels, ylab = "Height", ...)
        title(main = main, ...)
    }
    invisible()
}

## plot.diana() [further down] & plot.agnes() are  almost identical;
##  just the bannerplot differs a bit ....

plot.agnes <- function(x, ask = FALSE, which.plots = NULL,
                       main = NULL,
                       sub = paste("Agglomerative Coefficient = ",
                                   round(x$ac, digits = 2)),
                       adj = 0, nmax.lab = 35, max.strlen = 5, ...)
{
    bannerplot <- function(x, ...)
    {
        w <- rev(x$height)
        m <- max(x$height)
        w <- rbind(w, m - w)
        barplot(w, xlab = "Height", horiz = TRUE, inside = FALSE,
                space = 0, axes = FALSE, col = c(0, 2),
                mgp = c(2.5, 1, 0), ...)
        title(main = main1, sub = sub, adj = adj)
        flrm <- floor(m); ss <- seq(0, flrm, length = 11)
        at.vals <- c(ss, m)
        lab.vals<- c(ss, round(m, digits = 2))
        axis(1, at = at.vals, labels = lab.vals, ...)
        if(length(x$order) < nmax.lab) {
            names <- if (length(x$order.lab) != 0)
                substring(rev(x$order.lab), 1, max.strlen)
            else rev(x$order)
            axis(4, at = 0:(length(x$order) - 1),
                 labels = names, pos = m, mgp = c(3, 1.25, 0), ...)
        }
    }

    if(is.null(main)) {
        ## Different default for banner & pltree:
        cl <- deparse(x$call)
        main1 <- paste("Banner of ", cl)
        main2 <- paste("Dendrogram of ", cl)
    }
    else { # same title for both
        main1 <- main2 <- main
    }

    if(is.null(which.plots) && !ask)
        which.plots <- 1:2
    if(ask && is.null(which.plots)) { ## Use `menu' ..
        tmenu <- paste("plot ", ## choices :
                       c("All", "Banner", "Clustering Tree"))
        do.all <- FALSE
        repeat {
            if(!do.all)
                pick <- menu(tmenu, title =
                             "\nMake a plot selection (or 0 to exit):\n") + 1
            switch(pick,
                   return(invisible()), # 0 -> exit loop
                   do.all <- TRUE,# 1 : All
                   bannerplot(x, ...),# 2
                   pltree(x, main = main2, sub = sub, ...)
                   )
            if(do.all) { pick <- pick + 1; do.all <- pick <= length(tmenu) + 1}
        }
    }
    else {
        ask <- prod(par("mfcol")) < length(which.plots) && dev.interactive()
        if(ask) {
            op <- par(ask = TRUE)
            on.exit(par(op))
        }
        for(i in which.plots)
        switch(i,
               bannerplot(x, ...),
               pltree    (x, main = main2, sub = sub, ...)
               )
    }
    invisible()
}

plot.diana <-
function(x, ask = FALSE, which.plots = NULL, main = NULL,
         sub  = paste("Divisive Coefficient = ", round(x$dc, digits = 2)),
         adj = 0, nmax.lab = 35, max.strlen = 5, ...)
{
    bannerplot <- function(x, ...)
    {
        w <- rev(x$height)
        m <- max(x$height)
        w <- rbind(m - w, w)
        barplot(w, xlab = "Height", horiz = TRUE, inside = FALSE,
                space = 0, axes = FALSE, col = c(2, 0),
                mgp = c(2.5, 1, 0), ...)
        title(main = main1, sub = sub, adj = adj)
        flrm <- floor(m); ss <- seq(0, flrm, length = 11)
        at.vals <- c(0, ss + m - flrm)
        lab.vals <- c(round(m, digits = 2), ss)
        axis(1, at = at.vals, labels = lab.vals, ...)
        if(length(x$order) < nmax.lab) {
            names <- if (length(x$order.lab) != 0)
                substring(rev(x$order.lab), 1, max.strlen)
            else rev(x$order)
            axis(2, at = 0:(length(x$order) - 1),
                 labels = names, pos = 0, mgp = c(3, 1.5, 0), ...)
        }
    }

    if(is.null(main)) {
        ## Different default for banner & pltree:
        cl <- deparse(x$call)
        main1 <- paste("Banner of ", cl)
        main2 <- paste("Dendrogram of ", cl)
    }
    else { # same title for both
        main1 <- main2 <- main
    }

    if(is.null(which.plots) && !ask)
        which.plots <- 1:2
    if(ask && is.null(which.plots)) { ## Use `menu' ..
        tmenu <- paste("plot ", ## choices :
                       c("All", "Banner", "Clustering Tree"))
        do.all <- FALSE
        repeat {
            if(!do.all)
                pick <- menu(tmenu, title =
                             "\nMake a plot selection (or 0 to exit):\n") + 1
            switch(pick,
                   return(invisible()), # 0 -> exit loop
                   do.all <- TRUE,# 1 : All
                   bannerplot(x, ...),# 2
                   pltree(x, main = main2, sub = sub, ...)
                   )
            if(do.all) { pick <- pick + 1; do.all <- pick <= length(tmenu) + 1}
        }
    }
    else {
        ask <- prod(par("mfcol")) < length(which.plots) && dev.interactive()
        if(ask) {
            op <- par(ask = TRUE)
            on.exit(par(op))
        }
        for(i in which.plots)
        switch(i,
               bannerplot(x, ...),# i = 1
               pltree    (x, main = main2, sub = sub, ...) # i = 2
               )
    }
    invisible()
}

plot.mona <- function(x, main = paste("Banner of ", deparse(x$call)),
                      col = 2, axes = TRUE, adj = 0,
                      nmax.lab = 35, max.strlen = 5, ...)
{
    w <- rev(x$step)
    w[w==0] <- max(w)+1
    m <- max(w)
    barplot(rbind(w, m - w), xlab = "Separation step", horiz = TRUE,
            inside = FALSE, space = 0, axes = FALSE,
            col = c(col, 0), mgp = c(2.5, 1, 0), ...)
    title(main = main, adj = adj, ...)
    if(axes) axis(1, at = 0:m, labels = 0:m, ...)
    if(length(x$order) < nmax.lab) {
        names <- if (length(x$order.lab) != 0)
            substring(rev(x$order.lab), 1, max.strlen)
        else rev(x$order)
        if(axes)
            axis(2, at = 0:(length(x$order) - 1), labels = names, pos = 0,
                 mgp = c(3, 1.5, 0), las = 1, ...)
    }
    names <- rev(x$variable)
    names[rev(x$step) == 0] <- ""
    text(w, 0:(length(x$order) - 2) + 0.5, labels = paste(" ", names),
         adj = adj, col = col, ...)
    invisible()
}
### $Id: plotpart.q,v 1.12 2002/01/23 18:20:33 maechler Exp $
plot.partition <-
function(x, ask = FALSE, which.plots = NULL,
         nmax.lab = 40, max.strlen = 5,
         cor = TRUE, stand = FALSE, lines = 2,
         shade = FALSE, color = FALSE, labels = 0, plotchar = TRUE,
         span = TRUE, xlim = NULL, ylim = NULL, ...)
{
    silhouPlot <- function(x, nmax.lab, max.strlen) {
        if(length(x$silinfo) == 0)
            stop("No silhouette plot available when the number of clusters equals 1." )
        s <- rev(x$silinfo[[1]][, 3])
        space <- c(0, rev(diff(x$silinfo[[1]][, 1])))
        space[space != 0] <- 0.5
        names <- if(length(s) < nmax.lab)
            substring(rev(dimnames(x$silinfo[[1]])[[1]]), 1, max.strlen)
        barplot(s, space = space, names = names,
                xlab = "Silhouette width",
                xlim = c(min(0, min(s)), 1), horiz = TRUE,
                mgp = c(2.5, 1, 0), ...)
        title(main = paste("Silhouette plot of ",
              deparse(x$call)),
              sub = paste("Average silhouette width : ",
              round(x$ silinfo$avg.width, digits = 2)), adj = 0)
    }

    if(is.null(which.plots) && !ask)
        which.plots <- 1:2
    if(ask && is.null(which.plots)) { ## Use `menu' ..
        tmenu <- paste("plot ", ## choices :
                       c("All", "Clusplot", "Silhouette Plot"))
        do.all <- FALSE
        repeat {
            if(!do.all)
                pick <- menu(tmenu, title =
                             "\nMake a plot selection (or 0 to exit):\n") + 1
            switch(pick,
                   return(invisible())# 0 -> exit loop
                   ,
                   do.all <- TRUE# 1 : All
                   ,
                   clusplot(x, cor = cor, stand = stand, lines = lines,
                            shade = shade, color = color, labels = labels,
                            plotchar = plotchar, span = span,
                            xlim = xlim, ylim = ylim, ...)
                   ,
                   silhouPlot(x, nmax.lab, max.strlen)
                   )
            if(do.all) { pick <- pick + 1; do.all <- pick <= length(tmenu) + 1}
        }
    }
    else {
        ask <- prod(par("mfcol")) < length(which.plots) && dev.interactive()
        if(ask) { op <- par(ask = TRUE); on.exit(par(op)) }
        for(i in which.plots)
        switch(i,
               clusplot(x, cor = cor, stand = stand, lines = lines,
                        shade = shade, color = color, labels = labels,
                        plotchar = plotchar, span = span,
                        xlim = xlim, ylim = ylim, ...)
               ,
               silhouPlot(x, nmax.lab, max.strlen)
               )
    }
    invisible()
}

clusplot <- function(x, ...) UseMethod("clusplot")


clusplot.default <-
function(x, clus, diss = FALSE, cor = TRUE, stand = FALSE, lines = 2,
         shade = FALSE, color = FALSE, labels = 0, plotchar = TRUE,
         col.p = "dark green", # was 5 (= shaded col)
         col.txt = col.p,
         span = TRUE, xlim = NULL, ylim = NULL,
         main = paste("CLUSPLOT(", deparse(substitute(x)),")"),
         verbose = getOption("verbose"),
         ...)
{
    if(paste(R.version$major, R.version$minor, sep=".") < 1.5) {
        ## a simplified (add = T) version of R 1.5's cmdscale():
        cmdscale <- function (d, k = 2, add = TRUE, ...) {
            if (any(is.na(d)))
                stop("NA values not allowed in d")
            if (is.null(n <- attr(d, "Size"))) {
                d <- as.matrix(d)
                x <- d^2
                if ((n <- nrow(x)) != ncol(x))
                    stop("Distances must be result of dist or a square matrix")
            }
            else {
                x <- matrix(0, n, n)
                if(add) d0 <- x
                x[row(x) > col(x)] <- d^2
                x <- x + t(x)
                if(add) {
                    d0[row(x) > col(x)] <- d
                    d <- d0 + t(d0)
                }
            }
            storage.mode(x) <- "double"
            x <- .C("dblcen", x=x, as.integer(n), PACKAGE="mva")$x
            if(add) { ## solve the additive constant problem
                i2 <- n + (i <- 1:n)
                Z <- matrix(0, 2*n, 2*n)
                Z[cbind(i2,i)] <- -1
                Z[ i, i2] <- -x
                Z[i2, i2] <- .C("dblcen", x= 2*d, as.integer(n),PACKAGE="mva")$x
                e <- La.eigen(Z,symmetric = FALSE, only.val = TRUE)$values
                add.c <- max(Re(e))
                x <- matrix(double(n*n), n, n)
                non.diag <- row(d) != col(d)
                x[non.diag] <- (d[non.diag] + add.c)^2
            }
            e <- La.eigen(-x/2, symmetric = TRUE)
            ev <- e$values[1:k]
            points <- e$vectors[, 1:k] %*% diag(sqrt(ev), k)
            rn <- if(is.matrix(d)) rownames(d) else names(d)
            dimnames(points) <- list(rn, NULL)
            evalus <- e$values[-n]
            list(points = points, eig = ev, ac = if(add) add.c else 0,
                 GOF = sum(ev)/c(sum(abs(evalus)),
                                 sum(evalus[evalus > 0])))
        }
    }## cmdscale() -- if R version < 1.5

    clas.snijpunt <- function(x, loc, m, n, p)
    {
        if(     loc[n, m] <= x[1, m] && x[1, m] <= loc[p, m]) x[1, ]
        else if(loc[n, m] <= x[2, m] && x[2, m] <= loc[p, m]) x[2, ]
        else NA
    }
    coord.snijp1 <- function(x, gemid)
        x[2, 2] - 2 * x[1, 2] * gemid + x[1, 1] * gemid^2
    coord.snijp2 <- function(x, d2, y)
        ((x[1, 1] * x[2, 2] - x[1, 2]^2) * d2)/y
    coord.snijp3 <- function(xx, y, gemid)
    {
        sy <- sqrt(y)
        sy <- c(sy, -sy)
        cbind(xx[1] + sy,
              xx[2] + gemid*sy)
    }

    ## BEGIN ----

    (main)# eval
    if(is.data.frame(x))
        x <- data.matrix(x)
    if(!is.numeric(x))
        stop("x is not numeric")

    if(diss) {
        if(is.na(min(x)))
            stop(message = "NA-values in x are not allowed.")
        if((data.class(x)) != "dissimilarity") {
            if(is.na(sizeDiss(x))) {
                if((n <- nrow(x)) != ncol(x))
                    stop("Distances must be result of dist or a square matrix.")
                if(all.equal(x, t(x)) != TRUE)
                    stop("the square matrix is not symmetric.")
                labels1 <-
                    if(length(dimnames(x)[[1]]) == 0) 1:nrow(x)
                    else dimnames(x)[[1]]
            }
            else {
                if(!is.vector(x)) {
                    labels1 <- attr(x, "Labels") # possibly NULL
                    x <- as.matrix(x)
                    if((n <- nrow(x)) == ncol(x) &&
                       all.equal(x, t(x)) == TRUE) {
                        labels1 <-
                            if(length(dimnames(x)[[1]]) == 0) 1:nrow(x)
                            else dimnames(x)[[1]]
                    }
                    else {
                        if(is.null(labels1))
                            labels1 <- 1:sizeDiss(x)
                        attr(x, "Size") <- sizeDiss(x)
                    }
                }
                else {
                    attr(x, "Size") <- sizeDiss(x)
                    labels1 <- 1:sizeDiss(x)
                }
            }
        }
        else {
            labels1 <-
                if(length(attr(x, "Labels")) == 0)
                    1:attr(x, "Size")
                else attr(x, "Labels")
        }
        x1 <- cmdscale(x, k = 2, eig = TRUE, add = TRUE)
        if(x1$ac < 0)
            x1 <- cmdscale(x, k = 2, eig = TRUE)
        var.dec <- x1$GOF[2] # always in [0,1]
        x1 <- x1$points
    }
    else { ## Not (diss)
        if(!is.matrix(x)) stop("x is not a data matrix")
        if(is.na(min(x))) { ## any(is.na(x))
            y <- is.na(x)
            y1 <- apply(y, 1, sum)
            y2 <- apply(y, 2, sum)
            if(any(y1 == ncol(x)))
                stop("one or more objects contain only missing values")
            if(any(y2 == nrow(x)))
                stop("one or more variables contain only missing values")
            x <- apply(x, 2, function(x)
                   { x[is.na(x)] <- median(x, na.rm = TRUE); x } )
            cat("Missing values were displaced by the median of the corresponding variable(s)\n")

        }

        labels1 <-
            if(length(dimnames(x)[[1]]) == 0) 1:nrow(x)
            else dimnames(x)[[1]]

        if(ncol(x) == 1) {
            hulp <- rep(0, length(x))
            x1 <- matrix(c(t(x), hulp), ncol = 2)
            var.dec <- 1
        }
        else {
            prim.pr <- princomp(x, scores = TRUE, cor = ncol(x) != 2)
            var.dec <- cumsum(prim.pr$sdev^2/sum(prim.pr$ sdev^2))[2]
            x1 <- prim.pr$scores
            x1 <- cbind(x1[, 1], x1[, 2])
        }
    }

    ## --- The 2D space is setup and points are in x1[,]  (aantal x 2) ---

    clus <- as.vector(clus)
    if(length(clus) != length(x1[, 1]))
        stop("The clustering vector has not the good length")
    clus <- as.factor(clus)
    if(sum(is.na(clus)) != 0)
        stop("NA-values are not allowed in clustering vector")
    if(stand)
        x1 <- scale(x1)

    rangx <- range(x1[, 1])
    rangy <- range(x1[, 2])
    minx <- rangx[1]
    maxx <- rangx[2]
    miny <- rangy[1]
    maxy <- rangy[2]
    levclus <- levels(clus)
    n <- length(levclus) # the number of clusters
    z <- A <- vector("list", n)
    maxima <- loc <- matrix(0, nrow = n, ncol = 2)
    d2 <- verhoud <- numeric(n)
    verhouding <- 0
    ## num1 .. num6 : all used only once -- there are more constants anyway
    num3 <- 90
    num6 <- 70

    for(i in 1:n) { ##-------------  i-th cluster  --------------
	x <- x1[clus == levclus[i],, drop = FALSE ]
        aantal <- nrow(x) # number of observations in cluster [i]
        cov <- var(if(aantal == 1) {
                     if(verbose)
                         cat("cluster",i," has only one observation ..\n")
                     rbind(x, c(0, 0))
                   } else x)
        x.1 <- range(x[, 1])
        y.1 <- range(x[, 2])
        notrank2 <- qr(cov, tol = 0.001)$rank != 2
        if(!span && notrank2) {
            d2[i] <- 1
            if((abs(diff(x.1)) > (diff(rangx)/70)) ||
               (abs(diff(y.1)) > (diff(rangy)/50))) {
                loc[i, ] <- c(x.1[1] + diff(x.1)/2, y.1[1] + diff(y.1)/2)
                a <- sqrt((loc[i, 1] - x.1[1])^2 +
                          (loc[i, 2] - y.1[1])^2)
                a <- a + 0.05 * a
                num2 <- 40
                if(abs(diff(x.1)) > diff(rangx)/70 ) {
                    ind1 <- which.max(x[,1])
                    ind2 <- which.min(x[,1])
                    q <- atan((x[ind1, 2] - x[ind2, 2])/
                              (x[ind1, 1] - x[ind2, 1]))
                    b <-
                        if(diff(rangy) == 0)
                            1
                        else if(abs(diff(y.1)) > diff(rangy)/50)
                            diff(y.1)/10 ## num1 <- 10
                        else diff(rangy)/num2
                }
                else {
                    b <- if(diff(rangx) == 0) 1 else diff(rangx)/num2
                    q <- pi/2
                }
                D <- diag(c(a^2, b^2))
                R <- rbind(c(cos(q), -sin(q)),
                           c(sin(q),  cos(q)))
                A[[i]] <- (R %*% D) %*% t(R)
            }
            else {
                a <- diff(rangx)/num3
                b <- diff(rangy)/num6
                if(a == 0) a <- 1
                if(b == 0) b <- 1
                A[[i]] <- diag(c(a^2, b^2))
                loc[i, ] <- x[1, ]
            }
            oppervlak <- pi * a * b
        }
        else if(span && notrank2) {
            d2[i] <- 1
            if(sum(x[, 1] != x[1, 1]) != 0 ||
               sum(x[, 2] != x[1, 2]) != 0) {
                loc[i, ] <- c(x.1[1] + diff(x.1)/2,
                              y.1[1] + diff(y.1)/2)
                a <- sqrt((loc[i, 1] - x.1[1])^2 +
                          (loc[i, 2] - y.1[1])^2)
                if(any(x[, 1] != x[1, 1])) {
                    ind1 <- which.max(x[,1])
                    ind2 <- which.min(x[,1])
                    q <- atan((x[ind1, 2] - x[ind2, 2])/
                              (x[ind1, 1] - x[ind2, 1]))
                }
                else {
                    q <- pi/2
                }
                b <- 1e-7
                D <- diag(c(a^2, b^2))
                R <- rbind(c(cos(q), -sin(q)),
                           c(sin(q),  cos(q)))
                A[[i]] <- (R %*% D) %*% t(R)
            }
            else {
                a <- diff(rangx)/num3
                b <- diff(rangy)/num6
                if(a == 0) a <- 1
                if(b == 0) b <- 1
                A[[i]] <- diag(c(a^2, b^2))
                loc[i, ] <- x[1, ]
            }
            oppervlak <- pi * a * b

        }
        else { ## rank2
            if(!span) {
                loc[i, ] <- apply(x, 2, mean)
                d2[i] <- max(mahalanobis(x, loc[i, ], cov))
                ## * (1+ 0.01)^2  --- dropped factor for back-compatibility
            }
            else { ## span and rank2
                if(verbose)
                    cat("span & rank2 : calling \"spannel\" ..\n")
                k <- as.integer(2)
                res <- .Fortran("spannel",
                                aantal,
                                ndep= k,
                                dat = cbind(1., x),
                                sqdist = double(aantal),
                                l1 = double((k+1) ^ 2),
                                double(k),
                                double(k),
                                prob = double(aantal),
                                double(k+1),
                                eps = as.double(0.01),## convergence tol.
                                maxit = as.integer(5000),
                                ierr = as.integer(0),
                                PACKAGE = "cluster")
                if(res$ierr != 0)
                    ## MM : exactmve not available here !
                    cat("Error in Fortran routine for the spanning ellipsoid,",
                        "\n rank problem??\n", sep="")

                cov <- cov.wt(x, res$prob)
                loc[i, ] <- cov$center
                ## NB: cov.wt() in R has extra wt[] scaling; revert here:
                cov <- cov$cov * (1 - sum(cov$wt^2))
                d2[i] <- weighted.mean(res$sqdist, res$prob)

                if(verbose)
                    cat("ellipse( A= (", format(cov[1,]),"*", format(cov[2,2]),
                        "),\n\td2=", format(d2[i]),
                        ", loc[]=", format(loc[i, ]), ")\n")
            }
            A[[i]] <- cov
            ## oppervlak (flam.)  =  area (Engl.)
            oppervlak <- pi * d2[i] * sqrt(cov[1, 1] * cov[2, 2] - cov[1, 2]^2)
        }

        z[[i]] <- ellipsoidPoints(A[[i]], d2[i], loc[i, ], n= 201)
        maxima[i, ] <- z[[i]][201, ]
        rx <- range(z[[i]][, 1])
        ry <- range(z[[i]][, 2])
        minx <- min(minx, rx[1])
        maxx <- max(maxx, rx[2])
        miny <- min(miny, ry[1])
        maxy <- max(maxy, ry[2])
        verhoud[i] <- aantal/oppervlak
        if(verhoud[i] < 1e7)
            verhouding <- verhouding + verhoud[i]
    } ## end for( i-th cluster )

    if(verhouding == 0)
        verhouding <- 1
    ## num4 <- 37 ; num5 <- 3 --- but `41' is another constant
    density <- 3 + (verhoud * 37)/verhouding
    density[density > 41] <- 41
    if (span) {
        if (rangx[1] == rangx[2]) { ## diff(rangx)==0 : x-coords all the same
            minx <- x1[1, 1] - 1
            maxx <- x1[1, 1] + 1
        }
        if (rangy[1] == rangy[2]) { ## diff(rangy)==0 : y-coords all the same
            miny <- x1[1, 2] - 1
            maxy <- x1[1, 2] + 1
        }
    }
    if(!is.null(xlim)) {
        if(xlim[1] < minx) minx <- xlim[1]
        if(xlim[2] > maxx) maxx <- xlim[2]
    }
    if(!is.null(ylim)) {
        if(ylim[1] < miny) miny <- ylim[1]
        if(ylim[2] > maxy) maxy <- ylim[2]
    }

    ## --- Now plotting starts ---

    plot(x1[, 1], x1[, 2], xlim = c(minx, maxx), ylim = c(miny, maxy),
         xlab = "Component 1", ylab = "Component 2",
         main = main,
         type = if(plotchar) "n" else "p", # if(plotchar) add points later
         col = col.p, ...)
    title(sub = paste("These two components explain",
          round(100 * var.dec, digits = 2), "% of the point variability."),
          adj = 0)

    if(color) {
        color1 <- c(2, 4, 6, 3)
        i.verh <- order(verhoud)
        jInd <- if(n > 4) pam(verhoud[i.verh], 4)$clustering else 1:n
        for(i in 1:n) {
            k <- i.verh[i]
            polygon(z[[k]], density = if(shade) density[k] else 0,
                    col = color1[jInd[i]], ...)
        }
    }
    else {
        for(i in 1:n)
            polygon(z[[i]], density = if(shade) density[i] else 0,
                    col = 5, ...)
    }

    ## points after polygon in order to write ON TOP:
    if(plotchar) {
        karakter <- 1:19
        for(i in 1:n) {
            x <- x1[clus == levclus[i],  , drop = FALSE]
            kar <- 1+(i-1) %% 19
            points(x[, 1], x[, 2], pch = karakter[kar], col = col.p, ...)
        }
    }

    if((lines == 1 || lines == 2) && n > 1) {
        ## Draw lines between all pairs of the  n  cluster (centers)
        afstand <- matrix(0, ncol = n, nrow = n)
        for(i in 1:(n - 1)) {
            for(j in (i + 1):n) {
                gemid <- (loc[j, 2] - loc[i, 2])/(loc[j, 1] - loc[i, 1])
                s0 <- coord.snijp1(A[[i]], gemid)
                b0 <- coord.snijp2(A[[i]], d2[i], s0)
                snijp.1 <- coord.snijp3(loc[i,], y=b0, gemid)
                s1 <- coord.snijp1(A[[j]], gemid)
                b1 <- coord.snijp2(A[[j]], d2[j], s1)
                snijp.2 <- coord.snijp3(loc[j,], y=b1, gemid)
                if(loc[i, 1] != loc[j, 1]) {
                    if(loc[i, 1] < loc[j, 1]) {
                        punt.1 <- clas.snijpunt(snijp.1, loc, 1, i, j)
                        punt.2 <- clas.snijpunt(snijp.2, loc, 1, i, j)
                    }
                    else {
                        punt.1 <- clas.snijpunt(snijp.1, loc, 1, j, i)
                        punt.2 <- clas.snijpunt(snijp.2, loc, 1, j, i)
                    }
                }
                else {
                    if(loc[i, 2] < loc[j, 2]) {
                        punt.1 <- clas.snijpunt(snijp.1, loc, 2, i, j)
                        punt.2 <- clas.snijpunt(snijp.2, loc, 2, i, j)
                    }
                    else {
                        punt.1 <- clas.snijpunt(snijp.1, loc, 2, j, i)
                        punt.2 <- clas.snijpunt(snijp.2, loc, 2, j, i)
                    }
                }
                if(is.na(punt.1[1]) || is.na(punt.2[1]) ||
                   (sqrt((punt.1[1] - loc[i, 1])^2 +
                         (punt.1[2] - loc[i, 2])^2) +
                    sqrt((punt.2[1] - loc[j, 1])^2 +
                         (punt.2[2] - loc[j, 2])^2)) >
                   sqrt((loc[j, 1] - loc[i, 1])^2 +
                        (loc[j, 2] - loc[i, 2])^2))
                {
                    afstand[i, j] <- NA
                }
                else if(lines == 1) {
                    afstand[i, j] <- sqrt((loc[i, 1] - loc[j, 1])^2 +
                                          (loc[i, 2] - loc[j, 2])^2)
                    segments(loc[i, 1], loc[i, 2],
                             loc[j, 1], loc[j, 2], col = 6, ...)
                }
                else { ## lines == 2
                    afstand[i, j] <- sqrt((punt.1[1] - punt.2[1])^2 +
                                          (punt.1[2] - punt.2[2])^2)
                    segments(punt.1[1], punt.1[2],
                             punt.2[1], punt.2[2], col = 6, ...)
                }
            }
        }
        afstand <- t(afstand) + afstand
    }
    else afstand <- NULL

    ## FIXME: The following is *not* elegant..
    if(labels == 1) {
        for(i in 1:n) {
            x1 <- rbind(x1, z[[i]][cumsum(rep(10, 40)), ])
            labels1 <- c(labels1, rep(levclus[i], 40))
        }
        identify(x1[, 1], x1[, 2], labels1, col = col.txt)
    }
    else if(labels == 2) {
        x1 <- rbind(x1, maxima)
        labels1 <- c(labels1, levclus)
        x1[, 1] <- x1[, 1] + (maxx - minx)/130
        x1[, 2] <- x1[, 2] + (maxy - miny)/50
        text(x1, labels = labels1, col = col.txt)
    }
    else if(labels == 3) {
        x1[, 1] <- x1[, 1] + (maxx - minx)/130
        x1[, 2] <- x1[, 2] + (maxy - miny)/50
        text(x1, labels = labels1, col = col.txt)
    }
    else if(labels == 4) {
        maxima[, 1] <- maxima[, 1] + (maxx - minx)/ 130
        maxima[, 2] <- maxima[, 2] + (maxy - miny)/ 50
        text(maxima, labels = levclus, col = col.txt)
    }

    density[density == 41] <- NA
    invisible(list(Distances = afstand, Shading = density))
}

clusplot.partition <- function(x, main = NULL, ...)
{
    if(is.null(main) && !is.null(x$call))
	main <- paste("clusplot(",format(x$call),")", sep="")
    if(length(x$data) != 0 &&
       (!is.na(min(x$data)) || data.class(x) == "clara"))
	clusplot.default(x$data, x$clustering, diss = FALSE, main = main, ...)
    else clusplot.default(x$diss, x$clustering, diss = TRUE, main = main, ...)

}
.First.lib <- function(lib, pkg) {
  require(mva)
  library.dynam("cluster", pkg, lib)
}
