Skip to content

Commit

Permalink
initial version of row_lm_f
Browse files Browse the repository at this point in the history
  • Loading branch information
karoliskoncevicius committed Mar 1, 2022
1 parent 57c35a9 commit d1f3e09
Show file tree
Hide file tree
Showing 6 changed files with 200 additions and 6 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,5 @@ export(row_cosinor)
export(col_cosinor)
export(row_andersondarling)
export(col_andersondarling)
export(row_lm_f)
export(col_lm_f)
8 changes: 4 additions & 4 deletions R/cosinor.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ row_cosinor <- function(x, t, period=24) {

nobs <- rowSums(!is.na(x))

b0 <- rep.int(1, ncol(x))
b1 <- sinpi(2*t/period)
b2 <- cospi(2*t/period)
B <- cbind(b0, b1, b2)
B <- matrix(nrow = ncol(x), ncol = 3)
B[,1] <- rep.int(1, ncol(x))
B[,2] <- if(length(period) > 0) sinpi(2*t/period) else NA
B[,3] <- if(length(period) > 0) cospi(2*t/period) else NA

res <- do_regression(x, B)

Expand Down
104 changes: 104 additions & 0 deletions R/lm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#' F-test for nested linear regression models
#'
#' Performs an F-test comparing two nested linear regression models on each row/column of the input matrix.
#'
#' \code{row_lm_f} - F-test for linear regression on rows.
#' \code{col_lm_f} - F-test for linear regression on columns.
#'
#' @param x numeric matrix.
#' @param m - a model matrix for a linear regression model to be tested.
#' @param null - a null model which the original model will be compared against (default = intercept-only).
#'
#' @return a data.frame where each row contains the results of an F-test
#' performed on the corresponding row/column of x.\cr\cr
#' Each row contains the following information (in order):\cr
#' 1. obs - total number of observations\cr
#' 2. betaN - estimated coefficients (as many as there are column in m)\cr
#' 3. rsquared.model - R-squared of the full model\cr
#' 4. rsquared.null - R-squared of the null model\cr
#' 5. df.model - model terms degrees of freedom\cr
#' 6. df.residual - residual degrees of freedom\cr
#' 7. statistic - F statistic\cr
#' 8. pvalue - p-value\cr
#'
#' @seealso \code{lm()}, \code{anova()}
#'
#' @examples
#' X <- t(iris[,-5])
#' mod <- model.matrix(~ iris$Species)
#' row_lm_f(X, mod)
#'
#' # with specified null model
#' X <- mtcars[,c("mpg", "disp", "qsec")]
#' mod <- model.matrix(~ mtcars$drat + mtcars$cyl + mtcars$wt + mtcars$vs)
#' mod0 <- model.matrix(~ mtcars$cyl + mtcars$wt + mtcars$vs)
#' col_lm_f(X, mod, mod0)
#'
#' @author Karolis Koncevičius
#' @name linearmodel
#' @export
row_lm_f <- function(x, m, null=stats::model.matrix(~ 1, data=data.frame(seq_len(nrow(m))))) {
is.null(x)
is.null(m)

if(is.vector(x))
x <- matrix(x, nrow=1)

if(is.data.frame(x) && all(sapply(x, is.numeric)))
x <- data.matrix(x)

assert_numeric_mat_or_vec(x)
assert_numeric_mat(m)
assert_numeric_mat(null)
assert_ncol_equal_nrow(x, m)
assert_equal_nrow(m, null)
assert_unique_colnames(m)
assert_unique_colnames(null)
assert_nested_model(null, m)
assert_all_in_open_interval(m, -Inf, Inf)

hasinfx <- is.infinite(x)
x[hasinfx] <- NA
hasinfx <- rowSums(hasinfx) > 0


nobs <- rowSums(!is.na(x))

res1 <- do_regression(x, m)
res0 <- do_regression(x, null)

df <- res1$stats$dfmod - res0$stats$dfmod
dfres <- res1$stats$dfres

statistic <- (res0$stats$ssres - res1$stats$ssres) / df
statistic <- statistic / (res1$stats$ssres / dfres)
p <- 1 - stats::pf(statistic, df, dfres)


# TODO: gather all the warnings
w1 <- hasinfx
showWarning(w1, 'had infinite observations that were removed')


# TODO: decide what to do about beta values
rownames(res1$betas) <- paste0("beta.", 1:nrow(res1$betas)-1)

# TODO: add partial r-squared (eta squared?) to the output
# TODO: maybe also return RSS values

rnames <- rownames(x)
if(!is.null(rnames)) rnames <- make.unique(rnames)
data.frame(obs=nobs, t(res1$betas),
rsquared.model=res1$stats$rsq, rsquared.null=res0$stats$rsq,
df.model=df, df.residual=dfres, statistic=statistic, pvalue=p,
row.names=rnames
)
}


#' @rdname linearmodel
#' @export
col_lm_f <- function(x, m, null=stats::model.matrix(~ 1, data=data.frame(seq_len(nrow(m))))) {
row_lm_f(t(x), m, null)
}

26 changes: 25 additions & 1 deletion R/utils.asserts.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ assert_numeric_mat_or_vec <- function(x) {
stop('"', name, '"', ' must be a numeric matrix or vector')
}

assert_numeric_mat <- function(x) {
name <- as.character(substitute(x))
if(is.null(x) || !is.numeric(x) | !is.matrix(x))
stop('"', name, '"', ' must be a numeric matrix')
}

assert_vec_length <- function(x, ...) {
name <- as.character(substitute(x))
lens <- unlist(list(...))
Expand Down Expand Up @@ -59,7 +65,6 @@ assert_all_in_closed_interval <- function(x, min, max) {
stop('all "', name, '" values must be between: ', min, ' and ', max)
}


assert_equal_nrow <- function(x, y) {
namex <- as.character(substitute(x))
namey <- as.character(substitute(y))
Expand All @@ -74,8 +79,27 @@ assert_equal_ncol <- function(x, y) {
stop('"', namex, '" and "', namey, '" must have the same number of columns')
}

assert_ncol_equal_nrow <- function(x, y) {
namex <- as.character(substitute(x))
namey <- as.character(substitute(y))
if(ncol(x) != nrow(y))
stop('number of "', namex, '" columns must be equal to the number of "', namey, '" rows')
}

assert_max_number_of_levels <- function(x, mlevels) {
name <- as.character(substitute(x))
if(is.null(x) || length(stats::na.omit(unique(x))) > mlevels)
stop('"', name, '"', ' must have no more than ', mlevels, ' unique elements')
}

assert_unique_colnames <- function(x) {
name <- as.character(substitute(x))
if(is.null(colnames(x)) || any(duplicated(colnames(x))))
stop('column names of "', name, '"', ' must be unique')
}

assert_nested_model <- function(m0, m1) {
if(!all(colnames(m0) %in% colnames(m1)) || !all.equal(m0, m1[,colnames(m0),drop=FALSE], check.attributes=FALSE)) {
stop('null model must be nested (subset of the full model)')
}
}
2 changes: 1 addition & 1 deletion R/utils.tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ do_pearson <- function(r, df, alt, conf) {

# Obtain statistics for linear regression
do_regression <- function(Y, X) {
betas <- matrix(NA, nrow=3, ncol=nrow(Y))
betas <- matrix(NA, nrow=ncol(X), ncol=nrow(Y))
dfmod <- numeric(nrow(Y))
dfres <- numeric(nrow(Y))
sstot <- numeric(nrow(Y))
Expand Down
64 changes: 64 additions & 0 deletions man/linearmodel.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d1f3e09

Please sign in to comment.