Skip to content

Commit 6b90c29

Browse files
committed
fix: improve RT deviation estimation for subset alignment
- This fixes issue #795 by using `approxfun()` instead of `stepfun()` for the subset-based alignment (and only there!). For adjustment of chrom peaks' retention times we still use the `stepfun()` that will ensure the RT will coincide with the RT of an actual spectrum (not interpolated).
1 parent 67d516c commit 6b90c29

File tree

6 files changed

+167
-51
lines changed

6 files changed

+167
-51
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
Package: xcms
2-
Version: 4.7.0
2+
Version: 4.7.1
33
Title: LC-MS and GC-MS Data Analysis
44
Description: Framework for processing and visualization of chromatographically
55
separated and single-spectra mass spectral data. Imports from AIA/ANDI NetCDF,

NEWS.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
1+
# xcms 4.7
2+
3+
## Changes in version 4.7.1
4+
5+
- Fix retention time deviation estimation for subset-based alignment.
6+
17
# xcms 4.5
28

39
## Changes in version 4.5.4
410

511
- Replace usage of deprecated (and removed) class `NAnnotatedDataFrame` with
612
`AnnotatedDataFrame`.
713
- Fix a bug in `manualChromPeaks()` that caused an error when only a single
8-
chrom peak was added.
14+
chrom peak was added.
915

1016
## Changes in version 4.5.3
1117

R/XcmsExperiment-plotting.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,8 @@ plotAdjustedRtime <- function(object, col = "#00000080", lty = 1, lwd = 1,
123123
peak_group_adj <- peak_group
124124
for (i in seq_len(ncol(peak_group)))
125125
peak_group_adj[, i] <- .applyRtAdjustment(peak_group[, i],
126-
rt[[i]], rtadj[[i]])
126+
rt[[i]], rtadj[[i]],
127+
method = "approxfun")
127128
diff_rt <- peak_group_adj - peak_group
128129
if (adjustedRtime)
129130
xrt <- peak_group_adj

R/do_adjustRtime-functions.R

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,9 @@ do_adjustRtime_peakGroups <-
322322
#'
323323
#' @param rtadj `numeric` with adjusted retention times.
324324
#'
325+
#' @param method `character(1)` either `"stepfun"` (the default) or
326+
#' `"approxfun"` to avoid the artifacts observed in issue #
327+
#'
325328
#' @noRd
326329
#'
327330
#' @author Johannes Rainer
@@ -338,14 +341,19 @@ do_adjustRtime_peakGroups <-
338341
#' ## adjFts[, c("rt", "rtmin", "rtmax")] <- .applyRtAdjustment(feats[, c("rt", "rtmin", "rtmax")], rtr, rtc)
339342
#'
340343
#' ## To revert the adjustment: just switch the order of rtr and rtc
341-
.applyRtAdjustment <- function(x, rtraw, rtadj) {
344+
.applyRtAdjustment <- function(x, rtraw, rtadj,
345+
method = c("stepfun", "approxfun")) {
346+
method <- match.arg(method)
342347
## re-order everything if rtraw is not sorted; issue #146
343348
if (is.unsorted(rtraw)) {
344349
idx <- order(rtraw)
345350
rtraw <- rtraw[idx]
346351
rtadj <- rtadj[idx]
347352
}
348-
adjFun <- stepfun(rtraw[-1] - diff(rtraw) / 2, rtadj)
353+
if (method == "approxfun")
354+
adjFun <- approxfun(rtraw, rtadj)
355+
else
356+
adjFun <- stepfun(rtraw[-1] - diff(rtraw) / 2, rtadj)
349357
res <- adjFun(x)
350358
## Fix margins.
351359
idx_low <- which(x < rtraw[1])
@@ -375,12 +383,16 @@ do_adjustRtime_peakGroups <-
375383
stop("'rtraw' and 'rtadj' have to have the same length!")
376384
## Going to adjust the columns rt, rtmin and rtmax in x.
377385
## Using a for loop here.
386+
## Note: we are using `"stepfun"` here on purpose, to be consistent with
387+
## the original code, and as it will adjust retention times to the actual
388+
## adjusted retention times, not mean or interpolated ones.
378389
for (i in seq_along(rtraw)) {
379390
whichSample <- which(x[, "sample"] == i)
380391
if (length(whichSample) && any(rtraw[[i]] != rtadj[[i]])) {
381392
x[whichSample, c("rt", "rtmin", "rtmax")] <-
382393
.applyRtAdjustment(x[whichSample, c("rt", "rtmin", "rtmax")],
383-
rtraw = rtraw[[i]], rtadj = rtadj[[i]])
394+
rtraw = rtraw[[i]], rtadj = rtadj[[i]],
395+
method = "stepfun")
384396
}
385397
}
386398
x
@@ -535,6 +547,11 @@ do_adjustRtime_peakGroups <-
535547
#' @param method `character` specifying the method with which the non-subset
536548
#' samples are adjusted: either `"previous"` or `"average"`. See details.
537549
#'
550+
#' @param adjFun `character(1)` defining the function that should be used to
551+
#' estimate the retention time deviation. Can be either
552+
#' `adjFun = "approxfun"` (default) or `adjFun = "stepfun"` (which was the
553+
#' default).
554+
#'
538555
#' @return `list` of adjusted retention times.
539556
#'
540557
#' @author Johannes Rainer
@@ -543,26 +560,28 @@ do_adjustRtime_peakGroups <-
543560
#'
544561
#' @md
545562
adjustRtimeSubset <- function(rtraw, rtadj, subset,
546-
method = c("average", "previous")) {
563+
method = c("average", "previous"),
564+
adjFun = c("approxfun", "stepfun")) {
547565
method <- match.arg(method)
566+
adjFun <- match.arg(adjFun)
548567
if (length(rtraw) != length(rtadj))
549568
stop("Lengths of 'rtraw' and 'rtadj' have to match.")
550569
if (missing(subset))
551570
subset <- seq_along(rtraw)
552571
if (!all(subset %in% seq_along(rtraw)))
553572
stop("'subset' is out of bounds.")
554-
## if (length(subset) == length(rtraw)) {
555-
## cat("return rtadj\n")
556-
## return(rtadj)
557-
## }
558573
no_subset <- seq_len(length(rtraw))[-subset]
574+
message("Aligning samples against subset")
575+
pb <- progress_bar$new(format = paste0("[:bar] :current/:",
576+
"total (:percent) in ",
577+
":elapsed"),
578+
total = (length(no_subset)), clear = FALSE)
559579
for (i in no_subset) {
560-
message("Aligning sample number ", i, " against subset ... ",
561-
appendLF = FALSE)
562580
if (method == "previous") {
563581
i_adj <- .get_closest_index(i, subset, method = "previous")
564582
rtadj[[i]] <- .applyRtAdjustment(rtraw[[i]], rtraw[[i_adj]],
565-
rtadj[[i_adj]])
583+
rtadj[[i_adj]],
584+
method = adjFun)
566585
}
567586
if (method == "average") {
568587
i_ref <- c(.get_closest_index(i, subset, method = "previous"),
@@ -576,9 +595,9 @@ adjustRtimeSubset <- function(rtraw, rtadj, subset,
576595
rt_raw_ref <- apply(rt_raw_ref, 1, weighted.mean, w = wghts)
577596
rt_adj_ref <- apply(rt_adj_ref, 1, weighted.mean, w = wghts)
578597
rtadj[[i]] <- .applyRtAdjustment(rtraw[[i]], rt_raw_ref,
579-
rt_adj_ref)
598+
rt_adj_ref, method = adjFun)
580599
}
581-
message("OK")
600+
pb$tick()
582601
}
583602
rtadj
584603
}

tests/testthat/test_do_adjustRtime-functions.R

Lines changed: 114 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,6 @@ test_that("do_adjustRtime_peakGroups works", {
4545
})
4646

4747
test_that("applyRtAdjustment works", {
48-
skip_on_os(os = "windows", arch = "i386")
49-
50-
xs <- faahko
51-
## group em.
52-
## xsg <- group(xs)
53-
## ## align em.
54-
## xsa <- retcor(xsg, method = "peakgroups")
5548
pksAdj <- .applyRtAdjToChromPeaks(chromPeaks(xod_xg),
5649
rtraw = rtime(xod_xg, bySample = TRUE),
5750
rtadj = rtime(xod_xgr, bySample = TRUE))
@@ -73,16 +66,86 @@ test_that("applyRtAdjustment works", {
7366
## Artificial examples.
7467
a_raw <- c(1, 2, 3, 5, 6, 7, 8, 10, 12, 13, 14, 16)
7568
a_adj <- a_raw + 2 # shift by 2
76-
b <- .applyRtAdjustment(a_raw, a_raw, a_adj)
69+
b <- .applyRtAdjustment(a_raw, a_raw, a_adj, method = "approxfun")
7770
expect_equal(a_adj, b)
7871
b_2 <- .applyRtAdjustment(a_raw, a_raw[4:8], a_adj[4:8])
7972
expect_equal(b, b_2)
73+
x <- c(2, 3, 5, 6, 8)
74+
res <- .applyRtAdjustment(x, a_raw, a_adj, method = "approxfun")
75+
expect_equal(res, x + 2)
76+
res <- .applyRtAdjustment(x, a_raw, a_adj, method = "stepfun")
77+
expect_equal(res, x + 2)
8078

8179
a_adj <- a_raw - 2
8280
b <- .applyRtAdjustment(a_raw, a_raw, a_adj)
8381
expect_equal(a_adj, b)
8482
b_2 <- .applyRtAdjustment(a_raw, a_raw[4:8], a_adj[4:8])
8583
expect_equal(b, b_2)
84+
85+
## Difference between stepfun (old default) and approxfun.
86+
a_raw <- seq(1, 100, by = 0.3)
87+
a_adj <- a_raw + 0.2
88+
x <- seq(4, 20, by = 0.3)
89+
res <- .applyRtAdjustment(x, a_raw, a_adj, method = "stepfun")
90+
expect_equal(res, x + 0.2)
91+
res <- .applyRtAdjustment(x, a_raw, a_adj, method = "approxfun")
92+
expect_equal(res, x + 0.2)
93+
94+
x <- seq(1.4, 20, by = 0.3)
95+
res <- .applyRtAdjustment(x, a_raw, a_adj, method = "stepfun")
96+
## expect_equal(res, x + 0.2) # error!
97+
res <- .applyRtAdjustment(x, a_raw, a_adj, method = "approxfun")
98+
expect_equal(res, x + 0.2)
99+
100+
a_adj <- a_raw + 1.3
101+
res <- .applyRtAdjustment(x, a_raw, a_adj, method = "stepfun")
102+
## expect_equal(res, x + 1.3) # error!
103+
res <- .applyRtAdjustment(x, a_raw, a_adj, method = "approxfun")
104+
expect_equal(res, x + 1.3)
105+
106+
## small increments
107+
a_raw <- seq(0.1, 100, by = 0.03)
108+
a_adj <- sort(a_raw + 0.2)
109+
110+
res <- .applyRtAdjustment(a_raw, a_raw, a_adj, method = "stepfun")
111+
expect_equal(a_adj, res)
112+
expect_equal(quantile(diff(a_adj)), quantile(diff(res)))
113+
114+
set.seed(123)
115+
x <- seq(2.02, 90.02, by = 0.03)
116+
x <- sort(x + rnorm(length(x), 0, 0.001))
117+
res <- .applyRtAdjustment(x, a_raw, a_adj, method = "stepfun")
118+
## expect_equal(res, x + 0.2)
119+
## plot(res, res - x, type = "l")
120+
121+
res <- .applyRtAdjustment(x, a_raw, a_adj, method = "approxfun")
122+
expect_equal(quantile(diff(res)), quantile(diff(x)))
123+
expect_equal(res, x + 0.2)
124+
## plot(res, res - x, type = "l")
125+
126+
## deviation is smaller than diff
127+
a_raw <- seq(0.1, 100, by = 1.2)
128+
a_adj <- sort(a_raw + 0.2)
129+
130+
res <- .applyRtAdjustment(a_raw, a_raw, a_adj, method = "stepfun")
131+
expect_equal(a_adj, res)
132+
expect_equal(diff(a_adj), diff(res))
133+
134+
## Now, that's an issue.
135+
## we should! have a constant shift by 0.2
136+
x <- seq(2, 90, by = 1.2)
137+
x <- sort(x + rnorm(length(x), mean = 0, sd = 0.001))
138+
res <- .applyRtAdjustment(x, a_raw, a_adj, method = "stepfun")
139+
## plot(res, res - x, type = "l")
140+
## For a constant shift we expect the difference between consecutive values
141+
## to stay the same, but the test below fails.
142+
## expect_equal(diff(x), diff(res))
143+
144+
res <- .applyRtAdjustment(x, a_raw, a_adj, method = "approxfun")
145+
expect_equal(diff(x), diff(res))
146+
expect_equal(mean(res - x), 0.2)
147+
expect_equal(res, x + 0.2)
148+
## plot(res, res - x, type = "l")
86149
})
87150

88151
test_that(".get_closest_index works", {
@@ -136,42 +199,65 @@ test_that(".match_trim_vectors and index works", {
136199
})
137200

138201
test_that("adjustRtimeSubset works", {
139-
skip_on_os(os = "windows", arch = "i386")
140-
141202
rt_raw <- rtime(xod_xgr, adjusted = FALSE, bySample = TRUE)
142203
rt_adj <- rtime(xod_xgr, adjusted = TRUE, bySample = TRUE)
143204

144-
res <- adjustRtimeSubset(rt_raw, rt_adj, subset = c(1, 3),
145-
method = "previous")
205+
res <- xcms:::adjustRtimeSubset(rt_raw, rt_adj, subset = c(1, 3),
206+
method = "previous", adjFun = "stepfun")
146207
expect_equal(res[[1]], rt_adj[[1]])
147208
expect_equal(res[[3]], rt_adj[[3]])
148209
expect_true(all(res[[2]] != rt_adj[[2]]))
149210
expect_equal(names(res[[2]]), names(rt_adj[[2]]))
150211
expect_equal(unname(res[[2]]), unname(rt_adj[[1]]))
151212

152-
a <- res[[1]] - rt_raw[[1]]
153-
b <- res[[2]] - rt_raw[[2]]
154-
c <- res[[3]] - rt_raw[[3]]
155-
plot(res[[1]], a, type = "l", col = "#ff000040", lty = 2,
156-
ylim = range(a, b, c))
157-
points(res[[2]], b, type = "l", col = "#00ff0060", lty = 1)
158-
points(res[[3]], c, type = "l", col = "#0000ff40", lty = 2)
213+
res <- xcms:::adjustRtimeSubset(rt_raw, rt_adj, subset = c(1, 3),
214+
method = "previous", adjFun = "approxfun")
215+
expect_equal(res[[1]], rt_adj[[1]])
216+
expect_equal(res[[3]], rt_adj[[3]])
217+
expect_true(all(res[[2]] != rt_adj[[2]]))
218+
## Values are no longer IDENTICAL, but highly similar:
219+
expect_true(median(res[[2]] - rt_adj[[1]]) == 0)
220+
expect_true(max(abs(res[[2]] - rt_adj[[1]])) < 0.002)
221+
222+
## a <- res[[1]] - rt_raw[[1]]
223+
## b <- res[[2]] - rt_raw[[2]]
224+
## c <- res[[3]] - rt_raw[[3]]
225+
## plot(res[[1]], a, type = "l", col = "#ff000040", lty = 2,
226+
## ylim = range(a, b, c))
227+
## points(res[[2]], b, type = "l", col = "#00ff0060", lty = 1)
228+
## points(res[[3]], c, type = "l", col = "#0000ff40", lty = 2)
229+
230+
res <- xcms:::adjustRtimeSubset(rt_raw, rt_adj, subset = c(1, 3),
231+
method = "average", adjFun = "stepfun")
232+
expect_equal(res[[1]], rt_adj[[1]])
233+
expect_equal(res[[3]], rt_adj[[3]])
234+
expect_true(all(res[[2]] != rt_adj[[2]]))
235+
expect_true(all(res[[2]] != rt_adj[[1]]))
236+
expect_true(all(res[[2]] != rt_adj[[3]]))
237+
238+
## a <- res[[1]] - rt_raw[[1]]
239+
## b <- res[[2]] - rt_raw[[2]]
240+
## c <- res[[3]] - rt_raw[[3]]
241+
## plot(res[[1]], a, type = "l", col = "#ff000040", lty = 2,
242+
## ylim = range(a, b, c))
243+
## points(res[[2]], b, type = "l", col = "#00ff0060", lty = 1)
244+
## points(res[[3]], c, type = "l", col = "#0000ff40", lty = 2)
159245

160-
res <- adjustRtimeSubset(rt_raw, rt_adj, subset = c(1, 3),
161-
method = "average")
246+
res <- xcms:::adjustRtimeSubset(rt_raw, rt_adj, subset = c(1, 3),
247+
method = "average", adjFun = "approxfun")
162248
expect_equal(res[[1]], rt_adj[[1]])
163249
expect_equal(res[[3]], rt_adj[[3]])
164250
expect_true(all(res[[2]] != rt_adj[[2]]))
165251
expect_true(all(res[[2]] != rt_adj[[1]]))
166252
expect_true(all(res[[2]] != rt_adj[[3]]))
167253

168-
a <- res[[1]] - rt_raw[[1]]
169-
b <- res[[2]] - rt_raw[[2]]
170-
c <- res[[3]] - rt_raw[[3]]
171-
plot(res[[1]], a, type = "l", col = "#ff000040", lty = 2,
172-
ylim = range(a, b, c))
173-
points(res[[2]], b, type = "l", col = "#00ff0060", lty = 1)
174-
points(res[[3]], c, type = "l", col = "#0000ff40", lty = 2)
254+
## a <- res[[1]] - rt_raw[[1]]
255+
## b <- res[[2]] - rt_raw[[2]]
256+
## c <- res[[3]] - rt_raw[[3]]
257+
## plot(res[[1]], a, type = "l", col = "#ff000040", lty = 2,
258+
## ylim = range(a, b, c))
259+
## points(res[[2]], b, type = "l", col = "#00ff0060", lty = 1)
260+
## points(res[[3]], c, type = "l", col = "#0000ff40", lty = 2)
175261
})
176262

177263
test_that(".adjustRtime_peakGroupsMatrix works", {

tests/testthat/test_methods-XCMSnExp.R

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1619,7 +1619,6 @@ test_that("calibrate,XCMSnExp works", {
16191619
})
16201620

16211621
test_that("adjustRtime,peakGroups works", {
1622-
skip_on_os(os = "windows", arch = "i386")
16231622

16241623
xod <- faahko_xod
16251624
xodg <- groupChromPeaks(
@@ -1694,19 +1693,24 @@ test_that("adjustRtime,peakGroups works", {
16941693
rtime(xodg, bySample = TRUE)[[2]]))
16951694
expect_true(all(rtime(res_sub, bySample = TRUE)[[3]] !=
16961695
rtime(xodg, bySample = TRUE)[[3]]))
1697-
expect_equal(unname(rtime(res_sub, bySample = TRUE)[[1]]),
1698-
unname(rtime(res_sub, bySample = TRUE)[[2]]))
1696+
## With `adjFun = "approxfun"` it's no longer **identical** but highly
1697+
## similar!
1698+
expect_true(max(abs(rtime(res_sub, bySample = TRUE)[[1L]] -
1699+
rtime(res_sub, bySample = TRUE)[[2L]])) < 0.0015)
1700+
## expect_equal(unname(rtime(res_sub, bySample = TRUE)[[1]]),
1701+
## unname(rtime(res_sub, bySample = TRUE)[[2]]))
16991702
expect_equal(rtime(res_sub, bySample = TRUE)[[2]],
17001703
.applyRtAdjustment(rtime(xodg, bySample = TRUE)[[2]],
1701-
rtime(xodg, bySample = TRUE)[[1]],
1702-
rtime(res_sub, bySample = TRUE)[[1]]))
1704+
rtime(xodg, bySample = TRUE)[[1]],
1705+
rtime(res_sub, bySample = TRUE)[[1]],
1706+
method = "approxfun"))
17031707
res_sub <- adjustRtime(
17041708
xodg, param = PeakGroupsParam(subset = c(1, 3),
17051709
subsetAdjust = "average"))
17061710
expect_true(all(rtime(res_sub, bySample = TRUE)[[1]] !=
17071711
rtime(xodg, bySample = TRUE)[[1]]))
1708-
expect_true(all(rtime(res_sub, bySample = TRUE)[[2]] !=
1709-
rtime(xodg, bySample = TRUE)[[2]]))
1712+
expect_false(all(rtime(res_sub, bySample = TRUE)[[2]] ==
1713+
rtime(xodg, bySample = TRUE)[[2]]))
17101714
expect_true(all(rtime(res_sub, bySample = TRUE)[[3]] !=
17111715
rtime(xodg, bySample = TRUE)[[3]]))
17121716
expect_true(any(rtime(res_sub, bySample = TRUE)[[1]] !=

0 commit comments

Comments
 (0)