I have a data set `x`
which consists of 12 columns and 167 rows. The first column is the compound ID for each row. I want to run a `t.test`
on groups of 3 columns.
Another alternative is using a package.
Your data:
# Example measurements: one row per amino acid, twelve sample columns whose
# labels follow a <group>-<replicate> pattern.
df <- data.frame(
  matrix(
    c(
      27612820, 22338050, 15359640, 19741350, 18726880, 18510800,
      10914980, 12071660, 16036180, 16890860, 16066960, 16364300,
      7067206, 7172234, 5933320, 136272600, 131596800, 134717600,
      6102838, 7186256, 6770344, 140127100, 155341300, 151748000,
      3151398, 2141378, 1240904, 11522180, 8907711, 9842342,
      1677299, 2265826, 2942991, 11690360, 12552660, 12102620
    ),
    nrow = 3,
    byrow = TRUE
  )
)
# Labels are set after construction so the hyphenated column names are kept
# verbatim (data.frame() would otherwise rewrite "AC-1" as "AC.1").
dimnames(df) <- list(
  c("alanine", "arginine", "asparagine"),
  c(
    "AC-1", "AC-2", "AC-3", "AM-1", "AM-2", "AM-3",
    "SC-1", "SC-2", "SC-3", "SM-1", "SM-2", "SM-3"
  )
)
Then to run a t-test on every row between AC and SC groups:
library(matrixTests)
# Welch two-sample t-test for every row: the three AC columns against the
# three SC columns. Returns one result row per row of `df`.
# (The console prompt "> " was removed so the snippet can be pasted or sourced.)
row_t_welch(df[, c("AC-1", "AC-2", "AC-3")], df[, c("SC-1", "SC-2", "SC-3")])
obs.x obs.y obs.tot mean.x mean.y mean.diff var.x var.y stderr df statistic pvalue conf.low conf.high alternative mean.null conf.level
alanine 3 3 6 21770170 13007607 8762563.3 37776970798900 7213669482133 3872580.5 2.736945 2.26271945 0.1171389 -4259692 21784819 two.sided 0 0.95
arginine 3 3 6 6724253 6686479 37774.0 471939373529 298723602417 506840.9 3.807645 0.07452832 0.9443398 -1397926 1473474 two.sided 0 0.95
asparagine 3 3 6 2177893 2295372 -117478.7 913496858185 401148784303 661978.3 3.472571 -0.17746605 0.8690016 -2070931 1835973 two.sided 0 0.95
The t.test is used to compare two data sets. Collecting two data sets each from three different columns of a matrix can be done like this:
# Pool the values of two non-overlapping groups of three columns each.
# as.numeric(unlist(...)) flattens both a matrix slice and a data-frame slice
# to a plain numeric vector (c() on a data-frame slice would give a list,
# which t.test() cannot handle).
data_a <- as.numeric(unlist(x[, 2:4]))
# Columns 5:7 are the next three columns; the earlier range 4:8 took five
# columns and re-used column 4 from data_a.
data_b <- as.numeric(unlist(x[, 5:7]))
These two data sets can be evaluated using t.test at this point:
# Two-sample t-test comparing the pooled values of the two groups.
t.test(x = data_a, y = data_b)
To compare two groups of three columns each within every row (amino acid), we modify this and add a loop over the rows:
# Simulated data: 4 rows by 6 columns of standard-normal draws
# (rnorm() defaults to mean 0 and sd 1).
x <- matrix(rnorm(4 * 6), nrow = 4, ncol = 6)
print(x)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] -0.4810307 0.3996071 0.90663635 0.7487048 0.5787846 2.0231681
[2,] -2.0454921 -0.1225105 -1.04447522 0.9325333 -1.7782776 0.6856150
[3,] -0.3099937 1.2079548 -0.03835271 0.2751349 1.0111554 -0.4862846
[4,] -0.2834953 0.1930481 -0.57968344 0.1204925 -0.5015843 0.3690397
# For every row of `x`, run a two-sample t-test of columns 1:3 against
# columns 4:6 and print the full test result.
# seq_len() makes the loop a no-op when `x` has zero rows; 1:nrow(x) would
# iterate over c(1, 0) and fail.
for (i in seq_len(nrow(x))) {
  data_a <- x[i, 1:3]
  data_b <- x[i, 4:6]
  print(t.test(data_a, data_b))
}
With this fake data:
# Fake data: 4 compounds x 12 samples of standard-normal noise.
# (The earlier throw-away `df <- data.frame(compound = ...)` assignment was
# removed; it was overwritten immediately by the next statement.)
measurements <- matrix(rnorm(12 * 4), ncol = 12)
colnames(measurements) <- c(
  "AC-1", "AC-2", "AC-3", "AM-1", "AM-2", "AM-3",
  "SC-1", "SC-2", "SC-3", "SM-1", "SM-2", "SM-3"
)
# data.frame() applies check.names = TRUE, so "AC-1" becomes "AC.1" etc.
# The stray trailing space in "alanine " was dropped so the name can be matched.
df <- data.frame(
  compound = c("alanine", "arginine", "asparagine", "aspartate"),
  measurements
)
df
compound AC.1 AC.2 AC.3 AM.1 AM.2 AM.3 SC.1 SC.2 SC.3 SM.1
1 alanine 1.18362683 -2.03779314 -0.7217692 -1.7569264 -0.8381042 0.06866567 0.2327702 -1.1558879 1.2077454 0.437707310
2 arginine -0.19610110 0.05361113 0.6478384 -0.1768597 0.5905398 -0.67945600 -0.2221109 1.4032349 0.2387620 0.598236199
3 asparagine 0.02540509 0.47880021 -0.1395198 0.8394257 1.9046667 0.31175358 -0.5626059 0.3596091 -1.0963363 -1.004673116
4 aspartate -1.36397906 0.91380826 2.0630076 -0.6817453 -0.2713498 -2.01074098 1.4619707 -0.7257269 0.2851122 -0.007027878
SM.2 SM.3
1 -0.08419146 0.14275728
2 -1.44965718 -0.64314509
3 0.37673942 -0.07245741
4 0.52794136 1.62305413
You can do the following to extract (for example) the p-values:
library(zoo)
# Transpose so that samples run down the rows and compounds across the
# columns, then run a one-sample t-test on each consecutive, non-overlapping
# window of three samples (width = 3, by = 3) and keep only its p-value.
rollapply(
  t(df[, -1]),
  width = 3,
  FUN = function(window) t.test(window)$p.value,
  by = 3
)
[,1] [,2] [,3] [,4]
[1,] 0.6308340 0.5702970 0.5783582 0.6468241
[2,] 0.2511564 0.8327439 0.1617192 0.2005518
[3,] 0.9026407 0.4309623 0.4156030 0.6441069
[4,] 0.3878145 0.4909217 0.6239915 0.2747601
# Per-row two-sample t-test of columns 2:4 against columns 8:10; the p-value
# is stored in x$stat as a plain numeric vector.
# vapply() with `$p.value` guarantees a numeric result. The previous
# sapply(...)[c("p.value")] form returned a list, which made x$stat a list
# column. seq_len() also keeps this safe for a zero-row `x`.
x$stat <- vapply(
  seq_len(nrow(x)),
  function(i) {
    # as.character() first so factor columns convert via their labels,
    # not their integer codes.
    to_numeric <- function(cols) as.numeric(as.character(unlist(x[i, cols])))
    t.test(to_numeric(2:4), to_numeric(8:10))$p.value
  },
  numeric(1)
)