How do I read/write libsvm data into/from R?
The libsvm format is sparse data like
[
Based on some comments. I add it as an aswer so it's easier for others to use. This is to write data in libsvm format.
Function to write a data.frame to svm light format. I've added a train={TRUE, FALSE} argument in case the data doesn't have labels. In this case, the class index is ignored.
write.libsvm = function(data, filename= "out.dat", class = 1, train=TRUE) {
out = file(filename)
if(train){
writeLines(apply(data, 1, function(X) {
paste(X[class],
apply(cbind(which(X!=0)[-class],
X[which(X!=0)[-class]]),
1, paste, collapse=":"),
collapse=" ")
}), out)
} else {
# leaves 1 as default for the new data without predictions.
writeLines(apply(data, 1, function(X) {
paste('1',
apply(cbind(which(X!=0), X[which(X!=0)]), 1, paste, collapse=":"),
collapse=" ")
}), out)
}
close(out)
}
** EDIT **
library(data.table)
data.table.fm <- function (data = X, fileName = "../out.fm", target = "y_train",
train = TRUE) {
if (train) {
if (is.logical(data[[target]]) | sum(levels(factor(data[[target]])) ==
levels(factor(c(0, 1)))) == 2) {
data[[target]][data[[target]] == TRUE] = 1
data[[target]][data[[target]] == FALSE] = -1
}
}
specChar = "\\(|\\)|\\||\\:"
specCharSpace = "\\(|\\)|\\||\\:| "
parsingNames <- function(x) {
ret = c()
for (el in x) ret = append(ret, gsub(specCharSpace, "_",
el))
ret
}
parsingVar <- function(x, keepSpace, hard_parse) {
if (!keepSpace)
spch = specCharSpace
else spch = specChar
if (hard_parse)
gsub("(^_( *|_*)+)|(^_$)|(( *|_*)+_$)|( +_+ +)",
" ", gsub(specChar, "_", gsub("(^ +)|( +$)",
"", x)))
else gsub(spch, "_", x)
}
setnames(data, names(data), parsingNames(names(data)))
target = parsingNames(target)
format_vw <- function(column, formater) {
ifelse(as.logical(column), sprintf(formater, j, column),
"")
}
all_vars = names(data)[!names(data) %in% target]
cat("Reordering data.table if class isn't first\n")
target_inx = which(names(data) %in% target)
rest_inx = which(!names(data) %in% target)
cat("Adding Variable names to data.table\n")
for (j in rest_inx) {
column = data[[j]]
formater = "%s:%f"
set(data, i = NULL, j = j, value = format_vw(column,
formater))
cat(sprintf("Fixing %s\n", j))
}
data = data[, c(target_inx, rest_inx), with = FALSE]
drop_extra_space <- function(x) {
gsub(" {1,}", " ", x)
}
cat("Pasting data - Removing extra spaces\n")
data = apply(data, 1, function(x) drop_extra_space(paste(x,
collapse = " ")))
cat("Writing to disk\n")
write.table(data, file = fileName, sep = " ", row.names = FALSE,
col.names = FALSE, quote = FALSE)
}