Does anyone know a good algorithm to word wrap an input string to a specified number of lines rather than to a set width? Basically, the goal is to achieve the minimum width for X lines.
I just thought of an approach:
You can write a function accepting two parameters 1. String 2. Number of lines
Get the length of the string (String.length if using C#). Divide the length by number of lines (lets say the result is n)
Now start a loop and access each character of the string (using string[i]), inserting a line break ('\r\n') after every nth character in the array of characters.
In the loop, maintain a temporary string holding the current word; it is reset to null whenever a blank character is encountered.
If the nth character is reached and the temporary string is not null (i.e. we are in the middle of a word), insert the '\r\n' after that word instead of splitting it.
I'll assume you're trying to minimize the maximum width of a string with n breaks. This can be done in O(words(str)*n) time and space using dynamic programming or recursion with memoization.
The recurrence would look like this, where the string has already been split into words:
INFINITY = float("inf")


def wordwrap(remaining_words, n):
    """Return the smallest achievable maximum line width for the given words.

    The words are split with exactly ``n`` further line breaks (so onto
    ``n + 1`` lines); the width of a line is the length of its words joined
    by single spaces.

    Args:
        remaining_words: list of words still to be placed.
        n: number of line breaks still to be inserted.

    Returns:
        The minimal maximum line width, or INFINITY when breaks remain to be
        placed but no words are left.

    This is the plain recurrence; it recomputes sub-problems, so memoize on
    (number of remaining words, n) for long inputs.
    """
    if n == 0:
        # No breaks left: everything that remains forms the last line.
        return len(' '.join(remaining_words))
    if len(remaining_words) == 0:
        # We still owe breaks but have run out of words.
        return INFINITY
    best = INFINITY
    for i in range(1, len(remaining_words) + 1):
        # Break after the first i words: they form one line, and the rest is
        # wrapped with one break fewer. No word is skipped at the split.
        first_line_width = len(' '.join(remaining_words[:i]))
        rest_width = wordwrap(remaining_words[i:], n - 1)
        best = min(max(rest_width, first_line_width), best)
    return best
This solution improves on Mikola's.
It's better because
This is written in Javascript:
// For testing calcMinWidth
/**
 * Word-wraps `str` onto at most `nLines` lines, using the narrowest line
 * width (as computed by calcMinWidth) that still fits that many lines.
 *
 * @param {string} str - Text to wrap; words are separated by single spaces.
 * @param {number} nLines - Maximum number of lines to produce.
 * @returns {string} The wrapped text, lines joined with "\n".
 */
var formatString = function (str, nLines) {
  var words = str.split(" ");
  var elWidths = words.map(function (s) {
    return s.length;
  });
  var width = calcMinWidth(elWidths, 1, nLines, 0.1);

  var lines = [];
  var curLine = [];
  var curLineLength = 0;
  for (var i = 0; i < words.length; ++i) {
    // Width the current line would have with this word appended, including
    // the space that separates it from the previous word on the line.
    var needed = curLine.length > 0 ? curLineLength + 1 + elWidths[i] : elWidths[i];
    if (curLine.length > 0 && needed > width) {
      lines.push(curLine.join(" "));
      curLine = [];
      needed = elWidths[i];
    }
    curLine.push(words[i]);
    curLineLength = needed;
  }
  // split() always yields at least one word, so the last line is never empty.
  lines.push(curLine.join(" "));
  return lines.join("\n");
};

/**
 * Finds (by bisection) the smallest line width at which the elements can be
 * laid out greedily on at most `lines` lines.
 *
 * @param {number[]} elWidths - Width of each element, in order.
 * @param {number} separatorWidth - Width inserted between two elements that
 *   share a line.
 * @param {number} lines - Maximum number of lines allowed.
 * @param {number} tolerance - Bisection stops once the bracket is this narrow.
 * @returns {number} A width that fits, at most `tolerance` above the minimum
 *   (0 for an empty element list).
 */
var calcMinWidth = function (elWidths, separatorWidth, lines, tolerance) {
  // True when every element fits within `width` on at most `lines` lines.
  var testFit = function (width) {
    var nCurLine = 1;
    var curLineLength = 0;
    var nOnLine = 0;
    for (var i = 0; i < elWidths.length; ++i) {
      var elWidth = elWidths[i];
      if (elWidth > width) {
        return false;
      }
      // The separator counts towards the line width whenever the element
      // is not the first one on its line.
      var needed = nOnLine > 0 ? curLineLength + separatorWidth + elWidth : elWidth;
      if (needed > width) {
        if (++nCurLine > lines) {
          return false;
        }
        needed = elWidth;
        nOnLine = 0;
      }
      curLineLength = needed;
      ++nOnLine;
    }
    return true;
  };

  // hi: everything on one line (always fits); lo: the widest single element.
  var hi = 0;
  var lo = null;
  for (var i = 0; i < elWidths.length; ++i) {
    var elWidth = elWidths[i];
    if (i > 0) {
      hi += separatorWidth;
    }
    hi += elWidth;
    if (lo === null || elWidth > lo) {
      lo = elWidth;
    }
  }
  if (lo === null) {
    lo = 0;
  }

  while (hi - lo > tolerance) {
    var guess = (hi + lo) / 2;
    // Stop if the midpoint can no longer be told apart from a bound
    // (tolerance below floating-point resolution).
    if (guess <= lo || guess >= hi) {
      break;
    }
    if (testFit(guess)) {
      hi = guess;
    } else {
      lo = guess;
    }
  }
  return hi;
};
There was a discussion about this exact problem (though it was phrased in a different way) at http://www.perlmonks.org/?node_id=180276.
In the end the best solution was to do a binary search through all possible widths to find the smallest width that wound up with no more than the desired number of columns. If there are n items and the average width is m, then you'll need O(log(n) + log(m)) passes to find the right width, each of which takes O(n) time, for O(n * (log(n) + log(m))) overall. This is probably fast enough with no more need to be clever.
If you wish to be clever, you can create an array of word counts and cumulative lengths of the words, then use binary searches on this data structure to figure out where the line breaks are. Creating this data structure is O(n), and it makes all of the passes to figure out the right width take O(log(n) * (log(n) + log(m))), which for reasonable lengths of words is dominated by your first O(n) pass.
If the widths of words can be floating point, you'll need to do something more clever with the binary searches, but you are unlikely to need that particular optimization.
btilly has the right answer here, but just for fun I decided to code up a solution in python:
def wrap_min_width(words, n):
    """Greedily wrap ``words`` into lines that are at most ``n`` characters wide.

    Words on the same line are joined by a single space, and that space counts
    towards the line width. A word longer than ``n`` is placed on a line of its
    own rather than being split.

    Args:
        words: list of words, in order.
        n: maximum line width in characters.

    Returns:
        List of line strings (``[""]`` when ``words`` is empty).
    """
    lines, current, width = [], [], 0
    for w in words:
        # Width of the current line if w is appended, separator included.
        needed = width + 1 + len(w) if current else len(w)
        if current and needed > n:
            lines.append(" ".join(current))
            current, needed = [], len(w)
        current.append(w)
        width = needed
    return lines + [" ".join(current)]


def min_lines(phrase, lines):
    """Wrap ``phrase`` onto at most ``lines`` lines using the smallest width.

    Binary-searches the integer line width between the longest word and the
    width of the whole phrase on one line.

    Args:
        phrase: text whose words are separated by single spaces.
        lines: maximum number of lines allowed.

    Returns:
        Tuple ``(width, text)``: the minimal width found and the phrase wrapped
        at that width, lines joined with a newline.
    """
    words = phrase.split(" ")
    lengths = [len(w) for w in words]
    # No line can be narrower than the longest word; the whole phrase on a
    # single line (spaces included) always fits.
    lo = max(lengths)
    hi = sum(lengths) + len(words) - 1
    while lo < hi:
        mid = lo + (hi - lo) // 2  # integer midpoint
        if len(wrap_min_width(words, mid)) > lines:
            lo = mid + 1
        else:
            hi = mid
    return lo, "\n".join(wrap_min_width(words, lo))
Now this still may not be exactly what you want, since if it is possible to wrap the words in fewer than n lines using the same line width, it instead returns the smallest number of lines encoding. (Of course you can always add extra empty lines, but it is a bit silly.) If I run it on your test case, here is what I get:
Case: "I would like to be wrapped into three lines", 3 lines
Result: 14 chars/line
I would like to
be wrapped into
three lines
I converted the C# accepted answer to JavaScript for something I was working on. Posting it here might save someone a few minutes of doing it themselves.
/**
 * Splits `text` into exactly `n` lines whose widths are as even as possible.
 *
 * Uses dynamic programming over (line count, word count), minimising the sum
 * of squared differences between each line's width and the ideal width.
 *
 * @param {*} text - Value to wrap; converted with toString() and split on " ".
 * @param {number} n - Number of lines to produce.
 * @returns {string[]} The lines in order (some may be empty strings).
 */
function WrapTextWithLimit(text, n) {
  var tokens = text.toString().split(' ');
  var wordCount = tokens.length;

  // prefix[i] is the total character count of the first i words (no spaces).
  var prefix = tokens.reduce(function (acc, token) {
    acc.push(acc[acc.length - 1] + token.length);
    return acc;
  }, [0]);

  var fullWidth = prefix[wordCount] + wordCount - 1;
  // Ideal width of one line once n - 1 spaces have become line breaks.
  var target = (fullWidth - (n - 1.0)) / n;

  // Squared deviation from the target for a line holding words [from, to).
  function penalty(from, to) {
    var gaps = Math.max(to - from - 1, 0);
    var deviation = target - (gaps + (prefix[to] - prefix[from]));
    return deviation * deviation;
  }

  // table[l][j] = [lowest total penalty for the first j words on l lines,
  //                index of the first word of the last of those lines].
  var zeroLines = [[0.0, -1]];
  for (var w = 0; w < wordCount; ++w) {
    zeroLines.push([Number.MAX_VALUE, -1]);
  }
  var table = [zeroLines];

  for (var lineNo = 1; lineNo < n + 1; ++lineNo) {
    var previous = table[lineNo - 1];
    var row = [];
    for (var end = 0; end < wordCount + 1; ++end) {
      var bestCost = previous[0][0] + penalty(0, end);
      var bestStart = 0;
      // Start at 1: start 0 is the initial candidate above. Only a strictly
      // lower cost replaces it, so ties keep the earliest start.
      for (var start = 1; start < end + 1; ++start) {
        var candidate = previous[start][0] + penalty(start, end);
        if (candidate < bestCost) {
          bestCost = candidate;
          bestStart = start;
        }
      }
      row.push([bestCost, bestStart]);
    }
    table.push(row);
  }

  // Walk back from the last line, prepending so the result is in order.
  var result = [];
  var stop = wordCount;
  for (var p = n; p > 0; --p) {
    var begin = table[p][stop][1];
    result.unshift(tokens.slice(begin, stop).join(' '));
    stop = begin;
  }
  return result;
}