Does anyone know a good algorithm to word wrap an input string to a specified number of lines rather than a set width. Basically to achieve the minimum width for X lines.
A way of solvng this problem would be using dynamic programming, You can solve this problem using dynamic programming, cf Minimum raggedness algorithm. I used some of the informations you add when you eddited your post with : Algorithm to divide text into 3 evenly-sized groups
Notations:
Let name your text document="word1 word2 .... wordp"
n= number of line required
LineWidth=len(document)/n
Cost function:
First you need to define a cost function of having word[i] to word[j] in the same line , you can take the same as the one as the one on wikipedia, with p=2 for example:
It represent the distance between the objective length of a line and the actual lenght.
The total cost function for the optimal solution can be defined with the following recursiion relation:
Solving the problem:
You can solve this problem using dynamic programming. I took the code from the link you gave, and changed it a so you see what the program is using.
Here is the result from the code:
D=minragged('Just testing to see how this works.')
number of words: 7
------------------------------------
stage : 0
------------------------------------
word i to j in line 0 TotalCost (f(j))
------------------------------------
i= 0 j= 0 121.0
i= 0 j= 1 49.0
i= 0 j= 2 1.0
i= 0 j= 3 16.0
i= 0 j= 4 64.0
i= 0 j= 5 144.0
i= 0 j= 6 289.0
i= 0 j= 7 576.0
------------------------------------
stage : 1
------------------------------------
word i to j in line 1 TotalCost (f(j))
------------------------------------
i= 0 j= 0 242.0
i= 0 j= 1 170.0
i= 0 j= 2 122.0
i= 0 j= 3 137.0
i= 0 j= 4 185.0
i= 0 j= 5 265.0
i= 0 j= 6 410.0
i= 0 j= 7 697.0
i= 1 j= 2 65.0
i= 1 j= 3 50.0
i= 1 j= 4 58.0
i= 1 j= 5 98.0
i= 1 j= 6 193.0
i= 1 j= 7 410.0
i= 2 j= 4 26.0
i= 2 j= 5 2.0
i= 2 j= 6 17.0
i= 2 j= 7 122.0
i= 3 j= 7 80.0
------------------------------------
stage : 2
------------------------------------
word i to j in line 2 TotalCost (f(j))
------------------------------------
i= 0 j= 7 818.0
i= 1 j= 7 531.0
i= 2 j= 7 186.0
i= 3 j= 7 114.0
i= 4 j= 7 42.0
i= 5 j= 7 2.0
reversing list
------------------------------------
Just testing 12
to see how 10
this works. 11
Reverse this and you get:
Just testing 12
to see how 10
this works. 11
Here is the code to print the reasonning,(in python sorry I don't use C#...but I someone actually translated the code in C#) :
def minragged(text, n=3):
P=2
words = text.split()
cumwordwidth = [0]
# cumwordwidth[-1] is the last element
for word in words:
cumwordwidth.append(cumwordwidth[-1] + len(word))
totalwidth = cumwordwidth[-1] + len(words) - 1 # len(words) - 1 spaces
linewidth = float(totalwidth - (n - 1)) / float(n) # n - 1 line breaks
print "number of words:", len(words)
def cost(i, j):
"""
cost of a line words[i], ..., words[j - 1] (words[i:j])
"""
actuallinewidth = max(j - i - 1, 0) + (cumwordwidth[j] - cumwordwidth[i])
return (linewidth - float(actuallinewidth)) ** P
"""
printing the reasoning and reversing the return list
"""
F={} # Total cost function
for stage in range(n):
print "------------------------------------"
print "stage :",stage
print "------------------------------------"
print "word i to j in line",stage,"\t\tTotalCost (f(j))"
print "------------------------------------"
if stage==0:
F[stage]=[]
i=0
for j in range(i,len(words)+1):
print "i=",i,"j=",j,"\t\t\t",cost(i,j)
F[stage].append([cost(i,j),0])
elif stage==(n-1):
F[stage]=[[float('inf'),0] for i in range(len(words)+1)]
for i in range(len(words)+1):
j=len(words)
if F[stage-1][i][0]+cost(i,j)<F[stage][j][0]: #calculating min cost (cf f formula)
F[stage][j][0]=F[stage-1][i][0]+cost(i,j)
F[stage][j][1]=i
print "i=",i,"j=",j,"\t\t\t",F[stage][j][0]
else:
F[stage]=[[float('inf'),0] for i in range(len(words)+1)]
for i in range(len(words)+1):
for j in range(i,len(words)+1):
if F[stage-1][i][0]+cost(i,j)<F[stage][j][0]:
F[stage][j][0]=F[stage-1][i][0]+cost(i,j)
F[stage][j][1]=i
print "i=",i,"j=",j,"\t\t\t",F[stage][j][0]
print 'reversing list'
print "------------------------------------"
listWords=[]
a=len(words)
for k in xrange(n-1,0,-1):#reverse loop from n-1 to 1
listWords.append(' '.join(words[F[k][a][1]:a]))
a=F[k][a][1]
listWords.append(' '.join(words[0:a]))
listWords.reverse()
for line in listWords:
print line, '\t\t',len(line)
return listWords
Here is the accepted solution from Algorithm to divide text into 3 evenly-sized groups converted to C#:
static List<string> Minragged(string text, int n = 3)
{
var words = text.Split();
var cumwordwidth = new List<int>();
cumwordwidth.Add(0);
foreach (var word in words)
cumwordwidth.Add(cumwordwidth[cumwordwidth.Count - 1] + word.Length);
var totalwidth = cumwordwidth[cumwordwidth.Count - 1] + words.Length - 1;
var linewidth = (double)(totalwidth - (n - 1)) / n;
var cost = new Func<int, int, double>((i, j) =>
{
var actuallinewidth = Math.Max(j - i - 1, 0) + (cumwordwidth[j] - cumwordwidth[i]);
return (linewidth - actuallinewidth) * (linewidth - actuallinewidth);
});
var best = new List<List<Tuple<double, int>>>();
var tmp = new List<Tuple<double, int>>();
best.Add(tmp);
tmp.Add(new Tuple<double, int>(0.0f, -1));
foreach (var word in words)
tmp.Add(new Tuple<double, int>(double.MaxValue, -1));
for (int l = 1; l < n + 1; ++l)
{
tmp = new List<Tuple<double, int>>();
best.Add(tmp);
for (int j = 0; j < words.Length + 1; ++j)
{
var min = new Tuple<double, int>(best[l - 1][0].Item1 + cost(0, j), 0);
for (int k = 0; k < j + 1; ++k)
{
var loc = best[l - 1][k].Item1 + cost(k, j);
if (loc < min.Item1 || (loc == min.Item1 && k < min.Item2))
min = new Tuple<double, int>(loc, k);
}
tmp.Add(min);
}
}
var lines = new List<string>();
var b = words.Length;
for (int l = n; l > 0; --l)
{
var a = best[l][b].Item2;
lines.Add(string.Join(" ", words, a, b - a));
b = a;
}
lines.Reverse();
return lines;
}