Skip to content

Commit 1bfedae

Browse files
committed
#429 More correct implementation of break-word.
Take two, with additional tests.
1 parent 816193a commit 1bfedae

File tree

7 files changed

+203
-37
lines changed

7 files changed

+203
-37
lines changed

openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/Breaker.java

Lines changed: 82 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import com.openhtmltopdf.css.style.CalculatedStyle;
2828
import com.openhtmltopdf.css.style.CssContext;
2929
import com.openhtmltopdf.extend.FSTextBreaker;
30+
import com.openhtmltopdf.layout.LineBreakContext.LineBreakResult;
3031
import com.openhtmltopdf.render.FSFont;
3132

3233
/**
@@ -71,7 +72,7 @@ private static int getFirstLetterEnd(String text, int start) {
7172
return end;
7273
}
7374

74-
public static void breakText(LayoutContext c,
75+
public static LineBreakResult breakText(LayoutContext c,
7576
LineBreakContext context, int avail,
7677
CalculatedStyle style, boolean tryToBreakAnywhere, int lineWidth) {
7778

@@ -84,7 +85,7 @@ public static void breakText(LayoutContext c,
8485
if (whitespace == IdentValue.NOWRAP) {
8586
context.setEnd(context.getLast());
8687
context.setWidth(Breaker.getTextWidthWithLetterSpacing(c, font, context.getCalculatedSubstring(), letterSpacing));
87-
return;
88+
return LineBreakResult.WORD_BREAKING_FINISHED;
8889
}
8990

9091
//check if we should break on the next newline
@@ -106,54 +107,75 @@ public static void breakText(LayoutContext c,
106107
//check if we may wrap
107108
if (whitespace == IdentValue.PRE ||
108109
(context.isNeedsNewLine() && context.getWidth() <= avail)) {
109-
return;
110+
return context.isNeedsNewLine() ?
111+
LineBreakResult.WORD_BREAKING_NEED_NEW_LINE :
112+
LineBreakResult.WORD_BREAKING_FINISHED;
110113
}
111114

112115
context.setEndsOnNL(false);
113116

114117
if (style.getWordWrap() != IdentValue.BREAK_WORD) {
115118
// Ordinary word wrap, which lets unbreakable words that are too long overflow the line.
116-
doBreakText(c, context, avail, style, tryToBreakAnywhere);
119+
return doBreakText(c, context, avail, style, tryToBreakAnywhere);
117120
} else {
118121
int originalStart = context.getStart();
119-
120122
// The idea is we only break a word if it will not fit on a line by itself.
121123

124+
LineBreakResult result;
125+
LOOP:
122126
while (true) {
123-
doBreakText(c, context, avail, style, tryToBreakAnywhere);
127+
result = doBreakText(c, context, avail, style, tryToBreakAnywhere);
128+
129+
switch (result) {
130+
case WORD_BREAKING_FINISHED:
131+
case CHAR_BREAKING_FINISHED:
132+
case CHAR_BREAKING_UNBREAKABLE:
133+
case CHAR_BREAKING_NEED_NEW_LINE:
134+
break LOOP;
124135

125-
if (context.isFinished()) {
126-
break;
127-
} else if (tryToBreakAnywhere && context.isEndsOnWordBreak()) {
128-
// We were in char breaking mode, but have found a line breaking opportunity.
136+
case CHAR_BREAKING_FOUND_WORD_BREAK:
129137
tryToBreakAnywhere = false;
130-
} else if (!tryToBreakAnywhere && context.isNeedsNewLine() && context.getNextWidth() >= lineWidth) {
131-
// The next word will not fit on a line by itself so turn on char breaking mode.
132-
tryToBreakAnywhere = true;
133-
} else if (!tryToBreakAnywhere && context.isUnbreakable()) {
134-
// Safety valve: Not sure we need it.
135-
break;
136-
} else if (context.isNeedsNewLine()) {
137-
// Stop, we're at the end of the line.
138138
break;
139+
140+
case WORD_BREAKING_NEED_NEW_LINE: {
141+
if (context.getNextWidth() >= lineWidth) {
142+
tryToBreakAnywhere = true;
143+
break;
144+
} else {
145+
break LOOP;
146+
}
147+
}
148+
case WORD_BREAKING_UNBREAKABLE: {
149+
if (context.getWidth() >= lineWidth) {
150+
tryToBreakAnywhere = true;
151+
context.resetEnd();
152+
continue LOOP;
153+
} else {
154+
break LOOP;
155+
}
156+
}
157+
158+
default:
159+
break LOOP;
139160
}
140161

141-
avail -= context.getWidth();
142162
context.setStart(context.getEnd());
163+
avail -= context.getWidth();
143164
}
144165

145166
context.setStart(originalStart);
146167

147168
// We need to know this for the next line.
148169
context.setFinishedInCharBreakingMode(tryToBreakAnywhere);
170+
return result;
149171
}
150172
}
151173

152-
private static void doBreakText(LayoutContext c,
174+
private static LineBreakResult doBreakText(LayoutContext c,
153175
LineBreakContext context, int avail, CalculatedStyle style,
154176
boolean tryToBreakAnywhere) {
155177
if (!tryToBreakAnywhere) {
156-
doBreakText(c, context, avail, style, STANDARD_LINE_BREAKER);
178+
return doBreakText(c, context, avail, style, STANDARD_LINE_BREAKER);
157179
} else {
158180
FSFont font = style.getFSFont(c);
159181

@@ -168,15 +190,15 @@ private static void doBreakText(LayoutContext c,
168190
FSTextBreaker lineIterator = STANDARD_LINE_BREAKER.getBreaker(currentString, c.getSharedContext());
169191
FSTextBreaker charIterator = STANDARD_CHARACTER_BREAKER.getBreaker(currentString, c.getSharedContext());
170192

171-
doBreakCharacters(currentString, lineIterator, charIterator, context, avail, letterSpacing, measurer);
193+
return doBreakCharacters(currentString, lineIterator, charIterator, context, avail, letterSpacing, measurer);
172194
}
173195
}
174196

175197
/**
176198
* Breaks at most one word (until the next word break) going character by character to see
177199
* what will fit in.
178200
*/
179-
static void doBreakCharacters(
201+
static LineBreakResult doBreakCharacters(
180202
String currentString,
181203
FSTextBreaker lineIterator,
182204
FSTextBreaker charIterator,
@@ -237,10 +259,19 @@ static void doBreakCharacters(
237259

238260
if (graphicsLength == avail) {
239261
// Exact fit..
240-
context.setNeedsNewLine(currentString.length() > left);
241-
context.setEnd(left);
262+
boolean needNewLine = currentString.length() > left;
263+
264+
context.setNeedsNewLine(needNewLine);
265+
context.setEnd(left + context.getStart());
242266
context.setWidth(graphicsLength);
243-
return;
267+
268+
if (left >= currentString.length()) {
269+
return LineBreakResult.CHAR_BREAKING_FINISHED;
270+
} else if (left >= nextWordBreak) {
271+
return LineBreakResult.CHAR_BREAKING_FOUND_WORD_BREAK;
272+
} else {
273+
return LineBreakResult.CHAR_BREAKING_NEED_NEW_LINE;
274+
}
244275
}
245276

246277
if (nextCharBreak < 0) {
@@ -252,7 +283,7 @@ static void doBreakCharacters(
252283
lastGoodWrap = nextCharBreak;
253284
lastGoodGraphicsLength = graphicsLength;
254285

255-
nextCharBreak = Math.min(currentString.length(), nextWordBreak);
286+
nextCharBreak = nextWordBreak;
256287

257288
float extraSpacing = (nextCharBreak - left) * letterSpacing;
258289
int splitWidth = (int) (measurer.applyAsInt(currentString.substring(left, nextCharBreak)) + extraSpacing);
@@ -265,7 +296,14 @@ static void doBreakCharacters(
265296
context.setWidth(graphicsLength);
266297
context.setEnd(nextCharBreak + context.getStart());
267298
context.setEndsOnWordBreak(nextCharBreak == nextWordBreak);
268-
return;
299+
300+
if (nextCharBreak >= currentString.length()) {
301+
return LineBreakResult.CHAR_BREAKING_FINISHED;
302+
} else if (nextCharBreak >= nextWordBreak) {
303+
return LineBreakResult.CHAR_BREAKING_FOUND_WORD_BREAK;
304+
} else {
305+
return LineBreakResult.CHAR_BREAKING_NEED_NEW_LINE;
306+
}
269307
}
270308

271309
// We need a newline for this word.
@@ -276,7 +314,14 @@ static void doBreakCharacters(
276314
context.setWidth(lastGoodGraphicsLength);
277315
context.setEnd(lastGoodWrap + context.getStart());
278316
context.setEndsOnWordBreak(lastGoodWrap == nextWordBreak);
279-
return;
317+
318+
if (lastGoodWrap >= currentString.length()) {
319+
return LineBreakResult.CHAR_BREAKING_FINISHED;
320+
} else if (lastGoodWrap >= nextWordBreak) {
321+
return LineBreakResult.CHAR_BREAKING_FOUND_WORD_BREAK;
322+
} else {
323+
return LineBreakResult.CHAR_BREAKING_NEED_NEW_LINE;
324+
}
280325
} else {
281326
// One character word, so we didn't find a wrap point.
282327
float extraSpacing = nextWordBreak * letterSpacing;
@@ -286,7 +331,8 @@ static void doBreakCharacters(
286331
context.setEnd(nextWordBreak + context.getStart());
287332
context.setEndsOnWordBreak(true);
288333
context.setWidth(splitWidth);
289-
return;
334+
335+
return LineBreakResult.CHAR_BREAKING_UNBREAKABLE;
290336
}
291337
}
292338

@@ -308,7 +354,7 @@ void copyTo(AppBreakOpportunity other) {
308354
}
309355
}
310356

311-
public static void doBreakText(
357+
public static LineBreakResult doBreakText(
312358
LayoutContext c,
313359
LineBreakContext context,
314360
int avail,
@@ -387,7 +433,7 @@ public static void doBreakText(
387433
context.setWidth(current.graphicsLength);
388434
context.setEnd(context.getMaster().length());
389435
// It all fit!
390-
return;
436+
return LineBreakResult.WORD_BREAKING_FINISHED;
391437
}
392438

393439
context.setNeedsNewLine(true);
@@ -403,6 +449,8 @@ public static void doBreakText(
403449

404450
context.setNextWidth(nextUnfittableSplitWidth);
405451
context.setEnd(context.getStart() + lastWrap);
452+
453+
return LineBreakResult.WORD_BREAKING_NEED_NEW_LINE;
406454
} else {
407455
// Unbreakable string
408456
if (current.left == 0) {
@@ -420,6 +468,8 @@ public static void doBreakText(
420468
} else {
421469
context.setWidth(current.graphicsLength);
422470
}
471+
472+
return LineBreakResult.WORD_BREAKING_UNBREAKABLE;
423473
}
424474
}
425475

openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/InlineBoxing.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -186,10 +186,11 @@ public static void layoutContent(LayoutContext c, BlockBox box, int initialY, in
186186
continue;
187187
}
188188
} else {
189-
if (!startInlineText(c, lbContext, inlineBox, space, current, fit, trimmedLeadingSpace, inCharBreakingMode)) {
189+
boolean shouldContinue = !startInlineText(c, lbContext, inlineBox, space, current, fit, trimmedLeadingSpace, inCharBreakingMode);
190+
inCharBreakingMode = lbContext.isFinishedInCharBreakingMode();
191+
if (shouldContinue) {
190192
continue;
191193
}
192-
inCharBreakingMode = lbContext.isFinishedInCharBreakingMode();
193194
}
194195
}
195196

@@ -374,14 +375,15 @@ private static boolean startInlineText(
374375
boolean trimmedLeadingSpace, boolean tryToBreakAnywhere) {
375376

376377
lbContext.saveEnd();
378+
CalculatedStyle style = inlineBox.getStyle();
377379

378380
// Layout the text into the remaining width on this line. Will only go to the end of the line (at most)
379381
// and will produce one InlineText object.
380382
InlineText inlineText = layoutText(
381-
c, inlineBox.getStyle(), space.remainingWidth - fit, lbContext, false, inlineBox.getTextDirection(), tryToBreakAnywhere, space.maxAvailableWidth - fit);
383+
c, style, space.remainingWidth - fit, lbContext, false, inlineBox.getTextDirection(), tryToBreakAnywhere, space.maxAvailableWidth - fit);
382384

383-
if (inlineBox.getStyle().hasLetterSpacing()) {
384-
inlineText.setLetterSpacing(inlineBox.getStyle().getFloatPropertyProportionalWidth(CSSName.LETTER_SPACING, 0, c));
385+
if (style.hasLetterSpacing()) {
386+
inlineText.setLetterSpacing(style.getFloatPropertyProportionalWidth(CSSName.LETTER_SPACING, 0, c));
385387
}
386388

387389
if (lbContext.isUnbreakable() && !current.line.isContainsContent()) {

openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/LineBreakContext.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,20 @@
2525
* to the layout code.
2626
*/
2727
public class LineBreakContext {
28+
public static enum LineBreakResult { // Outcome of one breaking pass, as returned by the Breaker text-breaking methods.
29+
CHAR_BREAKING_NEED_NEW_LINE, // Char breaking stopped before reaching the next word break or the end of the string.
30+
WORD_BREAKING_NEED_NEW_LINE, // Word breaking ended with the context flagged as needing a new line.
31+
32+
CHAR_BREAKING_UNBREAKABLE, // Char breaking found no wrap point (the one-character-word case).
33+
WORD_BREAKING_UNBREAKABLE, // Word breaking hit an unbreakable string.
34+
35+
CHAR_BREAKING_FOUND_WORD_BREAK, // Char breaking reached the next word break before the end of the string.
36+
37+
CHAR_BREAKING_FINISHED, // Char breaking reached the end of the string.
38+
WORD_BREAKING_FINISHED; // Word breaking finished with no new line required (e.g. it all fit, or white-space is nowrap).
39+
}
40+
41+
2842
private String _master;
2943
private int _start;
3044
private int _end;
9.2 KB
Binary file not shown.
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
<html>
2+
<head>
3+
<style>
4+
@page {
5+
size: 100mm 25cm;
6+
}
7+
body, td {
8+
border: 1px solid black;
9+
}
10+
table, .wrap {
11+
word-wrap: break-word;
12+
}
13+
</style>
14+
</head>
15+
<body style="font-family: monospace;">
16+
17+
<div class="wrap">
18+
FirstWordTooLongForLineSoItShouldWrap MiddleWordTooLongForLineSoItShouldWrap LastWordTooLongForLineSoItShouldWrap
19+
</div><br/>
20+
21+
<div class="wrap">
22+
First Word Too Long For Line So It Should MiddleWordTooLongForLineSoItShouldWrap LastWordTooLongForLineSoItShouldWrap
23+
</div><br/>
24+
25+
<div class="wrap">
26+
First Word Too Long For Line So It Should MiddleWordTooLongForLineSoItShouldWrap Last Word Too Long For Line So It Should
27+
</div><br/>
28+
29+
<div class="wrap">
30+
FirstWordNotTooLongForLineSoIt FirstWordNotTooLongForLineSoIt FirstWordNotTooLongForLineSoIt FirstWordNotTooLongForLineSoIt
31+
</div><br/>
32+
33+
<div class="wrap">
34+
FirstWordNotTooLongForLine <b>FirstWordNotTooLongForLine</b> <b><i>FirstWordNotTooLongForLineSoIt FirstWordNotTooLongForLineSoIt</i></b>
35+
</div><br/>
36+
37+
<div class="wrap">
38+
One two three FirstWordNotTooLongForLineSoIt four five FirstWordNotTooLongForLineSoIt six seven FirstWordNotTooLongForLineSoIt eight FirstWordNotTooLongForLineSoIt
39+
</div><br/>
40+
41+
<table>
42+
<tr><td>OneTwoThreeFour</td><td>Five six seven</td><td>Eight nine</td></tr>
43+
<tr><td>One Two Three Four</td><td>Fivesixseven</td><td>Eight nine</td></tr>
44+
<tr><td>One Two Three Four</td><td>Five six seven</td><td>Eightnine</td></tr>
45+
<tr><td>OneTwoThreeFour</td><td>Fivesixseven</td><td>Eightnine</td></tr>
46+
</table>
47+
48+
</body>
49+
</html>
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
<html>
2+
<head>
3+
<style>
4+
@page {
5+
size: 60mm 20cm;
6+
margin: 0;
7+
}
8+
body {
9+
word-wrap: break-word;
10+
margin: 0;
11+
max-width: 60mm;
12+
}
13+
</style>
14+
</head>
15+
<body>
16+
17+
<div>
18+
<div style="width: 50%; height: 30px; background-color: red;float: left;"></div>Some words some more.
19+
FirstWordTooLongForLineSoItShouldWrap MiddleWordTooLongForLineSoItShouldWrap LastWordTooLongForLineSoItShouldWrap
20+
</div>
21+
22+
<div style="clear:both;"></div>
23+
24+
<div>
25+
<div style="width: 50%; height: 30px; background-color: red;float: right;"></div>
26+
FirstWordTooLongForLineSoItShouldWrap MiddleWordTooLongForLineSoItShouldWrap LastWordTooLongForLineSoItShouldWrap
27+
</div>
28+
29+
<div style="clear:both;"></div>
30+
31+
</body>
32+
</html>

0 commit comments

Comments
 (0)