// Split the characters into trees of TextBlocks, one tree for each
// rotation, then merge them into a single tree rooted at the tree for
// the current rotation (curRot).
//
// charsA holds TextChar pointers; this function only calls
// getLength()/get() on it.  Returns the merged tree, or NULL when no
// (unclipped) characters exist in the curRot rotation.
TextBlock *TextPage::splitChars(GList *charsA) {
  TextBlock *tree[4];
  TextBlock *blk;
  GList *chars2, *clippedChars;
  TextChar *ch;
  int rot, i;

  // split: build a tree of TextBlocks for each rotation
  clippedChars = new GList();
  for (rot = 0; rot < 4; ++rot) {
    // collect the characters that use this rotation
    chars2 = new GList();
    for (i = 0; i < charsA->getLength(); ++i) {
      ch = (TextChar *)charsA->get(i);
      if (ch->rot == rot) {
        chars2->append(ch);
      }
    }
    tree[rot] = NULL;
    if (chars2->getLength() > 0) {
      // odd rotations are sorted with cmpY, even rotations with cmpX
      chars2->sort((rot & 1) ? &TextChar::cmpY : &TextChar::cmpX);
      removeDuplicates(chars2, rot);
      if (control.clipText) {
        // move clipped characters to clippedChars; they are inserted
        // into the merged tree separately at the end
        i = 0;
        while (i < chars2->getLength()) {
          ch = (TextChar *)chars2->get(i);
          if (ch->clipped) {
            ch = (TextChar *)chars2->del(i);
            clippedChars->append(ch);
          } else {
            ++i;
          }
        }
      }
      if (chars2->getLength() > 0) {
        tree[rot] = split(chars2, rot);
      }
    }
    delete chars2;
  }

  // The primary tree is tree[curRot] rather than tree[0] (changed
  // 2015-10-21 so that vertical text can be selected accurately).

  // if the page contains no (unclipped) text in the primary rotation,
  // just leave an empty column list
  if (!tree[curRot]) {
    // Trees built for the other rotations are neither merged nor
    // returned on this path, so release them here; nothing else holds
    // a pointer to them.  (tree[curRot] is NULL; deleting it is a no-op.)
    for (rot = 0; rot < 4; ++rot) {
      delete tree[rot];
    }
    delete clippedChars;
    return NULL;
  }

  // if the main tree is not a multicolumn node, insert one so that
  // rotated text has somewhere to go
  if (tree[curRot]->tag != blkTagMulticolumn) {
    blk = new TextBlock(blkHorizSplit, 0);
    blk->addChild(tree[curRot]);
    blk->tag = blkTagMulticolumn;
    tree[curRot] = blk;
  }

  // merge non-primary-rotation text into the primary-rotation tree
  for (rot = 0; rot < 4; ++rot) {
    if (rot == curRot) {
      continue;
    }
    if (tree[rot]) {
      insertIntoTree(tree[rot], tree[curRot]);
      tree[rot] = NULL;
    }
  }

  if (clippedChars->getLength()) {
    insertClippedChars(clippedChars, tree[curRot]);
  }
  delete clippedChars;

#if 0 //~debug
  dumpTree(tree[curRot]);
#endif

  return tree[curRot];
}
// Select the TextFontInfo that matches the font in <state> (creating
// and appending a new one to <fonts> if none matches), then recompute
// curFontSize and curRot from the graphics state.
void TextPage::updateFont(GfxState *state) {
  GfxFont *gfxFont;
  Gfx8BitFont *font8;
  double *fm;
  char *name;
  int code, mCode, letterCode, anyCode;
  double w;
  double m[4], m2[4];
  int i, wMode;
  GBool isType3;

  // get the font info object
  curFont = NULL;
  for (i = 0; i < fonts->getLength(); ++i) {
    curFont = (TextFontInfo *)fonts->get(i);
    if (curFont->matches(state)) {
      break;
    }
    curFont = NULL;
  }
  if (!curFont) {
    curFont = new TextFontInfo(state);
    fonts->append(curFont);
  }

  // adjust the font size
  gfxFont = state->getFont();
  curFontSize = state->getTransformedFontSize();
  isType3 = gfxFont && gfxFont->getType() == fontType3;
  if (isType3) {
    // This is a hack which makes it possible to deal with some Type 3
    // fonts.  The problem is that it's impossible to know what the
    // base coordinate system used in the font is without actually
    // rendering the font.  This code tries to guess by looking at the
    // width of the character 'm' (which breaks if the font is a
    // subset that doesn't contain 'm').
    font8 = (Gfx8BitFont *)gfxFont;
    mCode = letterCode = anyCode = -1;
    for (code = 0; code < 256; ++code) {
      name = font8->getCharName(code);
      if (name && name[0] == 'm' && name[1] == '\0') {
        mCode = code;
      }
      // Test name[0] before name[1]: for an empty name, name[1] lies
      // past the terminating NUL and must not be read.
      if (letterCode < 0 && name &&
          ((name[0] >= 'A' && name[0] <= 'Z') ||
           (name[0] >= 'a' && name[0] <= 'z')) &&
          name[1] == '\0') {
        letterCode = code;
      }
      if (anyCode < 0 && name && font8->getWidth(code) > 0) {
        anyCode = code;
      }
    }
    if (mCode >= 0 && (w = font8->getWidth(mCode)) > 0) {
      // 0.6 is a generic average 'm' width -- yes, this is a hack
      curFontSize *= w / 0.6;
    } else if (letterCode >= 0 && (w = font8->getWidth(letterCode)) > 0) {
      // even more of a hack: 0.5 is a generic letter width
      curFontSize *= w / 0.5;
    } else if (anyCode >= 0 && (w = font8->getWidth(anyCode)) > 0) {
      // better than nothing: 0.5 is a generic character width
      curFontSize *= w / 0.5;
    }
    fm = gfxFont->getFontMatrix();
    if (fm[0] != 0) {
      curFontSize *= fabs(fm[3] / fm[0]);
    }
  }

  if (gfxFont) {
    // compute the rotation
    state->getFontTransMat(&m[0], &m[1], &m[2], &m[3]);
    if (isType3) {
      // for Type 3 fonts, pre-multiply by the font matrix
      fm = gfxFont->getFontMatrix();
      m2[0] = fm[0] * m[0] + fm[1] * m[2];
      m2[1] = fm[0] * m[1] + fm[1] * m[3];
      m2[2] = fm[2] * m[0] + fm[3] * m[2];
      m2[3] = fm[2] * m[1] + fm[3] * m[3];
      m[0] = m2[0];
      m[1] = m2[1];
      m[2] = m2[2];
      m[3] = m2[3];
    }
    if (fabs(m[0] * m[3]) > fabs(m[1] * m[2])) {
      curRot = (m[0] > 0 || m[3] < 0) ? 0 : 2;
    } else {
      curRot = (m[2] > 0) ? 1 : 3;
    }

    // for vertical writing mode, the lines are effectively rotated 90
    // degrees (restored according to the behavior of the old version)
    wMode = gfxFont->getWMode();
    if (wMode) {
      curRot = (curRot + 1) & 3;
    }
  }
}
// Add one character code (which may map to several Unicode values) to
// the page's character list.
//
// state:   graphics state used for coordinate transforms, spacing,
//          clipping, render mode and colors
// x, y:    character origin, transformed here via state->transform()
// dx, dy:  character advance, including char/word spacing (which is
//          subtracted below)
// c:       character code; 0x20 additionally gets word spacing removed
// nBytes:  number of bytes of the code; always added to charPos unless
//          we are inside an ActualText span
// u, uLen: Unicode values for this character
void TextPage::addChar(GfxState *state, double x, double y,
                       double dx, double dy,
                       CharCode c, int nBytes, Unicode *u, int uLen) {
  double x1, y1, x2, y2, w1, h1, dx2, dy2, ascent, descent, sp;
  double xMin, yMin, xMax, yMax;
  double clipXMin, clipYMin, clipXMax, clipYMax;
  GfxRGB rgb;
  GBool clipped, rtl, invisible;
  int i, j, wMode, render;

  // if we're in an ActualText span, save the position info (the
  // ActualText chars will be added by TextPage::endActualText()).
  if (actualText) {
    if (!actualTextNBytes) {
      actualTextX0 = x;
      actualTextY0 = y;
    }
    actualTextX1 = x + dx;
    actualTextY1 = y + dy;
    actualTextNBytes += nBytes;
    return;
  }

  // subtract char and word spacing from the dx,dy values
  sp = state->getCharSpace();
  if (c == (CharCode)0x20) {
    sp += state->getWordSpace();
  }
  state->textTransformDelta(sp * state->getHorizScaling(), 0, &dx2, &dy2);
  dx -= dx2;
  dy -= dy2;
  state->transformDelta(dx, dy, &w1, &h1);

  // throw away chars that aren't inside the page bounds
  // (and also do a sanity check on the character size)
  state->transform(x, y, &x1, &y1);
  if (x1 + w1 < 0 || x1 > pageWidth ||
      y1 + h1 < 0 || y1 > pageHeight ||
      w1 > pageWidth || h1 > pageHeight) {
    charPos += nBytes;
    return;
  }

  // check the tiny chars limit
  if (!globalParams->getTextKeepTinyChars() &&
      fabs(w1) < 3 && fabs(h1) < 3) {
    if (++nTinyChars > 50000) {
      charPos += nBytes;
      return;
    }
  }

  // skip space characters
  if (uLen == 1 && u[0] == (Unicode)0x20) {
    charPos += nBytes;
    return;
  }

  // check for clipping: the char counts as clipped if the inner 10%-90%
  // span of its advance falls outside the clip bounding box
  clipped = gFalse;
  if (control.clipText) {
    state->getClipBBox(&clipXMin, &clipYMin, &clipXMax, &clipYMax);
    if (x1 + 0.1 * w1 < clipXMin || x1 + 0.9 * w1 > clipXMax ||
        y1 + 0.1 * h1 < clipYMin || y1 + 0.9 * h1 > clipYMax) {
      clipped = gTrue;
    }
  }

  // add the characters
  if (uLen > 0) {
    // handle right-to-left ligatures: if there are multiple Unicode
    // characters, and they're all right-to-left, insert them in
    // right-to-left order
    if (uLen > 1) {
      rtl = gTrue;
      for (i = 0; i < uLen; ++i) {
        if (!unicodeTypeR(u[i])) {
          rtl = gFalse;
          break;
        }
      }
    } else {
      rtl = gFalse;
    }

    // split the advance evenly between the Unicode values
    w1 /= uLen;
    h1 /= uLen;
    ascent = curFont->ascent * curFontSize;
    descent = curFont->descent * curFontSize;

    // for vertical writing mode, the lines are effectively rotated 90
    // degrees
    wMode = state->getFont() ? state->getFont()->getWMode() : 0;

    // the render mode and color are the same for every Unicode value of
    // this character, so look them up once
    render = state->getRender();
    if ((render & 3) == 1) {
      state->getStrokeRGB(&rgb);
    } else {
      state->getFillRGB(&rgb);
    }
    invisible = render == 3;

    for (i = 0; i < uLen; ++i) {
      x2 = x1 + i * w1;
      y2 = y1 + i * h1;
      if (wMode) {
        // vertical writing mode (restored according to the old version
        // to correct the position of vertical characters)
        // NB: the rotation value has been incremented by 1 (in
        // TextPage::updateFont()) for vertical writing mode
        switch (curRot) {
        case 0:
        default:  // keeps the bounds initialized, as in the horizontal case
          xMin = x2 - w1;
          xMax = x2;
          yMin = y2 - curFontSize;
          yMax = y2;
          break;
        case 1:
          xMin = x2;
          xMax = x2 + curFontSize;
          yMin = y2 - h1;
          yMax = y2;
          break;
        case 2:
          xMin = x2;
          xMax = x2 + w1;
          yMin = y2;
          yMax = y2 + curFontSize;
          break;
        case 3:
          xMin = x2 - curFontSize;
          xMax = x2;
          yMin = y2;
          yMax = y2 + h1;
          break;
        }
      } else {
        // horizontal writing mode
        switch (curRot) {
        case 0:
        default:
          xMin = x2;
          xMax = x2 + w1;
          yMin = y2 - ascent;
          yMax = y2 - descent;
          break;
        case 1:
          xMin = x2 + descent;
          xMax = x2 + ascent;
          yMin = y2;
          yMax = y2 + h1;
          break;
        case 2:
          xMin = x2 + w1;
          xMax = x2;
          yMin = y2 + descent;
          yMax = y2 + ascent;
          break;
        case 3:
          xMin = x2 - ascent;
          xMax = x2 - descent;
          yMin = y2 + h1;
          yMax = y2;
          break;
        }
      }

      // all-RTL sequences are stored in reverse order
      if (rtl) {
        j = uLen - 1 - i;
      } else {
        j = i;
      }
      chars->append(new TextChar(u[j], charPos, nBytes, xMin, yMin, xMax, yMax,
                                 curRot, clipped,
                                 invisible,
                                 curFont, curFontSize,
                                 colToDbl(rgb.r), colToDbl(rgb.g),
                                 colToDbl(rgb.b)));
    }
  }

  charPos += nBytes;
}
// Search the page text for the string s[0..len-1].
//
// startAtTop / stopAtBottom: when false, the search is limited by the
//   incoming (*xMin,*yMin) start point / (*xMax,*yMax) stop point.
// startAtLast / stopAtLast: when true and a previous match exists
//   (haveLastFind), the previous match position is used as the start /
//   stop point instead.
// caseSensitive: when false, both the search string and each line are
//   passed through unicodeToUpper() before comparing.
// backward: iterate columns, paragraphs, lines and positions in reverse
//   and keep the last qualifying match instead of the first.
// wholeWord: require that the characters adjacent to the match (if any)
//   are not word characters (unicodeTypeWord()).
//
// On success, stores the match bounding box in *xMin,*yMin,*xMax,*yMax,
// records its position as the last find, and returns gTrue.  Otherwise
// returns gFalse and leaves the output parameters untouched.
GBool TextPage::findText(Unicode *s, int len,
                         GBool startAtTop, GBool stopAtBottom,
                         GBool startAtLast, GBool stopAtLast,
                         GBool caseSensitive, GBool backward,
                         GBool wholeWord,
                         double *xMin, double *yMin,
                         double *xMax, double *yMax) {
  TextBlock *tree;
  TextColumn *column;
  TextParagraph *par;
  TextLine *line;
  Unicode *s2, *txt;
  double xStart, yStart, xStop, yStop;
  double xMin0, yMin0, xMax0, yMax0;
  double xMin1, yMin1, xMax1, yMax1;
  GBool found;
  int txtSize, m, step, colIdx, parIdx, lineIdx, i, j, k;

  //~ need to handle right-to-left text

  // build the column list on first use
  if (!findCols) {
    // NB: the rotateChars() / unrotateChars() / unrotateColumns() calls
    // that used to surround this were disabled on 2015-10-21 so that
    // vertical text can be found.
    if ((tree = splitChars(chars))) {
      findCols = buildColumns(tree);
      delete tree;
    } else {
      // no text
      findCols = new GList();
    }
  }

  // convert the search string to uppercase
  if (!caseSensitive) {
    s2 = (Unicode *)gmallocn(len, sizeof(Unicode));
    for (i = 0; i < len; ++i) {
      s2[i] = unicodeToUpper(s[i]);
    }
  } else {
    s2 = s;
  }

  // txt is a reusable uppercase buffer in the case-insensitive case, or
  // a borrowed pointer to the line text otherwise
  txt = NULL;
  txtSize = 0;

  xStart = yStart = xStop = yStop = 0;
  if (startAtLast && haveLastFind) {
    xStart = lastFindXMin;
    yStart = lastFindYMin;
  } else if (!startAtTop) {
    xStart = *xMin;
    yStart = *yMin;
  }
  if (stopAtLast && haveLastFind) {
    xStop = lastFindXMin;
    yStop = lastFindYMin;
  } else if (!stopAtBottom) {
    xStop = *xMax;
    yStop = *yMax;
  }

  found = gFalse;
  xMin0 = xMax0 = yMin0 = yMax0 = 0; // make gcc happy
  xMin1 = xMax1 = yMin1 = yMax1 = 0; // make gcc happy

  // every loop below walks in the same direction
  step = backward ? -1 : 1;

  for (colIdx = backward ? findCols->getLength() - 1 : 0;
       backward ? colIdx >= 0 : colIdx < findCols->getLength();
       colIdx += step) {
    column = (TextColumn *)findCols->get(colIdx);

    // check: is the column above the top limit?
    if (!startAtTop && (backward ? column->yMin > yStart
                                 : column->yMax < yStart)) {
      continue;
    }

    // check: is the column below the bottom limit?
    if (!stopAtBottom && (backward ? column->yMax < yStop
                                   : column->yMin > yStop)) {
      continue;
    }

    for (parIdx = backward ? column->paragraphs->getLength() - 1 : 0;
         backward ? parIdx >= 0 : parIdx < column->paragraphs->getLength();
         parIdx += step) {
      par = (TextParagraph *)column->paragraphs->get(parIdx);

      // check: is the paragraph above the top limit?
      if (!startAtTop && (backward ? par->yMin > yStart
                                   : par->yMax < yStart)) {
        continue;
      }

      // check: is the paragraph below the bottom limit?
      if (!stopAtBottom && (backward ? par->yMax < yStop
                                     : par->yMin > yStop)) {
        continue;
      }

      for (lineIdx = backward ? par->lines->getLength() - 1 : 0;
           backward ? lineIdx >= 0 : lineIdx < par->lines->getLength();
           lineIdx += step) {
        line = (TextLine *)par->lines->get(lineIdx);

        // check: is the line above the top limit?
        if (!startAtTop && (backward ? line->yMin > yStart
                                     : line->yMax < yStart)) {
          continue;
        }

        // check: is the line below the bottom limit?
        if (!stopAtBottom && (backward ? line->yMax < yStop
                                       : line->yMin > yStop)) {
          continue;
        }

        // convert the line to uppercase
        m = line->len;
        if (!caseSensitive) {
          if (m > txtSize) {
            txt = (Unicode *)greallocn(txt, m, sizeof(Unicode));
            txtSize = m;
          }
          for (k = 0; k < m; ++k) {
            txt[k] = unicodeToUpper(line->text[k]);
          }
        } else {
          txt = line->text;
        }

        // search each position in this line
        // (txt is indexed with j rather than walked with a cursor
        // pointer: when the line is shorter than the search string,
        // m - len is negative and txt + (m - len) would not be a valid
        // pointer, even though the loop body never runs)
        for (j = backward ? m - len : 0;
             backward ? j >= 0 : j <= m - len;
             j += step) {
          if (!wholeWord ||
              ((j == 0 || !unicodeTypeWord(txt[j - 1])) &&
               (j + len == m || !unicodeTypeWord(txt[j + len])))) {

            // compare the strings
            for (k = 0; k < len; ++k) {
              if (txt[j + k] != s2[k]) {
                break;
              }
            }

            // found it
            if (k == len) {
              // the match extent along the line comes from the edge
              // array; the other axis uses the line's own bounds
              switch (line->rot) {
              case 0:
                xMin1 = line->edge[j];
                xMax1 = line->edge[j + len];
                yMin1 = line->yMin;
                yMax1 = line->yMax;
                break;
              case 1:
                xMin1 = line->xMin;
                xMax1 = line->xMax;
                yMin1 = line->edge[j];
                yMax1 = line->edge[j + len];
                break;
              case 2:
                xMin1 = line->edge[j + len];
                xMax1 = line->edge[j];
                yMin1 = line->yMin;
                yMax1 = line->yMax;
                break;
              case 3:
                xMin1 = line->xMin;
                xMax1 = line->xMax;
                yMin1 = line->edge[j + len];
                yMax1 = line->edge[j];
                break;
              }
              if (backward) {
                // must lie before the start point and after the stop
                // point; keep the candidate that is furthest along
                if ((startAtTop ||
                     yMin1 < yStart || (yMin1 == yStart && xMin1 < xStart)) &&
                    (stopAtBottom ||
                     yMin1 > yStop || (yMin1 == yStop && xMin1 > xStop))) {
                  if (!found ||
                      yMin1 > yMin0 || (yMin1 == yMin0 && xMin1 > xMin0)) {
                    xMin0 = xMin1;
                    xMax0 = xMax1;
                    yMin0 = yMin1;
                    yMax0 = yMax1;
                    found = gTrue;
                  }
                }
              } else {
                // must lie after the start point and before the stop
                // point; keep the earliest candidate
                if ((startAtTop ||
                     yMin1 > yStart || (yMin1 == yStart && xMin1 > xStart)) &&
                    (stopAtBottom ||
                     yMin1 < yStop || (yMin1 == yStop && xMin1 < xStop))) {
                  if (!found ||
                      yMin1 < yMin0 || (yMin1 == yMin0 && xMin1 < xMin0)) {
                    xMin0 = xMin1;
                    xMax0 = xMax1;
                    yMin0 = yMin1;
                    yMax0 = yMax1;
                    found = gTrue;
                  }
                }
              }
            }
          }
        }
      }
    }
  }

  // s2 and txt are only heap buffers in the case-insensitive case
  if (!caseSensitive) {
    gfree(s2);
    gfree(txt);
  }

  if (found) {
    *xMin = xMin0;
    *xMax = xMax0;
    *yMin = yMin0;
    *yMax = yMax0;
    lastFindXMin = xMin0;
    lastFindYMin = yMin0;
    haveLastFind = gTrue;
    return gTrue;
  }

  return gFalse;
}
// Forward one drawn character to the TextPage.  The position handed to
// addChar() is (x, y) with the supplied origin offset subtracted
// (restored according to the behavior of the old version).
void TextOutputDev::drawChar(GfxState *state, double x, double y,
                             double dx, double dy,
                             double originX, double originY,
                             CharCode c, int nBytes, Unicode *u, int uLen) {
  const double baseX = x - originX;
  const double baseY = y - originY;
  text->addChar(state, baseX, baseY, dx, dy, c, nBytes, u, uLen);
}