xpdf with vertical text_according to6,-CSDN博客

本文链接：https://blog.csdn.net/u012905879/article/details/49359369
// Split the characters into trees of TextBlocks, one tree for each
// rotation, then merge them into a single tree rooted at the primary
// rotation.
//
// charsA is scanned once per rotation value (0..3).  Within each
// rotation the characters are sorted, de-duplicated and -- when
// control.clipText is set -- clipped characters are moved to a
// separate list that is re-inserted into the merged tree at the end.
//
// The primary rotation is curRot (set by TextPage::updateFont()).
// Modified 2015-10-21 (Joan): the merge root is tree[curRot] rather
// than tree[0], so that vertical text can be selected accurately.
// If curRot has no tree but some other rotation does, the first
// non-empty rotation is used as the root instead; previously this case
// returned NULL, leaking the other trees and dropping their text.
//
// Returns the merged tree, or NULL if no rotation produced a tree
// (i.e. the page contains no unclipped text).
TextBlock *TextPage::splitChars(GList *charsA) {
  TextBlock *tree[4];
  TextBlock *blk;
  GList *chars2, *clippedChars;
  TextChar *ch;
  int rot, primaryRot, i;

  // split: build a tree of TextBlocks for each rotation
  clippedChars = new GList();
  for (rot = 0; rot < 4; ++rot) {
    chars2 = new GList();
    for (i = 0; i < charsA->getLength(); ++i) {
      ch = (TextChar *)charsA->get(i);
      if (ch->rot == rot) {
        chars2->append(ch);
      }
    }
    tree[rot] = NULL;
    if (chars2->getLength() > 0) {
      // odd rotations are sorted by y, even rotations by x
      chars2->sort((rot & 1) ? &TextChar::cmpY : &TextChar::cmpX);
      removeDuplicates(chars2, rot);
      if (control.clipText) {
        // move clipped chars out of chars2; the index only advances
        // when nothing was removed at the current position
        i = 0;
        while (i < chars2->getLength()) {
          ch = (TextChar *)chars2->get(i);
          if (ch->clipped) {
            ch = (TextChar *)chars2->del(i);
            clippedChars->append(ch);
          } else {
            ++i;
          }
        }
      }
      if (chars2->getLength() > 0) {
        tree[rot] = split(chars2, rot);
      }
    }
    delete chars2;
  }

  // choose the rotation whose tree becomes the merge root: curRot if
  // it has a tree, otherwise the first rotation that does
  primaryRot = -1;
  if (curRot >= 0 && curRot < 4 && tree[curRot]) {
    primaryRot = curRot;
  } else {
    for (rot = 0; rot < 4; ++rot) {
      if (tree[rot]) {
        primaryRot = rot;
        break;
      }
    }
  }

  // if the page contains no (unclipped) text, just leave an empty
  // column list
  if (primaryRot < 0) {
    delete clippedChars;
    return NULL;
  }

  // if the main tree is not a multicolumn node, insert one so that
  // rotated text has somewhere to go
  if (tree[primaryRot]->tag != blkTagMulticolumn) {
    blk = new TextBlock(blkHorizSplit, 0);
    blk->addChild(tree[primaryRot]);
    blk->tag = blkTagMulticolumn;
    tree[primaryRot] = blk;
  }

  // merge non-primary-rotation text into the primary-rotation tree
  for (rot = 0; rot < 4; ++rot) {
    if (rot == primaryRot) {
      continue;
    }
    if (tree[rot]) {
      insertIntoTree(tree[rot], tree[primaryRot]);
      tree[rot] = NULL;
    }
  }

  if (clippedChars->getLength()) {
    insertClippedChars(clippedChars, tree[primaryRot]);
  }
  delete clippedChars;

#if 0 //~debug
  dumpTree(tree[primaryRot]);
#endif

  return tree[primaryRot];
}

// Update the current-font state (curFont, curFontSize, curRot) from
// the graphics state.
//
// - curFont: the TextFontInfo in 'fonts' that matches 'state'; a new
//   entry is created and appended if none matches.
// - curFontSize: the transformed font size, with a heuristic
//   correction for Type 3 fonts.
// - curRot: rotation (0..3) derived from the font transform matrix,
//   advanced by one step for vertical writing mode.  It is left
//   unchanged when the state has no font.
void TextPage::updateFont(GfxState *state) {
  GfxFont *gfxFont;
  double *fm;
  char *name;
  int code, mCode, letterCode, anyCode;
  double w;
  double m[4], m2[4];
  int i, wMode;

  // get the font info object
  curFont = NULL;
  for (i = 0; i < fonts->getLength(); ++i) {
    curFont = (TextFontInfo *)fonts->get(i);
    if (curFont->matches(state)) {
      break;
    }
    curFont = NULL;
  }
  if (!curFont) {
    curFont = new TextFontInfo(state);
    fonts->append(curFont);
  }

  // adjust the font size
  gfxFont = state->getFont();
  curFontSize = state->getTransformedFontSize();
  if (gfxFont && gfxFont->getType() == fontType3) {
    // This is a hack which makes it possible to deal with some Type 3
    // fonts.  The problem is that it's impossible to know what the
    // base coordinate system used in the font is without actually
    // rendering the font.  This code tries to guess by looking at the
    // width of the character 'm' (which breaks if the font is a
    // subset that doesn't contain 'm').
    mCode = letterCode = anyCode = -1;
    for (code = 0; code < 256; ++code) {
      name = ((Gfx8BitFont *)gfxFont)->getCharName(code);
      if (name && name[0] == 'm' && name[1] == '\0') {
        mCode = code;
      }
      // test name[0] before name[1]: for an empty name, name[1] lies
      // past the terminator and must not be read
      if (letterCode < 0 && name &&
          ((name[0] >= 'A' && name[0] <= 'Z') ||
           (name[0] >= 'a' && name[0] <= 'z')) &&
          name[1] == '\0') {
        letterCode = code;
      }
      if (anyCode < 0 && name &&
          ((Gfx8BitFont *)gfxFont)->getWidth(code) > 0) {
        anyCode = code;
      }
    }
    if (mCode >= 0 &&
        (w = ((Gfx8BitFont *)gfxFont)->getWidth(mCode)) > 0) {
      // 0.6 is a generic average 'm' width -- yes, this is a hack
      curFontSize *= w / 0.6;
    } else if (letterCode >= 0 &&
               (w = ((Gfx8BitFont *)gfxFont)->getWidth(letterCode)) > 0) {
      // even more of a hack: 0.5 is a generic letter width
      curFontSize *= w / 0.5;
    } else if (anyCode >= 0 &&
               (w = ((Gfx8BitFont *)gfxFont)->getWidth(anyCode)) > 0) {
      // better than nothing: 0.5 is a generic character width
      curFontSize *= w / 0.5;
    }
    fm = gfxFont->getFontMatrix();
    if (fm[0] != 0) {
      curFontSize *= fabs(fm[3] / fm[0]);
    }
  }

  if (gfxFont) {
    // compute the rotation
    state->getFontTransMat(&m[0], &m[1], &m[2], &m[3]);
    if (gfxFont->getType() == fontType3) {
      // for Type 3 fonts, pre-multiply by the font matrix
      fm = gfxFont->getFontMatrix();
      m2[0] = fm[0] * m[0] + fm[1] * m[2];
      m2[1] = fm[0] * m[1] + fm[1] * m[3];
      m2[2] = fm[2] * m[0] + fm[3] * m[2];
      m2[3] = fm[2] * m[1] + fm[3] * m[3];
      m[0] = m2[0];
      m[1] = m2[1];
      m[2] = m2[2];
      m[3] = m2[3];
    }
    if (fabs(m[0] * m[3]) > fabs(m[1] * m[2])) {
      curRot = (m[0] > 0 || m[3] < 0) ? 0 : 2;
    } else {
      curRot = (m[2] > 0) ? 1 : 3;
    }
    // for vertical writing mode, the lines are effectively rotated 90
    // degrees (Joan: restored according to the old version)
    wMode = gfxFont->getWMode();
    if (wMode) {
      curRot = (curRot + 1) & 3;
    }
  }
}

// Add one drawn character to the page's character list.
//
//   state   graphics state used for transforms, spacing, clipping,
//           render mode and colour
//   x, y    character position (transformed with state->transform())
//   dx, dy  character advance, including char/word spacing
//   c       character code (0x20 also receives the word spacing)
//   nBytes  number of bytes this character occupies; added to charPos
//   u, uLen Unicode values for the character; one TextChar is
//           appended per Unicode value, each taking an equal share of
//           the advance
//
// Nothing is appended when an ActualText span is open (only the span
// extents are recorded, and charPos is not advanced), when the
// character lies outside the page or is larger than the page, when
// the tiny-character limit has been exceeded, or when the character
// is a single space.
void TextPage::addChar(GfxState *state, double x, double y,
                       double dx, double dy,
                       CharCode c, int nBytes, Unicode *u, int uLen) {
  double x1, y1, x2, y2, w1, h1, dx2, dy2, ascent, descent, sp;
  double xMin, yMin, xMax, yMax;
  double clipXMin, clipYMin, clipXMax, clipYMax;
  GfxRGB rgb;
  GBool clipped, rtl, invisible;
  int i, j, wMode;

  // if we're in an ActualText span, save the position info (the
  // ActualText chars will be added by TextPage::endActualText()).
  if (actualText) {
    if (!actualTextNBytes) {
      actualTextX0 = x;
      actualTextY0 = y;
    }
    actualTextX1 = x + dx;
    actualTextY1 = y + dy;
    actualTextNBytes += nBytes;
    return;
  }

  // subtract char and word spacing from the dx,dy values
  sp = state->getCharSpace();
  if (c == (CharCode)0x20) {
    sp += state->getWordSpace();
  }
  state->textTransformDelta(sp * state->getHorizScaling(), 0, &dx2, &dy2);
  dx -= dx2;
  dy -= dy2;
  state->transformDelta(dx, dy, &w1, &h1);

  // throw away chars that aren't inside the page bounds
  // (and also do a sanity check on the character size)
  state->transform(x, y, &x1, &y1);
  if (x1 + w1 < 0 || x1 > pageWidth ||
      y1 + h1 < 0 || y1 > pageHeight ||
      w1 > pageWidth || h1 > pageHeight) {
    charPos += nBytes;
    return;
  }

  // check the tiny chars limit
  if (!globalParams->getTextKeepTinyChars() &&
      fabs(w1) < 3 && fabs(h1) < 3) {
    if (++nTinyChars > 50000) {
      charPos += nBytes;
      return;
    }
  }

  // skip space characters
  if (uLen == 1 && u[0] == (Unicode)0x20) {
    charPos += nBytes;
    return;
  }

  // check for clipping
  clipped = gFalse;
  if (control.clipText) {
    state->getClipBBox(&clipXMin, &clipYMin, &clipXMax, &clipYMax);
    if (x1 + 0.1 * w1 < clipXMin || x1 + 0.9 * w1 > clipXMax ||
        y1 + 0.1 * h1 < clipYMin || y1 + 0.9 * h1 > clipYMax) {
      clipped = gTrue;
    }
  }

  // add the characters
  if (uLen > 0) {

    // handle right-to-left ligatures: if there are multiple Unicode
    // characters, and they're all right-to-left, insert them in
    // right-to-left order
    if (uLen > 1) {
      rtl = gTrue;
      for (i = 0; i < uLen; ++i) {
        if (!unicodeTypeR(u[i])) {
          rtl = gFalse;
          break;
        }
      }
    } else {
      rtl = gFalse;
    }

    // each Unicode value gets an equal share of the advance
    w1 /= uLen;
    h1 /= uLen;
    ascent = curFont->ascent * curFontSize;
    descent = curFont->descent * curFontSize;

    // for vertical writing mode, the lines are effectively rotated 90
    // degrees
    wMode = state->getFont() ? state->getFont()->getWMode() : 0;

    // colour and visibility depend only on the graphics state, so
    // look them up once instead of once per Unicode value
    if ((state->getRender() & 3) == 1) {
      state->getStrokeRGB(&rgb);
    } else {
      state->getFillRGB(&rgb);
    }
    invisible = state->getRender() == 3;

    for (i = 0; i < uLen; ++i) {
      x2 = x1 + i * w1;
      y2 = y1 + i * h1;
      // Joan: wMode handling added according to the old version, to
      // correct the position of vertical characters
      if (wMode) { // vertical writing mode
        // NB: the rotation value has been incremented by 1 (in
        // TextPage::updateFont()) for vertical writing mode
        switch (curRot) {
        case 0:
        default: // matches the horizontal switch; keeps the box initialised
          xMin = x2 - w1;
          xMax = x2;
          yMin = y2 - curFontSize;
          yMax = y2;
          break;
        case 1:
          xMin = x2;
          xMax = x2 + curFontSize;
          yMin = y2 - h1;
          yMax = y2;
          break;
        case 2:
          xMin = x2;
          xMax = x2 + w1;
          yMin = y2;
          yMax = y2 + curFontSize;
          break;
        case 3:
          xMin = x2 - curFontSize;
          xMax = x2;
          yMin = y2;
          yMax = y2 + h1;
          break;
        }
      } else { // horizontal writing mode
        switch (curRot) {
        case 0:
        default:
          xMin = x2;
          xMax = x2 + w1;
          yMin = y2 - ascent;
          yMax = y2 - descent;
          break;
        case 1:
          xMin = x2 + descent;
          xMax = x2 + ascent;
          yMin = y2;
          yMax = y2 + h1;
          break;
        case 2:
          xMin = x2 + w1;
          xMax = x2;
          yMin = y2 + descent;
          yMax = y2 + ascent;
          break;
        case 3:
          xMin = x2 - ascent;
          xMax = x2 - descent;
          yMin = y2 + h1;
          yMax = y2;
          break;
        }
      }
      // right-to-left ligatures take their Unicode values in reverse
      if (rtl) {
        j = uLen - 1 - i;
      } else {
        j = i;
      }
      chars->append(new TextChar(u[j], charPos, nBytes, xMin, yMin, xMax, yMax,
                                 curRot, clipped,
                                 invisible,
                                 curFont, curFontSize,
                                 colToDbl(rgb.r), colToDbl(rgb.g),
                                 colToDbl(rgb.b)));
    }
  }

  charPos += nBytes;
}

// Search the page text for the Unicode string s[0..len-1].
//
//   startAtTop / stopAtBottom  when set, the corresponding limit is
//                              not applied
//   startAtLast / stopAtLast   use the position of the previous
//                              successful find (if there is one) as
//                              the start / stop limit
//   caseSensitive              when false, both the search string and
//                              each line are upper-cased before
//                              comparison
//   backward                   walk columns, paragraphs, lines and
//                              positions in reverse order
//   wholeWord                  only accept matches that are not
//                              adjacent to a word character
//   xMin, yMin, xMax, yMax     in: start (xMin,yMin) and stop
//                              (xMax,yMax) limits when the matching
//                              flags above are not set; out: bounding
//                              box of the match
//
// Returns gTrue and stores the match box (also remembering it as the
// "last find") if a match was found; otherwise returns gFalse and
// leaves the output coordinates untouched.
//
// The column list (findCols) is built on first use and reused.
GBool TextPage::findText(Unicode *s, int len,
                         GBool startAtTop, GBool stopAtBottom,
                         GBool startAtLast, GBool stopAtLast,
                         GBool caseSensitive, GBool backward,
                         GBool wholeWord,
                         double *xMin, double *yMin,
                         double *xMax, double *yMax) {
  TextBlock *tree;
  TextColumn *column;
  TextParagraph *par;
  TextLine *line;
  Unicode *s2, *txt;
  double xStart, yStart, xStop, yStop;
  double xMin0, yMin0, xMax0, yMax0;
  double xMin1, yMin1, xMax1, yMax1;
  GBool found;
  int txtSize, m, colIdx, parIdx, lineIdx, i, j, k;

  //~ need to handle right-to-left text

  if (!findCols) {
    // Joan, 2015-10-21: rotateChars() / unrotateChars() /
    // unrotateColumns() are deliberately not called here, so that
    // vertical text can be found
    if ((tree = splitChars(chars))) {
      findCols = buildColumns(tree);
      delete tree;
    } else {
      // no text
      findCols = new GList();
    }
  }

  // convert the search string to uppercase
  if (!caseSensitive) {
    s2 = (Unicode *)gmallocn(len, sizeof(Unicode));
    for (i = 0; i < len; ++i) {
      s2[i] = unicodeToUpper(s[i]);
    }
  } else {
    s2 = s;
  }

  // scratch buffer for upper-cased lines (case-insensitive search only)
  txt = NULL;
  txtSize = 0;

  xStart = yStart = xStop = yStop = 0;
  if (startAtLast && haveLastFind) {
    xStart = lastFindXMin;
    yStart = lastFindYMin;
  } else if (!startAtTop) {
    xStart = *xMin;
    yStart = *yMin;
  }
  if (stopAtLast && haveLastFind) {
    xStop = lastFindXMin;
    yStop = lastFindYMin;
  } else if (!stopAtBottom) {
    xStop = *xMax;
    yStop = *yMax;
  }

  found = gFalse;
  xMin0 = xMax0 = yMin0 = yMax0 = 0; // make gcc happy
  xMin1 = xMax1 = yMin1 = yMax1 = 0; // make gcc happy

  for (colIdx = backward ? findCols->getLength() - 1 : 0;
       backward ? colIdx >= 0 : colIdx < findCols->getLength();
       colIdx += backward ? -1 : 1) {
    column = (TextColumn *)findCols->get(colIdx);

    // check: is the column above the top limit?
    if (!startAtTop && (backward ? column->yMin > yStart
                                 : column->yMax < yStart)) {
      continue;
    }

    // check: is the column below the bottom limit?
    if (!stopAtBottom && (backward ? column->yMax < yStop
                                   : column->yMin > yStop)) {
      continue;
    }

    for (parIdx = backward ? column->paragraphs->getLength() - 1 : 0;
         backward ? parIdx >= 0 : parIdx < column->paragraphs->getLength();
         parIdx += backward ? -1 : 1) {
      par = (TextParagraph *)column->paragraphs->get(parIdx);

      // check: is the paragraph above the top limit?
      if (!startAtTop && (backward ? par->yMin > yStart
                                   : par->yMax < yStart)) {
        continue;
      }

      // check: is the paragraph below the bottom limit?
      if (!stopAtBottom && (backward ? par->yMax < yStop
                                     : par->yMin > yStop)) {
        continue;
      }

      for (lineIdx = backward ? par->lines->getLength() - 1 : 0;
           backward ? lineIdx >= 0 : lineIdx < par->lines->getLength();
           lineIdx += backward ? -1 : 1) {
        line = (TextLine *)par->lines->get(lineIdx);

        // check: is the line above the top limit?
        if (!startAtTop && (backward ? line->yMin > yStart
                                     : line->yMax < yStart)) {
          continue;
        }

        // check: is the line below the bottom limit?
        if (!stopAtBottom && (backward ? line->yMax < yStop
                                       : line->yMin > yStop)) {
          continue;
        }

        // a line shorter than the search string cannot match; skipping
        // it here also avoids forming an out-of-range start position
        m = line->len;
        if (m < len) {
          continue;
        }

        // convert the line to uppercase
        if (!caseSensitive) {
          if (m > txtSize) {
            txt = (Unicode *)greallocn(txt, m, sizeof(Unicode));
            txtSize = m;
          }
          for (k = 0; k < m; ++k) {
            txt[k] = unicodeToUpper(line->text[k]);
          }
        } else {
          txt = line->text;
        }

        // search each position in this line
        j = backward ? m - len : 0;
        while (backward ? j >= 0 : j <= m - len) {
          if (!wholeWord ||
              ((j == 0 || !unicodeTypeWord(txt[j - 1])) &&
               (j + len == m || !unicodeTypeWord(txt[j + len])))) {

            // compare the strings
            for (k = 0; k < len; ++k) {
              if (txt[j + k] != s2[k]) {
                break;
              }
            }

            // found it
            if (k == len) {
              // line->edge[] supplies the extent along the line's
              // direction; the line's own box supplies the other axis
              switch (line->rot) {
              case 0:
                xMin1 = line->edge[j];
                xMax1 = line->edge[j + len];
                yMin1 = line->yMin;
                yMax1 = line->yMax;
                break;
              case 1:
                xMin1 = line->xMin;
                xMax1 = line->xMax;
                yMin1 = line->edge[j];
                yMax1 = line->edge[j + len];
                break;
              case 2:
                xMin1 = line->edge[j + len];
                xMax1 = line->edge[j];
                yMin1 = line->yMin;
                yMax1 = line->yMax;
                break;
              case 3:
                xMin1 = line->xMin;
                xMax1 = line->xMax;
                yMin1 = line->edge[j + len];
                yMax1 = line->edge[j];
                break;
              }
              if (backward) {
                // must lie strictly before the start and after the
                // stop; keep the candidate latest in (y, x) order
                if ((startAtTop ||
                     yMin1 < yStart || (yMin1 == yStart && xMin1 < xStart)) &&
                    (stopAtBottom ||
                     yMin1 > yStop || (yMin1 == yStop && xMin1 > xStop))) {
                  if (!found ||
                      yMin1 > yMin0 || (yMin1 == yMin0 && xMin1 > xMin0)) {
                    xMin0 = xMin1;
                    xMax0 = xMax1;
                    yMin0 = yMin1;
                    yMax0 = yMax1;
                    found = gTrue;
                  }
                }
              } else {
                // must lie strictly after the start and before the
                // stop; keep the candidate earliest in (y, x) order
                if ((startAtTop ||
                     yMin1 > yStart || (yMin1 == yStart && xMin1 > xStart)) &&
                    (stopAtBottom ||
                     yMin1 < yStop || (yMin1 == yStop && xMin1 < xStop))) {
                  if (!found ||
                      yMin1 < yMin0 || (yMin1 == yMin0 && xMin1 < xMin0)) {
                    xMin0 = xMin1;
                    xMax0 = xMax1;
                    yMin0 = yMin1;
                    yMax0 = yMax1;
                    found = gTrue;
                  }
                }
              }
            }
          }
          if (backward) {
            --j;
          } else {
            ++j;
          }
        }
      }
    }
  }

  // s2 and txt are heap buffers only in the case-insensitive path
  if (!caseSensitive) {
    gfree(s2);
    gfree(txt);
  }

  if (found) {
    *xMin = xMin0;
    *xMax = xMax0;
    *yMin = yMin0;
    *yMax = yMax0;
    lastFindXMin = xMin0;
    lastFindYMin = yMin0;
    haveLastFind = gTrue;
    return gTrue;
  }

  return gFalse;
}

// Output-device hook for a drawn character: hands the character to
// the TextPage.  The position passed on is (x, y) with the origin
// offset (originX, originY) subtracted (Joan: second and third
// arguments changed according to the old version).
void TextOutputDev::drawChar(GfxState *state, double x, double y,
                             double dx, double dy,
                             double originX, double originY,
                             CharCode c, int nBytes, Unicode *u, int uLen) {
  double charX = x - originX;
  double charY = y - originY;

  text->addChar(state, charX, charY, dx, dy, c, nBytes, u, uLen);
}