在org.apache.xerces.impl.XMLEntityScanner的方法public int scanContent(XMLString content)中舍弃了\r,
原代码:
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
boolean external = fCurrentEntity.isExternal();
if (c == '\n' || (c == '\r' && external)) {
if (DEBUG_BUFFER) {
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
XMLEntityManager.print(fCurrentEntity);
System.out.println();
}
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c == '\r' && external) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
fCurrentEntity.position = newlines;
fCurrentEntity.startPosition = newlines;
if (load(newlines, false)) {
break;
}
}
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
fCurrentEntity.position++;
offset++;
}
/*** NEWLINE NORMALIZATION ***/
else {
newlines++;
}
}
else if (c == '\n') {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
fCurrentEntity.position = newlines;
fCurrentEntity.startPosition = newlines;
if (load(newlines, false)) {
break;
}
}
}
else {
fCurrentEntity.position--;
break;
}
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
for (int i = offset; i < fCurrentEntity.position; i++) {
fCurrentEntity.ch[i] = '\n';
}
修改方式是将一些行注释掉,见有//的行:
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
boolean external = fCurrentEntity.isExternal();
if (c == '\n' || (c == '\r' && external)) {
if (DEBUG_BUFFER) {
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
XMLEntityManager.print(fCurrentEntity);
System.out.println();
}
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c == '\r' && external) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
fCurrentEntity.position = newlines;
fCurrentEntity.startPosition = newlines;
if (load(newlines, false)) {
break;
}
}
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
fCurrentEntity.position++;
// offset++;
}
/*** NEWLINE NORMALIZATION ***/
else {
newlines++;
}
}
else if (c == '\n') {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
fCurrentEntity.position = newlines;
fCurrentEntity.startPosition = newlines;
if (load(newlines, false)) {
break;
}
}
}
else {
fCurrentEntity.position--;
break;
}
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
// for (int i = offset; i < fCurrentEntity.position; i++) {
// fCurrentEntity.ch[i] = '\n';
// }