/*
 * Copyright (c) 2015 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
|
|
|
|
package nu.validator.encoding;
|
|
|
|
import java.nio.ByteBuffer;
|
|
import java.nio.CharBuffer;
|
|
import java.nio.charset.Charset;
|
|
import java.nio.charset.CoderResult;
|
|
|
|
public class Big5Decoder extends Decoder {
|
|
|
|
private int big5Lead = 0;
|
|
|
|
private char pendingTrail = '\u0000';
|
|
|
|
protected Big5Decoder(Charset cs) {
|
|
super(cs, 0.5f, 1.0f);
|
|
}
|
|
|
|
@Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
|
|
assert !(this.report && (big5Lead != 0)):
|
|
"When reporting, this method should never return with big5Lead set.";
|
|
if (pendingTrail != '\u0000') {
|
|
if (!out.hasRemaining()) {
|
|
return CoderResult.OVERFLOW;
|
|
}
|
|
out.put(pendingTrail);
|
|
pendingTrail = '\u0000';
|
|
}
|
|
for (;;) {
|
|
if (!in.hasRemaining()) {
|
|
return CoderResult.UNDERFLOW;
|
|
}
|
|
if (!out.hasRemaining()) {
|
|
return CoderResult.OVERFLOW;
|
|
}
|
|
int b = ((int) in.get() & 0xFF);
|
|
if (big5Lead == 0) {
|
|
if (b <= 0x7F) {
|
|
out.put((char) b);
|
|
continue;
|
|
}
|
|
if (b >= 0x81 && b <= 0xFE) {
|
|
if (this.report && !in.hasRemaining()) {
|
|
// The Java API is badly documented. Need to do this
|
|
// crazy thing and hope the caller knows about the
|
|
// undocumented aspects of the API!
|
|
in.position(in.position() - 1);
|
|
return CoderResult.UNDERFLOW;
|
|
}
|
|
big5Lead = b;
|
|
continue;
|
|
}
|
|
if (this.report) {
|
|
in.position(in.position() - 1);
|
|
return CoderResult.malformedForLength(1);
|
|
}
|
|
out.put('\uFFFD');
|
|
continue;
|
|
}
|
|
int lead = big5Lead;
|
|
big5Lead = 0;
|
|
int offset = (b < 0x7F) ? 0x40 : 0x62;
|
|
if ((b >= 0x40 && b <= 0x7E) || (b >= 0xA1 && b <= 0xFE)) {
|
|
int pointer = (lead - 0x81) * 157 + (b - offset);
|
|
char outTrail;
|
|
switch (pointer) {
|
|
case 1133:
|
|
out.put('\u00CA');
|
|
outTrail = '\u0304';
|
|
break;
|
|
case 1135:
|
|
out.put('\u00CA');
|
|
outTrail = '\u030C';
|
|
break;
|
|
case 1164:
|
|
out.put('\u00EA');
|
|
outTrail = '\u0304';
|
|
break;
|
|
case 1166:
|
|
out.put('\u00EA');
|
|
outTrail = '\u030C';
|
|
break;
|
|
default:
|
|
char lowBits = Big5Data.lowBits(pointer);
|
|
if (lowBits == '\u0000') {
|
|
// The following |if| block fixes
|
|
// https://github.com/whatwg/encoding/issues/5
|
|
if (b <= 0x7F) {
|
|
// prepend byte to stream
|
|
// Always legal, since we've always just read a byte
|
|
// if we come here.
|
|
in.position(in.position() - 1);
|
|
}
|
|
if (this.report) {
|
|
// This can go past the start of the buffer
|
|
// if the caller does not conform to the
|
|
// undocumented aspects of the API.
|
|
in.position(in.position() - 1);
|
|
return CoderResult.malformedForLength(b <= 0x7F ? 1 : 2);
|
|
}
|
|
out.put('\uFFFD');
|
|
continue;
|
|
}
|
|
if (Big5Data.isAstral(pointer)) {
|
|
int codePoint = lowBits | 0x20000;
|
|
out.put((char) (0xD7C0 + (codePoint >> 10)));
|
|
outTrail = (char) (0xDC00 + (codePoint & 0x3FF));
|
|
break;
|
|
}
|
|
out.put(lowBits);
|
|
continue;
|
|
}
|
|
if (!out.hasRemaining()) {
|
|
pendingTrail = outTrail;
|
|
return CoderResult.OVERFLOW;
|
|
}
|
|
out.put(outTrail);
|
|
continue;
|
|
}
|
|
// pointer is null
|
|
if (b <= 0x7F) {
|
|
// prepend byte to stream
|
|
// Always legal, since we've always just read a byte
|
|
// if we come here.
|
|
in.position(in.position() - 1);
|
|
}
|
|
if (this.report) {
|
|
// if position() == 0, the caller is not using the
|
|
// undocumented part of the API right and the line
|
|
// below will throw!
|
|
in.position(in.position() - 1);
|
|
return CoderResult.malformedForLength(b <= 0x7F ? 1 : 2);
|
|
}
|
|
out.put('\uFFFD');
|
|
continue;
|
|
}
|
|
}
|
|
|
|
@Override protected CoderResult implFlush(CharBuffer out) {
|
|
if (pendingTrail != '\u0000') {
|
|
if (!out.hasRemaining()) {
|
|
return CoderResult.OVERFLOW;
|
|
}
|
|
out.put(pendingTrail);
|
|
pendingTrail = '\u0000';
|
|
}
|
|
if (big5Lead != 0) {
|
|
assert !this.report: "How come big5Lead got to be non-zero when decodeLoop() returned in the reporting mode?";
|
|
if (!out.hasRemaining()) {
|
|
return CoderResult.OVERFLOW;
|
|
}
|
|
out.put('\uFFFD');
|
|
big5Lead = 0;
|
|
}
|
|
return CoderResult.UNDERFLOW;
|
|
}
|
|
|
|
@Override protected void implReset() {
|
|
big5Lead = 0;
|
|
pendingTrail = '\u0000';
|
|
}
|
|
|
|
}
|