THRIFT-2409 UTF-8 sent by PHP as JSON is not understood by TJSONProtocol

Client: Java
Patch: Phongphan Phuttha <phongphan@acm.org>

This closes #667
This commit is contained in:
Phongphan Phuttha 2015-10-30 00:18:54 +07:00 committed by Jens Geyer
parent 11b515cd29
commit 54beb80ded
2 changed files with 57 additions and 4 deletions

View File

@ -19,8 +19,10 @@
package org.apache.thrift.protocol;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Stack;
import org.apache.thrift.TByteArrayOutputStream;
@ -640,6 +642,7 @@ public class TJSONProtocol extends TProtocol {
private TByteArrayOutputStream readJSONString(boolean skipContext)
throws TException {
TByteArrayOutputStream arr = new TByteArrayOutputStream(DEF_STRING_SIZE);
ArrayList<Character> codeunits = new ArrayList<Character>();
if (!skipContext) {
context_.read();
}
@ -652,10 +655,43 @@ public class TJSONProtocol extends TProtocol {
if (ch == ESCSEQ[0]) {
ch = reader_.read();
if (ch == ESCSEQ[1]) {
readJSONSyntaxChar(ZERO);
readJSONSyntaxChar(ZERO);
trans_.readAll(tmpbuf_, 0, 2);
ch = (byte)((hexVal((byte)tmpbuf_[0]) << 4) + hexVal(tmpbuf_[1]));
trans_.readAll(tmpbuf_, 0, 4);
short cu = (short)(
((short)hexVal(tmpbuf_[0]) << 12) +
((short)hexVal(tmpbuf_[1]) << 8) +
((short)hexVal(tmpbuf_[2]) << 4) +
(short)hexVal(tmpbuf_[3]));
try {
if (Character.isHighSurrogate((char)cu)) {
if (codeunits.size() > 0) {
throw new TProtocolException(TProtocolException.INVALID_DATA,
"Expected low surrogate char");
}
codeunits.add((char)cu);
}
else if (Character.isLowSurrogate((char)cu)) {
if (codeunits.size() == 0) {
throw new TProtocolException(TProtocolException.INVALID_DATA,
"Expected high surrogate char");
}
codeunits.add((char)cu);
arr.write((new String(new int[] { codeunits.get(0), codeunits.get(1) }, 0, 2)).getBytes("UTF-8"));
codeunits.clear();
}
else {
arr.write((new String(new int[] { cu }, 0, 1)).getBytes("UTF-8"));
}
continue;
}
catch (UnsupportedEncodingException ex) {
throw new TProtocolException(TProtocolException.NOT_IMPLEMENTED,
"JVM does not support UTF-8");
}
catch (IOException ex) {
throw new TProtocolException(TProtocolException.INVALID_DATA,
"Invalid unicode sequence");
}
}
else {
int off = ESCAPE_CHARS.indexOf(ch);

View File

@ -18,6 +18,12 @@
*/
package org.apache.thrift.protocol;
import java.io.IOException;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TJSONProtocol;
import org.apache.thrift.transport.TMemoryBuffer;
public class TestTJSONProtocol extends ProtocolTestBase {
@Override
protected TProtocolFactory getFactory() {
@ -28,4 +34,15 @@ public class TestTJSONProtocol extends ProtocolTestBase {
protected boolean canBeUsedNaked() {
// Always reports false for the JSON protocol test.
// NOTE(review): presumably this flag is consumed by ProtocolTestBase to decide
// which shared test cases apply; that class is not visible here, so confirm there.
return false;
}
/**
 * Feeds a JSON string literal containing backslash-u escapes into a
 * {@link TJSONProtocol} and checks that {@code readString()} yields the decoded text.
 *
 * The input covers one escape for a BMP character (U+0E01) and one
 * high/low surrogate escape pair that together encode U+1D11E.
 *
 * @throws TException if the protocol rejects the input
 * @throws IOException if the UTF-8 charset name is not supported
 */
public void testEscapedUnicode() throws TException, IOException {
final String decoded = "hello unicode \u0e01\ud834\udd1e world";
final TMemoryBuffer transport = new TMemoryBuffer(1000);
final TJSONProtocol jsonProtocol = new TJSONProtocol(transport);

// The escapes stay literal (double backslash) so the protocol, not javac, decodes them.
transport.write("\"hello unicode \\u0e01\\ud834\\udd1e world\"".getBytes("UTF-8"));

final String actual = jsonProtocol.readString();
assertEquals(decoded, actual);
}
}