THRIFT-2779: Always write unescaped JSON unicode string.

Client: PHP
Patch: Phongphan Phuttha

This closes #666
This commit is contained in:
Phongphan Phuttha 2015-10-30 00:00:10 +07:00 committed by Nobuaki Sukegawa
parent c04fb0069e
commit 90ea4f64c1
3 changed files with 62 additions and 2 deletions

View File

@ -215,6 +215,44 @@ class TJSONProtocol extends TProtocol
return dechex($val);
}
/**
 * Tell whether the running PHP is version 5.4 or newer.
 *
 * unescapedUnicode() consults this before passing JSON_UNESCAPED_UNICODE
 * to json_encode().
 *
 * @return bool true on PHP >= 5.4, false on anything older
 */
private function hasJSONUnescapedUnicode()
{
    $newerMajor = PHP_MAJOR_VERSION > 5;
    $fiveFourOrLater = PHP_MAJOR_VERSION == 5 && PHP_MINOR_VERSION >= 4;

    return $newerMajor || $fiveFourOrLater;
}
/**
 * JSON-encode a value while keeping non-ASCII characters as raw UTF-8
 * instead of \uXXXX escapes.
 *
 * When hasJSONUnescapedUnicode() reports support, the work is delegated to
 * json_encode() with JSON_UNESCAPED_UNICODE. Otherwise the default
 * json_encode() output is post-processed: every \uXXXX escape (or UTF-16
 * surrogate pair of escapes) that denotes a non-ASCII character is replaced
 * by its UTF-8 bytes.
 *
 * The fallback walks the encoded text one escape sequence at a time, so:
 *  - an escaped backslash followed by "uXXXX" (i.e. the input literally
 *    contained a backslash and the letter u) is left untouched;
 *  - escapes for code points below 0x80 (control characters) stay escaped,
 *    because raw control characters are not allowed inside JSON strings.
 *
 * The fallback requires the mbstring extension (mb_convert_encoding).
 *
 * @param mixed $str value to encode
 * @return string JSON text (on the native path, whatever json_encode() returns)
 */
private function unescapedUnicode($str)
{
    if ($this->hasJSONUnescapedUnicode()) {
        return json_encode($str, JSON_UNESCAPED_UNICODE);
    }

    $json = json_encode($str);

    /*
     * Match complete escape sequences only, scanning left to right:
     *   1. high surrogate (0xD800 - 0xDBFF) + low surrogate (0xDC00 - 0xDFFF),
     *      i.e. a character outside the Basic Multilingual Plane;
     *   2. a single \uXXXX escape;
     *   3. any other escape (\\, \", \/, \n, ...), consumed as a unit so that
     *      its second character can never start a bogus \u match.
     */
    return preg_replace_callback(
        '/\\\\(?:u(d[89ab][0-9a-f]{2})\\\\u(d[c-f][0-9a-f]{2})|u([0-9a-f]{4})|.)/is',
        function ($matches) {
            // Alternative 1: surrogate pair -> one non-BMP character.
            if (isset($matches[2]) && $matches[2] !== '') {
                return mb_convert_encoding(
                    pack('H*', $matches[1] . $matches[2]),
                    'UTF-8',
                    'UTF-16BE'
                );
            }

            // Alternative 2: single BMP escape.
            if (isset($matches[3]) && $matches[3] !== '') {
                $codePoint = hexdec($matches[3]);
                $isLoneSurrogate = $codePoint >= 0xD800 && $codePoint <= 0xDFFF;

                // Only non-ASCII characters are unescaped; control characters
                // and unpaired surrogates must remain as \uXXXX.
                if ($codePoint >= 0x80 && !$isLoneSurrogate) {
                    return mb_convert_encoding(
                        pack('H*', $matches[3]),
                        'UTF-8',
                        'UTF-16BE'
                    );
                }
            }

            // Alternative 3 (or an escape we must keep): emit unchanged.
            return $matches[0];
        },
        $json
    );
}
private function writeJSONString($b)
{
$this->context_->write();
@ -223,7 +261,7 @@ class TJSONProtocol extends TProtocol
$this->trans_->write(self::QUOTE);
}
$this->trans_->write(json_encode($b));
$this->trans_->write($this->unescapedUnicode($b));
if (is_numeric($b) && $this->context_->escapeNum()) {
$this->trans_->write(self::QUOTE);

View File

@ -46,6 +46,9 @@ class Fixtures
self::$testArgs['testString3'] =
"string that ends in double-backslash \\\\";
self::$testArgs['testUnicodeStringWithNonBMP'] =
"สวัสดี/𝒯";
self::$testArgs['testDouble'] = 3.1415926535898;
// TODO: add testBinary() call

View File

@ -200,7 +200,12 @@ class TestTJSONProtocol extends \PHPUnit_Framework_TestCase
$actual = $this->transport->read( BUFSIZ );
$expected = TestTJSONProtocol_Fixtures::$testArgsJSON['testStringMap'];
$this->assertEquals( $expected, $actual );
/*
* $actual contains the unescaped string.
* It must be decoded and then encoded again
* to get the expected escaped unicode.
*/
$this->assertEquals( $expected, json_encode(json_decode($actual)) );
}
public function testSet_Write()
@ -308,6 +313,18 @@ class TestTJSONProtocol extends \PHPUnit_Framework_TestCase
$this->assertEquals( $expected, $actual );
}
/**
 * Writing the 'testUnicodeStringWithNonBMP' fixture string through the
 * protocol must yield exactly the JSON stored under the same fixture key.
 */
public function testString4_Write()
{
    $expectedJson = TestTJSONProtocol_Fixtures::$testArgsJSON['testUnicodeStringWithNonBMP'];

    $request = new \ThriftTest\ThriftTest_testString_args();
    $request->thing = Fixtures::$testArgs['testUnicodeStringWithNonBMP'];
    $request->write( $this->protocol );

    $writtenJson = $this->transport->read( BUFSIZ );

    $this->assertEquals( $expectedJson, $writtenJson );
}
public function testDouble_Read()
{
$this->transport->write(
@ -528,6 +545,8 @@ class TestTJSONProtocol_Fixtures
self::$testArgsJSON['testString3'] = '{"1":{"str":"string that ends in double-backslash \\\\\\\\"}}';
self::$testArgsJSON['testUnicodeStringWithNonBMP'] = '{"1":{"str":"สวัสดี\/𝒯"}}';
self::$testArgsJSON['testDouble'] = '{"1":{"dbl":3.1415926535898}}';
self::$testArgsJSON['testByte'] = '{"1":{"i8":1}}';