THRIFT-2365 C# decodes too many binary bytes from JSON

Patch: Jens Geyer
This commit is contained in:
Jens Geyer 2014-02-16 15:48:57 +01:00
parent 023192f7e1
commit 06ad7218a9
2 changed files with 103 additions and 70 deletions

View File

@ -737,7 +737,7 @@ namespace Thrift.Protocol
// escaped?
if (ch != ESCSEQ[0])
{
{
buffer.Write(new byte[] { (byte)ch }, 0, 1);
continue;
}
@ -752,20 +752,20 @@ namespace Thrift.Protocol
throw new TProtocolException(TProtocolException.INVALID_DATA,
"Expected control char");
}
ch = ESCAPE_CHAR_VALS[off];
buffer.Write(new byte[] { (byte)ch }, 0, 1);
ch = ESCAPE_CHAR_VALS[off];
buffer.Write(new byte[] { (byte)ch }, 0, 1);
continue;
}
// it's \uXXXX
trans.ReadAll(tempBuffer, 0, 4);
var wch = (short)((HexVal((byte)tempBuffer[0]) << 12) +
(HexVal((byte)tempBuffer[1]) << 8) +
(HexVal((byte)tempBuffer[2]) << 4) +
HexVal(tempBuffer[3]));
var tmp = utf8Encoding.GetBytes(new char[] { (char)wch });
buffer.Write(tmp, 0, tmp.Length);
}
// it's \uXXXX
trans.ReadAll(tempBuffer, 0, 4);
var wch = (short)((HexVal((byte)tempBuffer[0]) << 12) +
(HexVal((byte)tempBuffer[1]) << 8) +
(HexVal((byte)tempBuffer[2]) << 4) +
HexVal(tempBuffer[3]));
var tmp = utf8Encoding.GetBytes(new char[] { (char)wch });
buffer.Write(tmp, 0, tmp.Length);
}
return buffer.ToArray();
}
@ -891,7 +891,13 @@ namespace Thrift.Protocol
int len = b.Length;
int off = 0;
int size = 0;
while (len >= 4)
// reduce len to ignore fill bytes
while ((len > 0) && (b[len - 1] == '='))
{
--len;
}
// read & decode full byte triplets = 4 source bytes
while (len > 4)
{
// Decode 4 bytes at a time
TBase64Utils.decode(b, off, 4, b, size); // NB: decoded in place

View File

@ -1,55 +1,82 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using Thrift.Protocol;
using Thrift.Transport;
namespace JSONTest
{
    /// <summary>
    /// Console driver for the JSON protocol regression checks.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            TestThrift2336();
        }

        /// <summary>
        /// THRIFT-2336: hands TJSONProtocol.ReadString() a JSON string literal made of
        /// \uXXXX escapes whose code points exceed 0xFF and asserts that the decoded
        /// text equals the expected string.
        /// </summary>
        public static void TestThrift2336()
        {
            const string ExpectedText = "\u0420\u0443\u0441\u0441\u043a\u043e\u0435 \u041d\u0430\u0437\u0432\u0430\u043d\u0438\u0435";
            const string EscapedJson = "\"\\u0420\\u0443\\u0441\\u0441\\u043a\\u043e\\u0435 \\u041d\\u0430\\u0437\\u0432\\u0430\\u043d\\u0438\\u0435\"";

            // prepare buffer with JSON data: one byte per char, only the low byte is kept
            byte[] jsonBytes = EscapedJson.Select(c => (byte)(c & (char)0xFF)).ToArray();

            // parse and check; ReadString() stays inside the Debug.Assert call so it is
            // evaluated under exactly the same build conditions as the assertion itself
            var protocol = new TJSONProtocol(new TStreamTransport(new MemoryStream(jsonBytes), null));
            Debug.Assert(ExpectedText == protocol.ReadString(), "reading JSON with hex-encoded chars > 8 bit");
        }
    }
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using Thrift.Protocol;
using Thrift.Transport;
namespace JSONTest
{
    /// <summary>
    /// Console driver for the JSON protocol regression checks.
    /// All checks use Debug.Assert, which is only compiled into builds that define DEBUG.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            TestThrift2365(); // JSON binary decodes too much data
            TestThrift2336(); // hex encoding using \uXXXX where 0xXXXX > 0xFF
        }

        /// <summary>
        /// THRIFT-2365: writes binary payloads of 0 to 9 bytes with TJSONProtocol.WriteBinary(),
        /// reads them back with ReadBinary() and asserts that length and content survive
        /// the round trip unchanged.
        /// </summary>
        public static void TestThrift2365()
        {
            // Fixed seed so that a failing payload can be reproduced on the next run.
            var rnd = new Random(2365);

            // Lengths 0..9 include every value of (len % 3), i.e. payloads that end
            // with zero, one or two bytes beyond the last full byte triplet.
            for (var len = 0; len < 10; ++len)
            {
                byte[] dataWritten = new byte[len];
                rnd.NextBytes(dataWritten);

                using (Stream stm = new MemoryStream())
                {
                    // write side: the stream is used as output only
                    TTransport trans = new TStreamTransport(null, stm);
                    TProtocol prot = new TJSONProtocol(trans);
                    prot.WriteBinary(dataWritten);

                    // read side: rewind and use the same stream as input only
                    stm.Position = 0;
                    trans = new TStreamTransport(stm, null);
                    prot = new TJSONProtocol(trans);
                    byte[] dataRead = prot.ReadBinary();

                    Debug.Assert(dataRead.Length == dataWritten.Length,
                        "binary round trip: wrote " + dataWritten.Length + " bytes, read " + dataRead.Length);

                    // Compare only the common prefix: if the decoder returned too many
                    // bytes, the length assertion above reports it and this loop must
                    // not run past the end of dataWritten.
                    int common = Math.Min(dataRead.Length, dataWritten.Length);
                    for (var i = 0; i < common; ++i)
                    {
                        Debug.Assert(dataRead[i] == dataWritten[i],
                            "binary round trip: byte " + i + " differs for payload length " + len);
                    }
                }
            }
        }

        /// <summary>
        /// THRIFT-2336: hands TJSONProtocol.ReadString() a JSON string literal made of
        /// \uXXXX escapes whose code points exceed 0xFF and asserts that the decoded
        /// text equals the expected string.
        /// </summary>
        public static void TestThrift2336()
        {
            const string RUSSIAN_TEXT = "\u0420\u0443\u0441\u0441\u043a\u043e\u0435 \u041d\u0430\u0437\u0432\u0430\u043d\u0438\u0435";
            const string RUSSIAN_JSON = "\"\\u0420\\u0443\\u0441\\u0441\\u043a\\u043e\\u0435 \\u041d\\u0430\\u0437\\u0432\\u0430\\u043d\\u0438\\u0435\"";

            // prepare buffer with JSON data
            byte[] rawBytes = new byte[RUSSIAN_JSON.Length];
            for (var i = 0; i < RUSSIAN_JSON.Length; ++i)
            {
                rawBytes[i] = (byte)(RUSSIAN_JSON[i] & (char)0xFF);  // only low bytes
            }

            // parse and check
            using (var stm = new MemoryStream(rawBytes))
            {
                var trans = new TStreamTransport(stm, null);
                var prot = new TJSONProtocol(trans);
                Debug.Assert(prot.ReadString() == RUSSIAN_TEXT, "reading JSON with hex-encoded chars > 8 bit");
            }
        }
    }
}