From d815c218c8c48ab7bd930aa0ea8913bc9b2ce872 Mon Sep 17 00:00:00 2001 From: Bryan Duxbury Date: Thu, 19 Mar 2009 18:57:43 +0000 Subject: [PATCH] THRIFT-332. rb: Compact Protocol in Ruby This patch includes both a pure Ruby and C-extension port of the Compact Protocol described in THRIFT-110. It also fixes a bug in struct.c that was interfering with native protocol method calls, and adds some utility classes to the Java library for serializing/deserializing to a file for the purpose of testing protocols cross-language. git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@756133 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/thrift/test/ReadStruct.java | 43 ++ .../thrift/test/TCompactProtocolTest.java | 38 +- .../org/apache/thrift/test/WriteStruct.java | 28 + lib/rb/Manifest | 6 +- lib/rb/Rakefile | 7 +- lib/rb/ext/binary_protocol_accelerated.c | 9 +- lib/rb/ext/compact_protocol.c | 645 ++++++++++++++++++ lib/rb/ext/compact_protocol.h | 1 + lib/rb/ext/macros.h | 7 + lib/rb/ext/struct.c | 112 +-- lib/rb/ext/thrift_native.c | 2 + .../lib/thrift/protocol/compact_protocol.rb | 405 +++++++++++ lib/rb/lib/thrift/struct.rb | 91 +-- lib/rb/lib/thrift/transport.rb | 16 + lib/rb/script/proto_benchmark.rb | 104 +++ lib/rb/script/read_struct.rb | 25 + lib/rb/script/write_struct.rb | 12 + lib/rb/spec/compact_protocol_spec.rb | 110 +++ lib/rb/spec/deprecation_spec.rb | 3 + lib/rb/spec/spec_helper.rb | 9 +- lib/rb/spec/struct_spec.rb | 12 +- 21 files changed, 1544 insertions(+), 141 deletions(-) create mode 100644 lib/java/test/org/apache/thrift/test/ReadStruct.java create mode 100644 lib/java/test/org/apache/thrift/test/WriteStruct.java create mode 100644 lib/rb/ext/compact_protocol.c create mode 100644 lib/rb/ext/compact_protocol.h create mode 100644 lib/rb/ext/macros.h create mode 100644 lib/rb/lib/thrift/protocol/compact_protocol.rb create mode 100644 lib/rb/script/proto_benchmark.rb create mode 100644 lib/rb/script/read_struct.rb create mode 100644 lib/rb/script/write_struct.rb create mode 100644 lib/rb/spec/compact_protocol_spec.rb diff --git a/lib/java/test/org/apache/thrift/test/ReadStruct.java b/lib/java/test/org/apache/thrift/test/ReadStruct.java new file mode 100644 index 000000000..75f1455c8 --- /dev/null +++ b/lib/java/test/org/apache/thrift/test/ReadStruct.java @@ -0,0 +1,43 @@ +package org.apache.thrift.test; + +import java.io.BufferedInputStream; +import java.io.FileInputStream; + +import org.apache.thrift.protocol.TProtocol; +import org.apache.thrift.protocol.TProtocolFactory; +import org.apache.thrift.transport.TIOStreamTransport; +import org.apache.thrift.transport.TTransport; + +import thrift.test.CompactProtoTestStruct; + +public class ReadStruct { + public static void main(String[] args) throws Exception { + if (args.length != 2) { + System.out.println("usage: java -cp build/classes org.apache.thrift.test.ReadStruct filename proto_factory_class"); + System.out.println("Read in an instance of CompactProtocolTestStruct from 'file', making sure that it is equivalent to Fixtures.compactProtoTestStruct. Use a protocol from 'proto_factory_class'."); + } + + TTransport trans = new TIOStreamTransport(new BufferedInputStream(new FileInputStream(args[0]))); + + TProtocolFactory factory = (TProtocolFactory)Class.forName(args[1]).newInstance(); + + TProtocol proto = factory.getProtocol(trans); + + CompactProtoTestStruct cpts = new CompactProtoTestStruct(); + + for (Integer fid : CompactProtoTestStruct.metaDataMap.keySet()) { + cpts.setFieldValue(fid, null); + } + + cpts.read(proto); + + if (cpts.equals(Fixtures.compactProtoTestStruct)) { + System.out.println("Object verified successfully!"); + } else { + System.out.println("Object failed verification!"); + System.out.println("Expected: " + Fixtures.compactProtoTestStruct + " but got " + cpts); + } + + } + +} diff --git a/lib/java/test/org/apache/thrift/test/TCompactProtocolTest.java b/lib/java/test/org/apache/thrift/test/TCompactProtocolTest.java index 88a55e251..4bc74a7ee 100755 --- a/lib/java/test/org/apache/thrift/test/TCompactProtocolTest.java +++ b/lib/java/test/org/apache/thrift/test/TCompactProtocolTest.java @@ -78,35 +78,17 @@ public class TCompactProtocolTest { testI32Field(-0xffffff); testNakedI64(0); - testNakedI64(1); - testNakedI64(0xff); - testNakedI64(0xffff); - testNakedI64(0xffffff); - testNakedI64(6000000000L); - testNakedI64(Long.MAX_VALUE); - testNakedI64(-1); - testNakedI64(-0xff); - testNakedI64(-0xffff); - testNakedI64(-0xffffff); - testNakedI64(-6000000000L); - + for (int i = 0; i < 62; i++) { + testNakedI64(1L << i); + testNakedI64(-(1L << i)); + } + testI64Field(0); - testI64Field(1); - testI64Field(0xff); - testI64Field(0xffff); - testI64Field(0xffffff); - testI64Field(6000000000L); - testI64Field(1L << 32); - testI64Field(1L << 48); - testI64Field(1L << 55); - testI64Field(Long.MAX_VALUE); - testI64Field(-1); - testI64Field(-0xff); - testI64Field(-0xffff); - testI64Field(-0xffffff); - testI64Field(-6000000000L); - testI64Field(-1*Long.MAX_VALUE); - + for (int i = 0; i < 62; i++) { + testI64Field(1L << i); + testI64Field(-(1L << i)); + } + testDouble(); testNakedString(""); diff --git a/lib/java/test/org/apache/thrift/test/WriteStruct.java b/lib/java/test/org/apache/thrift/test/WriteStruct.java new file mode 100644 index 000000000..35a583168 --- /dev/null +++ b/lib/java/test/org/apache/thrift/test/WriteStruct.java @@ -0,0 +1,28 @@ +package org.apache.thrift.test; + +import java.io.BufferedOutputStream; +import java.io.FileOutputStream; + +import org.apache.thrift.protocol.TProtocol; +import org.apache.thrift.protocol.TProtocolFactory; +import org.apache.thrift.transport.TIOStreamTransport; +import org.apache.thrift.transport.TTransport; + +public class WriteStruct { + public static void main(String[] args) throws Exception { + if (args.length != 2) { + System.out.println("usage: java -cp build/classes org.apache.thrift.test.WriteStruct filename proto_factory_class"); + System.out.println("Write out an instance of Fixtures.compactProtocolTestStruct to 'file'. Use a protocol from 'proto_factory_class'."); + } + + TTransport trans = new TIOStreamTransport(new BufferedOutputStream(new FileOutputStream(args[0]))); + + TProtocolFactory factory = (TProtocolFactory)Class.forName(args[1]).newInstance(); + + TProtocol proto = factory.getProtocol(trans); + + Fixtures.compactProtoTestStruct.write(proto); + trans.flush(); + } + +} diff --git a/lib/rb/Manifest b/lib/rb/Manifest index c29123288..beddc3b04 100644 --- a/lib/rb/Manifest +++ b/lib/rb/Manifest @@ -15,10 +15,11 @@ ext/struct.h ext/protocol.h ext/protocol.c ext/constants.h +ext/macros.h ext/memory_buffer.h ext/memory_buffer.c -ext/rope_transport.c -ext/rope_transport.h +ext/compact_protocol.h +ext/compact_protocol.c ext/thrift_native.c ext/extconf.rb lib/thrift/client.rb @@ -28,6 +29,7 @@ lib/thrift/processor.rb lib/thrift/protocol/binaryprotocol.rb lib/thrift/protocol/binaryprotocolaccelerated.rb lib/thrift/protocol/tbinaryprotocol.rb +lib/thrift/protocol/compact_protocol.rb lib/thrift/protocol/tprotocol.rb lib/thrift/protocol.rb lib/thrift/serializer.rb diff --git a/lib/rb/Rakefile b/lib/rb/Rakefile index b5aa2ad84..96d14482c 100644 --- a/lib/rb/Rakefile +++ b/lib/rb/Rakefile @@ -27,7 +27,7 @@ task :test do end desc 'Compile the .thrift files for the specs' -task :'gen-rb' => [:'gen-rb:spec', :'gen-rb:benchmark'] +task :'gen-rb' => [:'gen-rb:spec', :'gen-rb:benchmark', :'gen-rb:debug_proto'] namespace :'gen-rb' do task :'spec' do @@ -39,6 +39,11 @@ namespace :'gen-rb' do dir = File.dirname(__FILE__) + '/benchmark' sh THRIFT, '--gen', 'rb', '-o', dir, "#{dir}/Benchmark.thrift" end + + task :'debug_proto' do + sh "mkdir", "-p", "debug_proto_test" + sh THRIFT, '--gen', 'rb', "-o", "debug_proto_test", "../../test/DebugProtoTest.thrift" + end end desc 'Run benchmarking of NonblockingServer' diff --git a/lib/rb/ext/binary_protocol_accelerated.c b/lib/rb/ext/binary_protocol_accelerated.c index fc4b675b7..bb4f4a3ae 100644 --- a/lib/rb/ext/binary_protocol_accelerated.c +++ b/lib/rb/ext/binary_protocol_accelerated.c @@ -3,12 +3,7 @@ #include #include #include - -#define GET_TRANSPORT(obj) rb_ivar_get(obj, transport_ivar_id) -#define GET_STRICT_READ(obj) rb_ivar_get(obj, strict_read_ivar_id) -#define GET_STRICT_WRITE(obj) rb_ivar_get(obj, strict_write_ivar_id) -#define WRITE(obj, data, length) rb_funcall(obj, write_method_id, 1, rb_str_new(data, length)) -#define CHECK_NIL(obj) if (NIL_P(obj)) { rb_raise(rb_eStandardError, "nil argument not allowed!");} +#include "macros.h" VALUE rb_thrift_binary_proto_native_qmark(VALUE self) { return Qtrue; @@ -205,8 +200,6 @@ VALUE rb_thrift_binary_proto_write_string(VALUE self, VALUE str) { // interface reading methods //--------------------------------------- -#define READ(obj, length) rb_funcall(GET_TRANSPORT(obj), read_method_id, 1, INT2FIX(length)) - VALUE rb_thrift_binary_proto_read_string(VALUE self); VALUE rb_thrift_binary_proto_read_byte(VALUE self); VALUE rb_thrift_binary_proto_read_i32(VALUE self); diff --git a/lib/rb/ext/compact_protocol.c b/lib/rb/ext/compact_protocol.c new file mode 100644 index 000000000..65b942cae --- /dev/null +++ b/lib/rb/ext/compact_protocol.c @@ -0,0 +1,645 @@ +#include +#include +#include +#include +#include +#include "macros.h" + +#define LAST_ID(obj) FIX2INT(rb_ary_pop(rb_ivar_get(obj, last_field_id))) +#define SET_LAST_ID(obj, val) rb_ary_push(rb_ivar_get(obj, last_field_id), val) + +VALUE rb_thrift_compact_proto_native_qmark(VALUE self) { + return Qtrue; +} + +static ID last_field_id; +static ID boolean_field_id; +static ID bool_value_id; + +static int VERSION; +static int VERSION_MASK; +static int TYPE_MASK; +static int TYPE_SHIFT_AMOUNT; +static int PROTOCOL_ID; + +static VALUE thrift_compact_protocol_class; + +static int CTYPE_BOOLEAN_TRUE = 0x01; +static int CTYPE_BOOLEAN_FALSE = 0x02; +static int CTYPE_BYTE = 0x03; +static int CTYPE_I16 = 0x04; +static int CTYPE_I32 = 0x05; +static int CTYPE_I64 = 0x06; +static int CTYPE_DOUBLE = 0x07; +static int CTYPE_BINARY = 0x08; +static int CTYPE_LIST = 0x09; +static int CTYPE_SET = 0x0A; +static int CTYPE_MAP = 0x0B; +static int CTYPE_STRUCT = 0x0C; + +VALUE rb_thrift_compact_proto_write_i16(VALUE self, VALUE i16); + +// TODO: implement this +static int get_compact_type(VALUE type_value) { + int type = FIX2INT(type_value); + if (type == TTYPE_BOOL) { + return CTYPE_BOOLEAN_TRUE; + } else if (type == TTYPE_BYTE) { + return CTYPE_BYTE; + } else if (type == TTYPE_I16) { + return CTYPE_I16; + } else if (type == TTYPE_I32) { + return CTYPE_I32; + } else if (type == TTYPE_I64) { + return CTYPE_I64; + } else if (type == TTYPE_DOUBLE) { + return CTYPE_DOUBLE; + } else if (type == TTYPE_STRING) { + return CTYPE_BINARY; + } else if (type == TTYPE_LIST) { + return CTYPE_LIST; + } else if (type == TTYPE_SET) { + return CTYPE_SET; + } else if (type == TTYPE_MAP) { + return CTYPE_MAP; + } else if (type == TTYPE_STRUCT) { + return CTYPE_STRUCT; + } else { + char str[50]; + sprintf(str, "don't know what type: %d", type); + rb_raise(rb_eStandardError, str); + return 0; + } +} + +static void write_byte_direct(VALUE transport, int8_t b) { + WRITE(transport, (char*)&b, 1); +} + +static void write_field_begin_internal(VALUE self, VALUE type, VALUE id_value, VALUE type_override) { + int id = FIX2INT(id_value); + int last_id = LAST_ID(self); + VALUE transport = GET_TRANSPORT(self); + + // if there's a type override, use that. + int8_t type_to_write = RTEST(type_override) ? FIX2INT(type_override) : get_compact_type(type); + // check if we can use delta encoding for the field id + int diff = id - last_id; + if (diff > 0 && diff <= 15) { + // write them together + write_byte_direct(transport, diff << 4 | (type_to_write & 0x0f)); + } else { + // write them separate + write_byte_direct(transport, type_to_write & 0x0f); + rb_thrift_compact_proto_write_i16(self, id_value); + } + + SET_LAST_ID(self, id_value); +} + +static int32_t int_to_zig_zag(int32_t n) { + return (n << 1) ^ (n >> 31); +} + +static uint64_t ll_to_zig_zag(int64_t n) { + return (n << 1) ^ (n >> 63); +} + +static void write_varint32(VALUE transport, uint32_t n) { + while (true) { + if ((n & ~0x7F) == 0) { + write_byte_direct(transport, n & 0x7f); + break; + } else { + write_byte_direct(transport, (n & 0x7F) | 0x80); + n = n >> 7; + } + } +} + +static void write_varint64(VALUE transport, uint64_t n) { + while (true) { + if ((n & ~0x7F) == 0) { + write_byte_direct(transport, n & 0x7f); + break; + } else { + write_byte_direct(transport, (n & 0x7F) | 0x80); + n = n >> 7; + } + } +} + +static void write_collection_begin(VALUE transport, VALUE elem_type, VALUE size_value) { + int size = FIX2INT(size_value); + if (size <= 14) { + write_byte_direct(transport, size << 4 | get_compact_type(elem_type)); + } else { + write_byte_direct(transport, 0xf0 | get_compact_type(elem_type)); + write_varint32(transport, size); + } +} + + +//-------------------------------- +// interface writing methods +//-------------------------------- + +VALUE rb_thrift_compact_proto_write_i32(VALUE self, VALUE i32); +VALUE rb_thrift_compact_proto_write_string(VALUE self, VALUE str); + +VALUE rb_thrift_compact_proto_write_message_end(VALUE self) { + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_struct_begin(VALUE self, VALUE name) { + rb_ary_push(rb_ivar_get(self, last_field_id), INT2FIX(0)); + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_struct_end(VALUE self) { + rb_ary_pop(rb_ivar_get(self, last_field_id)); + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_field_end(VALUE self) { + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_map_end(VALUE self) { + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_list_end(VALUE self) { + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_set_end(VALUE self) { + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_message_begin(VALUE self, VALUE name, VALUE type, VALUE seqid) { + VALUE transport = GET_TRANSPORT(self); + write_byte_direct(transport, PROTOCOL_ID); + write_byte_direct(transport, (VERSION & VERSION_MASK) | ((FIX2INT(type) << TYPE_SHIFT_AMOUNT) & TYPE_MASK)); + write_varint32(transport, FIX2INT(seqid)); + rb_thrift_compact_proto_write_string(self, name); + + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_field_begin(VALUE self, VALUE name, VALUE type, VALUE id) { + if (FIX2INT(type) == TTYPE_BOOL) { + // we want to possibly include the value, so we'll wait. + rb_ivar_set(self, boolean_field_id, rb_ary_new3(2, type, id)); + } else { + write_field_begin_internal(self, type, id, Qnil); + } + + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_field_stop(VALUE self) { + write_byte_direct(GET_TRANSPORT(self), TTYPE_STOP); + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_map_begin(VALUE self, VALUE ktype, VALUE vtype, VALUE size_value) { + int size = FIX2INT(size_value); + VALUE transport = GET_TRANSPORT(self); + if (size == 0) { + write_byte_direct(transport, 0); + } else { + write_varint32(transport, size); + write_byte_direct(transport, get_compact_type(ktype) << 4 | get_compact_type(vtype)); + } + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_list_begin(VALUE self, VALUE etype, VALUE size) { + write_collection_begin(GET_TRANSPORT(self), etype, size); + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_set_begin(VALUE self, VALUE etype, VALUE size) { + write_collection_begin(GET_TRANSPORT(self), etype, size); + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_bool(VALUE self, VALUE b) { + int8_t type = b == Qtrue ? CTYPE_BOOLEAN_TRUE : CTYPE_BOOLEAN_FALSE; + VALUE boolean_field = rb_ivar_get(self, boolean_field_id); + if (NIL_P(boolean_field)) { + // we're not part of a field, so just write the value. + write_byte_direct(GET_TRANSPORT(self), type); + } else { + // we haven't written the field header yet + write_field_begin_internal(self, rb_ary_entry(boolean_field, 0), rb_ary_entry(boolean_field, 1), INT2FIX(type)); + rb_ivar_set(self, boolean_field_id, Qnil); + } + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_byte(VALUE self, VALUE byte) { + CHECK_NIL(byte); + write_byte_direct(GET_TRANSPORT(self), FIX2INT(byte)); + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_i16(VALUE self, VALUE i16) { + rb_thrift_compact_proto_write_i32(self, i16); + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_i32(VALUE self, VALUE i32) { + CHECK_NIL(i32); + write_varint32(GET_TRANSPORT(self), int_to_zig_zag(NUM2INT(i32))); + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_i64(VALUE self, VALUE i64) { + CHECK_NIL(i64); + write_varint64(GET_TRANSPORT(self), ll_to_zig_zag(NUM2LL(i64))); + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_double(VALUE self, VALUE dub) { + CHECK_NIL(dub); + // Unfortunately, bitwise_cast doesn't work in C. Bad C! + union { + double f; + int64_t l; + } transfer; + transfer.f = RFLOAT(rb_Float(dub))->value; + char buf[8]; + buf[0] = transfer.l & 0xff; + buf[1] = (transfer.l >> 8) & 0xff; + buf[2] = (transfer.l >> 16) & 0xff; + buf[3] = (transfer.l >> 24) & 0xff; + buf[4] = (transfer.l >> 32) & 0xff; + buf[5] = (transfer.l >> 40) & 0xff; + buf[6] = (transfer.l >> 48) & 0xff; + buf[7] = (transfer.l >> 56) & 0xff; + WRITE(GET_TRANSPORT(self), buf, 8); + return Qnil; +} + +VALUE rb_thrift_compact_proto_write_string(VALUE self, VALUE str) { + VALUE transport = GET_TRANSPORT(self); + write_varint32(transport, RSTRING(str)->len); + WRITE(transport, RSTRING(str)->ptr, RSTRING(str)->len); + return Qnil; +} + +//--------------------------------------- +// interface reading methods +//--------------------------------------- + +#define is_bool_type(ctype) (((ctype) & 0x0F) == CTYPE_BOOLEAN_TRUE || ((ctype) & 0x0F) == CTYPE_BOOLEAN_FALSE) + +VALUE rb_thrift_compact_proto_read_string(VALUE self); +VALUE rb_thrift_compact_proto_read_byte(VALUE self); +VALUE rb_thrift_compact_proto_read_i32(VALUE self); +VALUE rb_thrift_compact_proto_read_i16(VALUE self); + +static int8_t get_ttype(int8_t ctype) { + if (ctype == TTYPE_STOP) { + return TTYPE_STOP; + } else if (ctype == CTYPE_BOOLEAN_TRUE || ctype == CTYPE_BOOLEAN_FALSE) { + return TTYPE_BOOL; + } else if (ctype == CTYPE_BYTE) { + return TTYPE_BYTE; + } else if (ctype == CTYPE_I16) { + return TTYPE_I16; + } else if (ctype == CTYPE_I32) { + return TTYPE_I32; + } else if (ctype == CTYPE_I64) { + return TTYPE_I64; + } else if (ctype == CTYPE_DOUBLE) { + return TTYPE_DOUBLE; + } else if (ctype == CTYPE_BINARY) { + return TTYPE_STRING; + } else if (ctype == CTYPE_LIST) { + return TTYPE_LIST; + } else if (ctype == CTYPE_SET) { + return TTYPE_SET; + } else if (ctype == CTYPE_MAP) { + return TTYPE_MAP; + } else if (ctype == CTYPE_STRUCT) { + return TTYPE_STRUCT; + } else { + char str[50]; + sprintf(str, "don't know what type: %d", ctype); + rb_raise(rb_eStandardError, str); + return 0; + } +} + +static char read_byte_direct(VALUE self) { + return (RSTRING(READ(self, 1))->ptr)[0]; +} + +static int64_t zig_zag_to_ll(int64_t n) { + return (((uint64_t)n) >> 1) ^ -(n & 1); +} + +static int32_t zig_zag_to_int(int32_t n) { + return (((uint32_t)n) >> 1) ^ -(n & 1); +} + +static int64_t read_varint64(VALUE self) { + int shift = 0; + int64_t result = 0; + while (true) { + int8_t b = read_byte_direct(self); + result = result | ((uint64_t)(b & 0x7f) << shift); + if ((b & 0x80) != 0x80) { + break; + } + shift += 7; + } + return result; +} + +static int16_t read_i16(VALUE self) { + return zig_zag_to_int((int32_t)read_varint64(self)); +} + +static VALUE get_protocol_exception(VALUE code, VALUE message) { + VALUE args[2]; + args[0] = code; + args[1] = message; + return rb_class_new_instance(2, (VALUE*)&args, protocol_exception_class); +} + +VALUE rb_thrift_compact_proto_read_message_end(VALUE self) { + return Qnil; +} + +VALUE rb_thrift_compact_proto_read_struct_begin(VALUE self) { + rb_ary_push(rb_ivar_get(self, last_field_id), INT2FIX(0)); + return Qnil; +} + +VALUE rb_thrift_compact_proto_read_struct_end(VALUE self) { + rb_ary_pop(rb_ivar_get(self, last_field_id)); + return Qnil; +} + +VALUE rb_thrift_compact_proto_read_field_end(VALUE self) { + return Qnil; +} + +VALUE rb_thrift_compact_proto_read_map_end(VALUE self) { + return Qnil; +} + +VALUE rb_thrift_compact_proto_read_list_end(VALUE self) { + return Qnil; +} + +VALUE rb_thrift_compact_proto_read_set_end(VALUE self) { + return Qnil; +} + +VALUE rb_thrift_compact_proto_read_message_begin(VALUE self) { + int8_t protocol_id = read_byte_direct(self); + if (protocol_id != PROTOCOL_ID) { + char buf[100]; + int len = sprintf(buf, "Expected protocol id %d but got %d", PROTOCOL_ID, protocol_id); + buf[len] = 0; + rb_exc_raise(get_protocol_exception(INT2FIX(-1), rb_str_new2(buf))); + } + + int8_t version_and_type = read_byte_direct(self); + int8_t version = version_and_type & VERSION_MASK; + if (version != VERSION) { + char buf[100]; + int len = sprintf(buf, "Expected version id %d but got %d", version, VERSION); + buf[len] = 0; + rb_exc_raise(get_protocol_exception(INT2FIX(-1), rb_str_new2(buf))); + } + + int8_t type = (version_and_type >> TYPE_SHIFT_AMOUNT) & 0x03; + int32_t seqid = read_varint64(self); + VALUE messageName = rb_thrift_compact_proto_read_string(self); + return rb_ary_new3(3, messageName, INT2FIX(type), INT2NUM(seqid)); +} + +VALUE rb_thrift_compact_proto_read_field_begin(VALUE self) { + int8_t type = read_byte_direct(self); + // if it's a stop, then we can return immediately, as the struct is over. + if ((type & 0x0f) == TTYPE_STOP) { + return rb_ary_new3(3, Qnil, INT2FIX(0), INT2FIX(0)); + } else { + int field_id = 0; + + // mask off the 4 MSB of the type header. it could contain a field id delta. + uint8_t modifier = ((type & 0xf0) >> 4); + + if (modifier == 0) { + // not a delta. look ahead for the zigzag varint field id. + field_id = read_i16(self); + } else { + // has a delta. add the delta to the last read field id. + field_id = LAST_ID(self) + modifier; + } + + // if this happens to be a boolean field, the value is encoded in the type + if (is_bool_type(type)) { + // save the boolean value in a special instance variable. + rb_ivar_set(self, bool_value_id, (type & 0x0f) == CTYPE_BOOLEAN_TRUE ? Qtrue : Qfalse); + } + + // push the new field onto the field stack so we can keep the deltas going. + SET_LAST_ID(self, INT2FIX(field_id)); + return rb_ary_new3(3, Qnil, INT2FIX(get_ttype(type & 0x0f)), INT2FIX(field_id)); + } +} + +VALUE rb_thrift_compact_proto_read_map_begin(VALUE self) { + int32_t size = read_varint64(self); + uint8_t key_and_value_type = size == 0 ? 0 : read_byte_direct(self); + return rb_ary_new3(3, INT2FIX(get_ttype(key_and_value_type >> 4)), INT2FIX(get_ttype(key_and_value_type & 0xf)), INT2FIX(size)); +} + +VALUE rb_thrift_compact_proto_read_list_begin(VALUE self) { + uint8_t size_and_type = read_byte_direct(self); + int32_t size = (size_and_type >> 4) & 0x0f; + if (size == 15) { + size = read_varint64(self); + } + uint8_t type = get_ttype(size_and_type & 0x0f); + return rb_ary_new3(2, INT2FIX(type), INT2FIX(size)); +} + +VALUE rb_thrift_compact_proto_read_set_begin(VALUE self) { + return rb_thrift_compact_proto_read_list_begin(self); +} + +VALUE rb_thrift_compact_proto_read_bool(VALUE self) { + VALUE bool_value = rb_ivar_get(self, bool_value_id); + if (NIL_P(bool_value)) { + return read_byte_direct(self) == CTYPE_BOOLEAN_TRUE ? Qtrue : Qfalse; + } else { + rb_ivar_set(self, bool_value_id, Qnil); + return bool_value; + } +} + +VALUE rb_thrift_compact_proto_read_byte(VALUE self) { + return INT2FIX(read_byte_direct(self)); +} + +VALUE rb_thrift_compact_proto_read_i16(VALUE self) { + return INT2FIX(read_i16(self)); +} + +VALUE rb_thrift_compact_proto_read_i32(VALUE self) { + return INT2NUM(zig_zag_to_int(read_varint64(self))); +} + +VALUE rb_thrift_compact_proto_read_i64(VALUE self) { + return LL2NUM(zig_zag_to_ll(read_varint64(self))); +} + +VALUE rb_thrift_compact_proto_read_double(VALUE self) { + union { + double f; + int64_t l; + } transfer; + VALUE bytes = READ(self, 8); + uint32_t lo = ((uint8_t)(RSTRING(bytes)->ptr[0])) + | (((uint8_t)(RSTRING(bytes)->ptr[1])) << 8) + | (((uint8_t)(RSTRING(bytes)->ptr[2])) << 16) + | (((uint8_t)(RSTRING(bytes)->ptr[3])) << 24); + uint64_t hi = (((uint8_t)(RSTRING(bytes)->ptr[4]))) + | (((uint8_t)(RSTRING(bytes)->ptr[5])) << 8) + | (((uint8_t)(RSTRING(bytes)->ptr[6])) << 16) + | (((uint8_t)(RSTRING(bytes)->ptr[7])) << 24); + transfer.l = (hi << 32) | lo; + + return rb_float_new(transfer.f); +} + +VALUE rb_thrift_compact_proto_read_string(VALUE self) { + int64_t size = read_varint64(self); + return READ(self, size); +} + +static void Init_constants() { + thrift_compact_protocol_class = rb_const_get(thrift_module, rb_intern("CompactProtocol")); + + VERSION = rb_num2ll(rb_const_get(thrift_compact_protocol_class, rb_intern("VERSION"))); + VERSION_MASK = rb_num2ll(rb_const_get(thrift_compact_protocol_class, rb_intern("VERSION_MASK"))); + TYPE_MASK = rb_num2ll(rb_const_get(thrift_compact_protocol_class, rb_intern("TYPE_MASK"))); + TYPE_SHIFT_AMOUNT = FIX2INT(rb_const_get(thrift_compact_protocol_class, rb_intern("TYPE_SHIFT_AMOUNT"))); + PROTOCOL_ID = FIX2INT(rb_const_get(thrift_compact_protocol_class, rb_intern("PROTOCOL_ID"))); + + last_field_id = rb_intern("@last_field"); + boolean_field_id = rb_intern("@boolean_field"); + bool_value_id = rb_intern("@bool_value"); +} + +static void Init_rb_methods() { + rb_define_method(thrift_compact_protocol_class, "native?", rb_thrift_compact_proto_native_qmark, 0); + + rb_define_method(thrift_compact_protocol_class, "write_message_begin", rb_thrift_compact_proto_write_message_begin, 3); + rb_define_method(thrift_compact_protocol_class, "write_field_begin", rb_thrift_compact_proto_write_field_begin, 3); + rb_define_method(thrift_compact_protocol_class, "write_field_stop", rb_thrift_compact_proto_write_field_stop, 0); + rb_define_method(thrift_compact_protocol_class, "write_map_begin", rb_thrift_compact_proto_write_map_begin, 3); + rb_define_method(thrift_compact_protocol_class, "write_list_begin", rb_thrift_compact_proto_write_list_begin, 2); + rb_define_method(thrift_compact_protocol_class, "write_set_begin", rb_thrift_compact_proto_write_set_begin, 2); + rb_define_method(thrift_compact_protocol_class, "write_byte", rb_thrift_compact_proto_write_byte, 1); + rb_define_method(thrift_compact_protocol_class, "write_bool", rb_thrift_compact_proto_write_bool, 1); + rb_define_method(thrift_compact_protocol_class, "write_i16", rb_thrift_compact_proto_write_i16, 1); + rb_define_method(thrift_compact_protocol_class, "write_i32", rb_thrift_compact_proto_write_i32, 1); + rb_define_method(thrift_compact_protocol_class, "write_i64", rb_thrift_compact_proto_write_i64, 1); + rb_define_method(thrift_compact_protocol_class, "write_double", rb_thrift_compact_proto_write_double, 1); + rb_define_method(thrift_compact_protocol_class, "write_string", rb_thrift_compact_proto_write_string, 1); + + rb_define_method(thrift_compact_protocol_class, "write_message_end", rb_thrift_compact_proto_write_message_end, 0); + rb_define_method(thrift_compact_protocol_class, "write_struct_begin", rb_thrift_compact_proto_write_struct_begin, 1); + rb_define_method(thrift_compact_protocol_class, "write_struct_end", rb_thrift_compact_proto_write_struct_end, 0); + rb_define_method(thrift_compact_protocol_class, "write_field_end", rb_thrift_compact_proto_write_field_end, 0); + rb_define_method(thrift_compact_protocol_class, "write_map_end", rb_thrift_compact_proto_write_map_end, 0); + rb_define_method(thrift_compact_protocol_class, "write_list_end", rb_thrift_compact_proto_write_list_end, 0); + rb_define_method(thrift_compact_protocol_class, "write_set_end", rb_thrift_compact_proto_write_set_end, 0); + + + rb_define_method(thrift_compact_protocol_class, "read_message_begin", rb_thrift_compact_proto_read_message_begin, 0); + rb_define_method(thrift_compact_protocol_class, "read_field_begin", rb_thrift_compact_proto_read_field_begin, 0); + rb_define_method(thrift_compact_protocol_class, "read_map_begin", rb_thrift_compact_proto_read_map_begin, 0); + rb_define_method(thrift_compact_protocol_class, "read_list_begin", rb_thrift_compact_proto_read_list_begin, 0); + rb_define_method(thrift_compact_protocol_class, "read_set_begin", rb_thrift_compact_proto_read_set_begin, 0); + rb_define_method(thrift_compact_protocol_class, "read_byte", rb_thrift_compact_proto_read_byte, 0); + rb_define_method(thrift_compact_protocol_class, "read_bool", rb_thrift_compact_proto_read_bool, 0); + rb_define_method(thrift_compact_protocol_class, "read_i16", rb_thrift_compact_proto_read_i16, 0); + rb_define_method(thrift_compact_protocol_class, "read_i32", rb_thrift_compact_proto_read_i32, 0); + rb_define_method(thrift_compact_protocol_class, "read_i64", rb_thrift_compact_proto_read_i64, 0); + rb_define_method(thrift_compact_protocol_class, "read_double", rb_thrift_compact_proto_read_double, 0); + rb_define_method(thrift_compact_protocol_class, "read_string", rb_thrift_compact_proto_read_string, 0); + + rb_define_method(thrift_compact_protocol_class, "read_message_end", rb_thrift_compact_proto_read_message_end, 0); + rb_define_method(thrift_compact_protocol_class, "read_struct_begin", rb_thrift_compact_proto_read_struct_begin, 0); + rb_define_method(thrift_compact_protocol_class, "read_struct_end", rb_thrift_compact_proto_read_struct_end, 0); + rb_define_method(thrift_compact_protocol_class, "read_field_end", rb_thrift_compact_proto_read_field_end, 0); + rb_define_method(thrift_compact_protocol_class, "read_map_end", rb_thrift_compact_proto_read_map_end, 0); + rb_define_method(thrift_compact_protocol_class, "read_list_end", rb_thrift_compact_proto_read_list_end, 0); + rb_define_method(thrift_compact_protocol_class, "read_set_end", rb_thrift_compact_proto_read_set_end, 0); +} + +static void Init_npmt() { + native_proto_method_table *npmt; + npmt = ALLOC(native_proto_method_table); + + npmt->write_field_begin = rb_thrift_compact_proto_write_field_begin; + npmt->write_field_stop = rb_thrift_compact_proto_write_field_stop; + npmt->write_map_begin = rb_thrift_compact_proto_write_map_begin; + npmt->write_list_begin = rb_thrift_compact_proto_write_list_begin; + npmt->write_set_begin = rb_thrift_compact_proto_write_set_begin; + npmt->write_byte = rb_thrift_compact_proto_write_byte; + npmt->write_bool = rb_thrift_compact_proto_write_bool; + npmt->write_i16 = rb_thrift_compact_proto_write_i16; + npmt->write_i32 = rb_thrift_compact_proto_write_i32; + npmt->write_i64 = rb_thrift_compact_proto_write_i64; + npmt->write_double = rb_thrift_compact_proto_write_double; + npmt->write_string = rb_thrift_compact_proto_write_string; + npmt->write_message_end = rb_thrift_compact_proto_write_message_end; + npmt->write_struct_begin = rb_thrift_compact_proto_write_struct_begin; + npmt->write_struct_end = rb_thrift_compact_proto_write_struct_end; + npmt->write_field_end = rb_thrift_compact_proto_write_field_end; + npmt->write_map_end = rb_thrift_compact_proto_write_map_end; + npmt->write_list_end = rb_thrift_compact_proto_write_list_end; + npmt->write_set_end = rb_thrift_compact_proto_write_set_end; + + npmt->read_message_begin = rb_thrift_compact_proto_read_message_begin; + npmt->read_field_begin = rb_thrift_compact_proto_read_field_begin; + npmt->read_map_begin = rb_thrift_compact_proto_read_map_begin; + npmt->read_list_begin = rb_thrift_compact_proto_read_list_begin; + npmt->read_set_begin = rb_thrift_compact_proto_read_set_begin; + npmt->read_byte = rb_thrift_compact_proto_read_byte; + npmt->read_bool = rb_thrift_compact_proto_read_bool; + npmt->read_i16 = rb_thrift_compact_proto_read_i16; + npmt->read_i32 = rb_thrift_compact_proto_read_i32; + npmt->read_i64 = rb_thrift_compact_proto_read_i64; + npmt->read_double = rb_thrift_compact_proto_read_double; + npmt->read_string = rb_thrift_compact_proto_read_string; + npmt->read_message_end = rb_thrift_compact_proto_read_message_end; + npmt->read_struct_begin = rb_thrift_compact_proto_read_struct_begin; + npmt->read_struct_end = rb_thrift_compact_proto_read_struct_end; + npmt->read_field_end = rb_thrift_compact_proto_read_field_end; + npmt->read_map_end = rb_thrift_compact_proto_read_map_end; + npmt->read_list_end = rb_thrift_compact_proto_read_list_end; + npmt->read_set_end = rb_thrift_compact_proto_read_set_end; + + VALUE method_table_object = Data_Wrap_Struct(rb_cObject, 0, free, npmt); + rb_const_set(thrift_compact_protocol_class, rb_intern("@native_method_table"), method_table_object); +} + + + +void Init_compact_protocol() { + Init_constants(); + Init_rb_methods(); + Init_npmt(); +} diff --git a/lib/rb/ext/compact_protocol.h b/lib/rb/ext/compact_protocol.h new file mode 100644 index 000000000..461952856 --- /dev/null +++ b/lib/rb/ext/compact_protocol.h @@ -0,0 +1 @@ +void Init_compact_protocol(); \ No newline at end of file diff --git a/lib/rb/ext/macros.h b/lib/rb/ext/macros.h new file mode 100644 index 000000000..2072cb5d6 --- /dev/null +++ b/lib/rb/ext/macros.h @@ -0,0 +1,7 @@ + +#define GET_TRANSPORT(obj) rb_ivar_get(obj, transport_ivar_id) +#define GET_STRICT_READ(obj) rb_ivar_get(obj, strict_read_ivar_id) +#define GET_STRICT_WRITE(obj) rb_ivar_get(obj, strict_write_ivar_id) +#define WRITE(obj, data, length) rb_funcall(obj, write_method_id, 1, rb_str_new(data, length)) +#define CHECK_NIL(obj) if (NIL_P(obj)) { rb_raise(rb_eStandardError, "nil argument not allowed!");} +#define READ(obj, length) rb_funcall(GET_TRANSPORT(obj), read_method_id, 1, INT2FIX(length)) \ No newline at end of file diff --git a/lib/rb/ext/struct.c b/lib/rb/ext/struct.c index b882344ba..20b48b931 100644 --- a/lib/rb/ext/struct.c +++ b/lib/rb/ext/struct.c @@ -24,10 +24,30 @@ strlcpy (char *dst, const char *src, size_t dst_sz) #endif static native_proto_method_table *mt; +static native_proto_method_table *default_mt; +static VALUE last_proto_class = Qnil; #define IS_CONTAINER(ttype) ((ttype) == TTYPE_MAP || (ttype) == TTYPE_LIST || (ttype) == TTYPE_SET) #define STRUCT_FIELDS(obj) rb_const_get(CLASS_OF(obj), fields_const_id) +static void set_native_proto_function_pointers(VALUE protocol) { + VALUE method_table_object = rb_const_get(CLASS_OF(protocol), rb_intern("@native_method_table")); + // TODO: check nil? + Data_Get_Struct(method_table_object, native_proto_method_table, mt); +} + +static void check_native_proto_method_table(VALUE protocol) { + VALUE protoclass = CLASS_OF(protocol); + if (protoclass != last_proto_class) { + last_proto_class = protoclass; + if (rb_funcall(protocol, native_qmark_method_id, 0) == Qtrue) { + set_native_proto_function_pointers(protocol); + } else { + mt = default_mt; + } + } +} + //------------------------------------------- // Writing section //------------------------------------------- @@ -193,51 +213,44 @@ VALUE default_read_struct_end(VALUE protocol) { } static void set_default_proto_function_pointers() { - mt = ALLOC(native_proto_method_table); - - mt->write_field_begin = default_write_field_begin; - mt->write_field_stop = default_write_field_stop; - mt->write_map_begin = default_write_map_begin; - mt->write_map_end = default_write_map_end; - mt->write_list_begin = default_write_list_begin; - mt->write_list_end = default_write_list_end; - mt->write_set_begin = default_write_set_begin; - mt->write_set_end = default_write_set_end; - mt->write_byte = default_write_byte; - mt->write_bool = default_write_bool; - mt->write_i16 = default_write_i16; - mt->write_i32 = default_write_i32; - mt->write_i64 = default_write_i64; - mt->write_double = default_write_double; - mt->write_string = default_write_string; - mt->write_struct_begin = default_write_struct_begin; - mt->write_struct_end = default_write_struct_end; - mt->write_field_end = default_write_field_end; + default_mt = ALLOC(native_proto_method_table); - mt->read_struct_begin = default_read_struct_begin; - mt->read_struct_end = default_read_struct_end; - mt->read_field_begin = default_read_field_begin; - mt->read_field_end = default_read_field_end; - mt->read_map_begin = default_read_map_begin; - mt->read_map_end = default_read_map_end; - mt->read_list_begin = default_read_list_begin; - mt->read_list_end = default_read_list_end; - mt->read_set_begin = default_read_set_begin; - mt->read_set_end = default_read_set_end; - mt->read_byte = default_read_byte; - mt->read_bool = default_read_bool; - mt->read_i16 = default_read_i16; - mt->read_i32 = default_read_i32; - mt->read_i64 = default_read_i64; - mt->read_double = default_read_double; - mt->read_string = default_read_string; - -} + default_mt->write_field_begin = default_write_field_begin; + default_mt->write_field_stop = default_write_field_stop; + default_mt->write_map_begin = default_write_map_begin; + default_mt->write_map_end = default_write_map_end; + default_mt->write_list_begin = default_write_list_begin; + default_mt->write_list_end = default_write_list_end; + default_mt->write_set_begin = default_write_set_begin; + default_mt->write_set_end = default_write_set_end; + default_mt->write_byte = default_write_byte; + default_mt->write_bool = default_write_bool; + default_mt->write_i16 = default_write_i16; + default_mt->write_i32 = default_write_i32; + default_mt->write_i64 = default_write_i64; + default_mt->write_double = default_write_double; + default_mt->write_string = default_write_string; + default_mt->write_struct_begin = default_write_struct_begin; + default_mt->write_struct_end = default_write_struct_end; + default_mt->write_field_end = default_write_field_end; -static void set_native_proto_function_pointers(VALUE protocol) { - VALUE method_table_object = rb_const_get(CLASS_OF(protocol), rb_intern("@native_method_table")); - // TODO: check nil? - Data_Get_Struct(method_table_object, native_proto_method_table, mt); + default_mt->read_struct_begin = default_read_struct_begin; + default_mt->read_struct_end = default_read_struct_end; + default_mt->read_field_begin = default_read_field_begin; + default_mt->read_field_end = default_read_field_end; + default_mt->read_map_begin = default_read_map_begin; + default_mt->read_map_end = default_read_map_end; + default_mt->read_list_begin = default_read_list_begin; + default_mt->read_list_end = default_read_list_end; + default_mt->read_set_begin = default_read_set_begin; + default_mt->read_set_end = default_read_set_end; + default_mt->read_byte = default_read_byte; + default_mt->read_bool = default_read_bool; + default_mt->read_i16 = default_read_i16; + default_mt->read_i32 = default_read_i32; + default_mt->read_i64 = default_read_i64; + default_mt->read_double = default_read_double; + default_mt->read_string = default_read_string; } // end default protocol methods @@ -383,11 +396,12 @@ static VALUE rb_thrift_struct_write(VALUE self, VALUE protocol) { // call validate rb_funcall(self, validate_method_id, 0); - if (RTEST(rb_funcall(protocol, native_qmark_method_id, 0))) { - set_native_proto_function_pointers(protocol); - } else { - set_default_proto_function_pointers(); - } + // if (rb_funcall(protocol, native_qmark_method_id, 0) == Qtrue) { + // set_native_proto_function_pointers(protocol); + // } else { + // set_default_proto_function_pointers(); + // } + check_native_proto_method_table(protocol); // write struct begin mt->write_struct_begin(protocol, rb_class_name(CLASS_OF(self))); @@ -522,6 +536,8 @@ static VALUE read_anything(VALUE protocol, int ttype, VALUE field_info) { } static VALUE rb_thrift_struct_read(VALUE self, VALUE protocol) { + check_native_proto_method_table(protocol); + // read struct begin mt->read_struct_begin(protocol); diff --git a/lib/rb/ext/thrift_native.c b/lib/rb/ext/thrift_native.c index 4d5623d42..b9afdc315 100644 --- a/lib/rb/ext/thrift_native.c +++ b/lib/rb/ext/thrift_native.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -169,5 +170,6 @@ void Init_thrift_native() { Init_protocol(); Init_struct(); Init_binary_protocol_accelerated(); + Init_compact_protocol(); Init_memory_buffer(); } diff --git a/lib/rb/lib/thrift/protocol/compact_protocol.rb b/lib/rb/lib/thrift/protocol/compact_protocol.rb new file mode 100644 index 000000000..40f0ab3a8 --- /dev/null +++ b/lib/rb/lib/thrift/protocol/compact_protocol.rb @@ -0,0 +1,405 @@ +require 'thrift/protocol' + +module Thrift + class CompactProtocol < Protocol + + PROTOCOL_ID = [0x82].pack('c').unpack('c').first + VERSION = 1 + VERSION_MASK = 0x1f + TYPE_MASK = 0xE0 + TYPE_SHIFT_AMOUNT = 5 + + TSTOP = ["", Types::STOP, 0] + + # + # All of the on-wire type codes. + # + class CompactTypes + BOOLEAN_TRUE = 0x01 + BOOLEAN_FALSE = 0x02 + BYTE = 0x03 + I16 = 0x04 + I32 = 0x05 + I64 = 0x06 + DOUBLE = 0x07 + BINARY = 0x08 + LIST = 0x09 + SET = 0x0A + MAP = 0x0B + STRUCT = 0x0C + + def self.is_bool_type?(b) + (b & 0x0f) == BOOLEAN_TRUE || (b & 0x0f) == BOOLEAN_FALSE + end + + COMPACT_TO_TTYPE = { + Types::STOP => Types::STOP, + BOOLEAN_FALSE => Types::BOOL, + BOOLEAN_TRUE => Types::BOOL, + BYTE => Types::BYTE, + I16 => Types::I16, + I32 => Types::I32, + I64 => Types::I64, + DOUBLE => Types::DOUBLE, + BINARY => Types::STRING, + LIST => Types::LIST, + SET => Types::SET, + MAP => Types::MAP, + STRUCT => Types::STRUCT + } + + TTYPE_TO_COMPACT = { + Types::STOP => Types::STOP, + Types::BOOL => BOOLEAN_TRUE, + Types::BYTE => BYTE, + Types::I16 => I16, + Types::I32 => I32, + Types::I64 => I64, + Types::DOUBLE => DOUBLE, + Types::STRING => BINARY, + Types::LIST => LIST, + Types::SET => SET, + Types::MAP => MAP, + Types::STRUCT => STRUCT + } + + def self.get_ttype(compact_type) + val = COMPACT_TO_TTYPE[compact_type & 0x0f] + raise "don't know what type: #{compact_type & 0x0f}" unless val + val + end + + def self.get_compact_type(ttype) + val = TTYPE_TO_COMPACT[ttype] + raise "don't know what type: #{ttype & 0x0f}" unless val + val + end + end + + def initialize(transport) + super(transport) + + @last_field = [0] + @boolean_value = nil + end + + def write_message_begin(name, type, seqid) + write_byte(PROTOCOL_ID) + write_byte((VERSION & VERSION_MASK) | ((type << TYPE_SHIFT_AMOUNT) & TYPE_MASK)) + write_varint32(seqid) + write_string(name) + nil + end + + def write_struct_begin(name) + @last_field.push(0) + nil + end + + def write_struct_end + @last_field.pop + nil + end + + def write_field_begin(name, type, id) + if type == Types::BOOL + # we want to possibly include the value, so we'll wait. + @boolean_field = [type, id] + else + write_field_begin_internal(type, id) + end + nil + end + + # + # The workhorse of writeFieldBegin. It has the option of doing a + # 'type override' of the type header. This is used specifically in the + # boolean field case. + # + def write_field_begin_internal(type, id, type_override=nil) + last_id = @last_field.pop + + # if there's a type override, use that. + typeToWrite = type_override || CompactTypes.get_compact_type(type) + + # check if we can use delta encoding for the field id + if id > last_id && id - last_id <= 15 + # write them together + write_byte((id - last_id) << 4 | typeToWrite) + else + # write them separate + write_byte(typeToWrite) + write_i16(id) + end + + @last_field.push(id) + nil + end + + def write_field_stop + write_byte(Types::STOP) + end + + def write_map_begin(ktype, vtype, size) + if (size == 0) + write_byte(0) + else + write_varint32(size) + write_byte(CompactTypes.get_compact_type(ktype) << 4 | CompactTypes.get_compact_type(vtype)) + end + end + + def write_list_begin(etype, size) + write_collection_begin(etype, size) + end + + def write_set_begin(etype, size) + write_collection_begin(etype, size); + end + + def write_bool(bool) + type = bool ? CompactTypes::BOOLEAN_TRUE : CompactTypes::BOOLEAN_FALSE + unless @boolean_field.nil? + # we haven't written the field header yet + write_field_begin_internal(@boolean_field.first, @boolean_field.last, type) + @boolean_field = nil + else + # we're not part of a field, so just write the value. + write_byte(type) + end + end + + def write_byte(byte) + @trans.write([byte].pack('c')) + end + + def write_i16(i16) + write_varint32(int_to_zig_zag(i16)) + end + + def write_i32(i32) + write_varint32(int_to_zig_zag(i32)) + end + + def write_i64(i64) + write_varint64(long_to_zig_zag(i64)) + end + + def write_double(dub) + @trans.write([dub].pack("G").reverse) + end + + def write_string(str) + write_varint32(str.length) + @trans.write(str) + end + + def read_message_begin + protocol_id = read_byte() + if protocol_id != PROTOCOL_ID + raise ProtocolException.new("Expected protocol id #{PROTOCOL_ID} but got #{protocol_id}") + end + + version_and_type = read_byte() + version = version_and_type & VERSION_MASK + if (version != VERSION) + raise ProtocolException.new("Expected version #{VERSION} but got #{version}"); + end + + type = (version_and_type >> TYPE_SHIFT_AMOUNT) & 0x03 + seqid = read_varint32() + messageName = read_string() + [messageName, type, seqid] + end + + def read_struct_begin + @last_field.push(0) + "" + end + + def read_struct_end + @last_field.pop() + nil + end + + def read_field_begin + type = read_byte() + + # if it's a stop, then we can return immediately, as the struct is over. + if (type & 0x0f) == Types::STOP + TSTOP + else + field_id = nil + + # mask off the 4 MSB of the type header. it could contain a field id delta. + modifier = (type & 0xf0) >> 4 + if modifier == 0 + # not a delta. look ahead for the zigzag varint field id. + field_id = read_i16() + else + # has a delta. add the delta to the last read field id. + field_id = @last_field.pop + modifier + end + + # if this happens to be a boolean field, the value is encoded in the type + if CompactTypes.is_bool_type?(type) + # save the boolean value in a special instance variable. + @bool_value = (type & 0x0f) == CompactTypes::BOOLEAN_TRUE + end + + # push the new field onto the field stack so we can keep the deltas going. + @last_field.push(field_id) + ["", CompactTypes.get_ttype(type & 0x0f), field_id] + end + end + + def read_map_begin + size = read_varint32() + key_and_value_type = size == 0 ? 0 : read_byte() + [CompactTypes.get_ttype(key_and_value_type >> 4), CompactTypes.get_ttype(key_and_value_type & 0xf), size] + end + + def read_list_begin + size_and_type = read_byte() + size = (size_and_type >> 4) & 0x0f + if size == 15 + size = read_varint32() + end + type = CompactTypes.get_ttype(size_and_type) + [type, size] + end + + def read_set_begin + read_list_begin + end + + def read_bool + unless @bool_value.nil? + bv = @bool_value + @bool_value = nil + bv + else + read_byte() == CompactTypes::BOOLEAN_TRUE + end + end + + def read_byte + dat = trans.read_all(1) + val = dat[0] + if (val > 0x7f) + val = 0 - ((val - 1) ^ 0xff) + end + val + end + + def read_i16 + zig_zag_to_int(read_varint32()) + end + + def read_i32 + zig_zag_to_int(read_varint32()) + end + + def read_i64 + zig_zag_to_long(read_varint64()) + end + + def read_double + dat = trans.read_all(8) + val = dat.reverse.unpack('G').first + val + end + + def read_string + size = read_varint32() + trans.read_all(size) + end + + + private + + # + # Abstract method for writing the start of lists and sets. List and sets on + # the wire differ only by the type indicator. + # + def write_collection_begin(elem_type, size) + if size <= 14 + write_byte(size << 4 | CompactTypes.get_compact_type(elem_type)) + else + write_byte(0xf0 | CompactTypes.get_compact_type(elem_type)) + write_varint32(size) + end + end + + def write_varint32(n) + # int idx = 0; + while true + if (n & ~0x7F) == 0 + # i32buf[idx++] = (byte)n; + write_byte(n) + break + # return; + else + # i32buf[idx++] = (byte)((n & 0x7F) | 0x80); + write_byte((n & 0x7F) | 0x80) + n = n >> 7 + end + end + # trans_.write(i32buf, 0, idx); + end + + SEVEN_BIT_MASK = 0x7F + EVERYTHING_ELSE_MASK = ~SEVEN_BIT_MASK + + def write_varint64(n) + while true + if (n & EVERYTHING_ELSE_MASK) == 0 #TODO need to find a way to make this into a long... + write_byte(n) + break + else + write_byte((n & SEVEN_BIT_MASK) | 0x80) + n >>= 7 + end + end + end + + def read_varint32() + read_varint64() + end + + def read_varint64() + shift = 0 + result = 0 + while true + b = read_byte() + result |= (b & 0x7f) << shift + break if (b & 0x80) != 0x80 + shift += 7 + end + result + end + + def int_to_zig_zag(n) + (n << 1) ^ (n >> 31) + end + + def long_to_zig_zag(l) + # puts "zz encoded #{l} to #{(l << 1) ^ (l >> 63)}" + (l << 1) ^ (l >> 63) + end + + def zig_zag_to_int(n) + (n >> 1) ^ -(n & 1) + end + + def zig_zag_to_long(n) + (n >> 1) ^ -(n & 1) + end + end + + class CompactProtocolFactory < ProtocolFactory + def get_protocol(trans) + CompactProtocol.new(trans) + end + end +end diff --git a/lib/rb/lib/thrift/struct.rb b/lib/rb/lib/thrift/struct.rb index abed860e5..57c8805d8 100644 --- a/lib/rb/lib/thrift/struct.rb +++ b/lib/rb/lib/thrift/struct.rb @@ -39,7 +39,7 @@ module Thrift unless fields_with_default_values fields_with_default_values = {} struct_fields.each do |fid, field_def| - if field_def[:default] + unless field_def[:default].nil? fields_with_default_values[field_def[:name]] = field_def[:default] end end @@ -60,23 +60,19 @@ module Thrift names_to_ids[name] end - # Obsoleted by THRIFT-246, which generates this method inline - # TODO: Should be removed at some point. -- Kevin Clark - def struct_fields - self.class.const_get(:FIELDS) - end - def each_field - struct_fields.each do |fid, data| - yield fid, data[:type], data[:name], data[:default], data[:optional] + struct_fields.keys.sort.each do |fid| + data = struct_fields[fid] + yield fid, data end end def inspect(skip_optional_nulls = true) fields = [] - each_field do |fid, type, name, default, optional| + each_field do |fid, field_info| + name = field_info[:name] value = instance_variable_get("@#{name}") - unless skip_optional_nulls && optional && value.nil? + unless skip_optional_nulls && field_info[:optional] && value.nil? fields << "#{name}:#{value.inspect}" end end @@ -84,48 +80,40 @@ module Thrift end def read(iprot) - # TODO(kevinclark): Make sure transport is C readable - if iprot.respond_to?(:decode_binary) - iprot.decode_binary(self, iprot.trans) - else - iprot.read_struct_begin - loop do - fname, ftype, fid = iprot.read_field_begin - break if (ftype == Types::STOP) - handle_message(iprot, fid, ftype) - iprot.read_field_end - end - iprot.read_struct_end + iprot.read_struct_begin + loop do + fname, ftype, fid = iprot.read_field_begin + break if (ftype == Types::STOP) + handle_message(iprot, fid, ftype) + iprot.read_field_end end + iprot.read_struct_end validate end def write(oprot) validate - # if oprot.respond_to?(:encode_binary) - # # TODO(kevinclark): Clean this so I don't have to access the transport. - # oprot.trans.write oprot.encode_binary(self) - # else - oprot.write_struct_begin(self.class.name) - each_field do |fid, type, name| - unless (value = instance_variable_get("@#{name}")).nil? - if is_container? type - oprot.write_field_begin(name, type, fid) - write_container(oprot, value, struct_fields[fid]) - oprot.write_field_end - else - oprot.write_field(name, type, fid, value) - end + oprot.write_struct_begin(self.class.name) + each_field do |fid, field_info| + name = field_info[:name] + type = field_info[:type] + if (value = instance_variable_get("@#{name}")) + if is_container? type + oprot.write_field_begin(name, type, fid) + write_container(oprot, value, field_info) + oprot.write_field_end + else + oprot.write_field(name, type, fid, value) end end - oprot.write_field_stop - oprot.write_struct_end - # end + end + oprot.write_field_stop + oprot.write_struct_end end def ==(other) - return false unless other.is_a?(self.class) - each_field do |fid, type, name, default| + each_field do |fid, field_info| + name = field_info[:name] return false unless self.instance_variable_get("@#{name}") == other.instance_variable_get("@#{name}") end true @@ -140,6 +128,19 @@ module Thrift 0 end + def differences(other) + diffs = [] + unless other.is_a?(self.class) + diffs << "Different class!" + else + each_field do |fid, field_info| + name = field_info[:name] + diffs << "#{name} differs!" unless self.instance_variable_get("@#{name}") == other.instance_variable_get("@#{name}") + end + end + diffs + end + def self.field_accessor(klass, *fields) fields.each do |field| klass.send :attr_reader, field @@ -256,8 +257,12 @@ module Thrift end end + CONTAINER_TYPES = [] + CONTAINER_TYPES[Types::LIST] = true + CONTAINER_TYPES[Types::MAP] = true + CONTAINER_TYPES[Types::SET] = true def is_container?(type) - [Types::LIST, Types::MAP, Types::SET].include? type + CONTAINER_TYPES[type] end def field_info(field) diff --git a/lib/rb/lib/thrift/transport.rb b/lib/rb/lib/thrift/transport.rb index e3a6511e2..5780e5bcc 100644 --- a/lib/rb/lib/thrift/transport.rb +++ b/lib/rb/lib/thrift/transport.rb @@ -297,6 +297,22 @@ module Thrift end end + def inspect_buffer + out = [] + for idx in 0...(@buf.size) + # if idx != 0 + # out << " " + # end + + if idx == @index + out << ">" + end + + out << @buf[idx].to_s(16) + end + out.join(" ") + end + alias_method :consume!, :read end deprecate_class! :TMemoryBuffer => MemoryBuffer diff --git a/lib/rb/script/proto_benchmark.rb b/lib/rb/script/proto_benchmark.rb new file mode 100644 index 000000000..1e1f08f01 --- /dev/null +++ b/lib/rb/script/proto_benchmark.rb @@ -0,0 +1,104 @@ +require File.dirname(__FILE__) + "/../spec/spec_helper.rb" +require "lib/thrift/serializer" +require "lib/thrift/protocol/binaryprotocolaccelerated" + +require "benchmark" +# require "ruby-prof" + +obj = Fixtures::COMPACT_PROTOCOL_TEST_STRUCT + +HOW_MANY = 1_000 + +binser = Thrift::Serializer.new +bin_data = binser.serialize(obj) +bindeser = Thrift::Deserializer.new +accel_bin_ser = Thrift::Serializer.new(Thrift::BinaryProtocolAcceleratedFactory.new) +accel_bin_deser = Thrift::Deserializer.new(Thrift::BinaryProtocolAcceleratedFactory.new) + +compact_ser = Thrift::Serializer.new(Thrift::CompactProtocolFactory.new) +compact_data = compact_ser.serialize(obj) +compact_deser = Thrift::Deserializer.new(Thrift::CompactProtocolFactory.new) + +Benchmark.bm(60) do |reporter| + reporter.report("binary protocol, write") do + HOW_MANY.times do + binser.serialize(obj) + end + end + + reporter.report("accelerated binary protocol, write") do + HOW_MANY.times do + accel_bin_ser.serialize(obj) + end + end + + reporter.report("compact protocol, write") do + # RubyProf.start + HOW_MANY.times do + compact_ser.serialize(obj) + end + # result = RubyProf.stop + # printer = RubyProf::GraphHtmlPrinter.new(result) + # file = File.open("profile.html", "w+") + # printer.print(file, 0) + # file.close + end + + reporter.report("binary protocol, read") do + HOW_MANY.times do + bindeser.deserialize(obj, bin_data) + end + end + + reporter.report("accelerated binary protocol, read") do + HOW_MANY.times do + accel_bin_deser.deserialize(obj, bin_data) + end + end + + reporter.report("compact protocol, read") do + HOW_MANY.times do + compact_deser.deserialize(obj, compact_data) + end + end + + + # f = File.new("/tmp/testfile", "w") + # proto = Thrift::BinaryProtocolAccelerated.new(Thrift::IOStreamTransport.new(Thrift::MemoryBuffer.new, f)) + # reporter.report("accelerated binary protocol, write (to disk)") do + # HOW_MANY.times do + # obj.write(proto) + # end + # f.flush + # end + # f.close + # + # f = File.new("/tmp/testfile", "r") + # proto = Thrift::BinaryProtocolAccelerated.new(Thrift::IOStreamTransport.new(f, Thrift::MemoryBuffer.new)) + # reporter.report("accelerated binary protocol, read (from disk)") do + # HOW_MANY.times do + # obj.read(proto) + # end + # end + # f.close + # + # f = File.new("/tmp/testfile", "w") + # reporter.report("compact protocol, write (to disk)") do + # proto = Thrift::CompactProtocol.new(Thrift::IOStreamTransport.new(Thrift::MemoryBuffer.new, f)) + # HOW_MANY.times do + # obj.write(proto) + # end + # f.flush + # end + # f.close + # + # f = File.new("/tmp/testfile", "r") + # reporter.report("compact protocol, read (from disk)") do + # proto = Thrift::CompactProtocol.new(Thrift::IOStreamTransport.new(f, Thrift::MemoryBuffer.new)) + # HOW_MANY.times do + # obj.read(proto) + # end + # end + # f.close + +end \ No newline at end of file diff --git a/lib/rb/script/read_struct.rb b/lib/rb/script/read_struct.rb new file mode 100644 index 000000000..c43ba58c1 --- /dev/null +++ b/lib/rb/script/read_struct.rb @@ -0,0 +1,25 @@ +require "spec/spec_helper" +require "lib/thrift/serializer" + +path, factory_class = ARGV + +factory = eval(factory_class).new + +deser = Thrift::Deserializer.new(factory) + +cpts = CompactProtoTestStruct.new +CompactProtoTestStruct.constants.each do |const| + cpts.instance_variable_set("@#{const}", nil) +end + +data = File.read(path) + +deser.deserialize(cpts, data) + +if cpts == Fixtures::COMPACT_PROTOCOL_TEST_STRUCT + puts "Object verified successfully!" +else + puts "Object failed verification! Expected #{Fixtures::COMPACT_PROTOCOL_TEST_STRUCT.inspect} but got #{cpts.inspect}" + + puts cpts.differences(Fixtures::COMPACT_PROTOCOL_TEST_STRUCT) +end diff --git a/lib/rb/script/write_struct.rb b/lib/rb/script/write_struct.rb new file mode 100644 index 000000000..28010ac22 --- /dev/null +++ b/lib/rb/script/write_struct.rb @@ -0,0 +1,12 @@ +require "spec/spec_helper" +require "lib/thrift/serializer" + +path, factory_class = ARGV + +factory = eval(factory_class).new + +ser = Thrift::Serializer.new(factory) + +File.open(path, "w") do |file| + file.write(ser.serialize(Fixtures::COMPACT_PROTOCOL_TEST_STRUCT)) +end \ No newline at end of file diff --git a/lib/rb/spec/compact_protocol_spec.rb b/lib/rb/spec/compact_protocol_spec.rb new file mode 100644 index 000000000..516f19eb0 --- /dev/null +++ b/lib/rb/spec/compact_protocol_spec.rb @@ -0,0 +1,110 @@ +require File.dirname(__FILE__) + '/spec_helper' +require "thrift/protocol/compact_protocol" + +describe Thrift::CompactProtocol do + TESTS = { + :byte => (-127..127).to_a, + :i16 => (0..14).map {|shift| [1 << shift, -(1 << shift)]}.flatten.sort, + :i32 => (0..30).map {|shift| [1 << shift, -(1 << shift)]}.flatten.sort, + :i64 => (0..62).map {|shift| [1 << shift, -(1 << shift)]}.flatten.sort, + :string => ["", "1", "short", "fourteen123456", "fifteen12345678", "1" * 127, "1" * 3000], + :binary => ["", "\001", "\001" * 5, "\001" * 14, "\001" * 15, "\001" * 127, "\001" * 3000], + :double => [0.0, 1.0, -1.0, 1.1, -1.1, 10000000.1, 1.0/0.0, -1.0/0.0], + :bool => [true, false] + } + + it "should encode and decode naked primitives correctly" do + TESTS.each_pair do |primitive_type, test_values| + test_values.each do |value| + # puts "testing #{value}" if primitive_type == :i64 + trans = Thrift::MemoryBuffer.new + proto = Thrift::CompactProtocol.new(trans) + + proto.send(writer(primitive_type), value) + # puts "buf: #{trans.inspect_buffer}" if primitive_type == :i64 + read_back = proto.send(reader(primitive_type)) + read_back.should == value + end + end + end + + it "should encode and decode primitives in fields correctly" do + TESTS.each_pair do |primitive_type, test_values| + final_primitive_type = primitive_type == :binary ? :string : primitive_type + thrift_type = Thrift::Types.const_get(final_primitive_type.to_s.upcase) + # puts primitive_type + test_values.each do |value| + trans = Thrift::MemoryBuffer.new + proto = Thrift::CompactProtocol.new(trans) + + proto.write_field_begin(nil, thrift_type, 15) + proto.send(writer(primitive_type), value) + proto.write_field_end + + proto = Thrift::CompactProtocol.new(trans) + name, type, id = proto.read_field_begin + type.should == thrift_type + id.should == 15 + read_back = proto.send(reader(primitive_type)) + read_back.should == value + proto.read_field_end + end + end + end + + it "should encode and decode a monster struct correctly" do + trans = Thrift::MemoryBuffer.new + proto = Thrift::CompactProtocol.new(trans) + + struct = CompactProtoTestStruct.new + # sets and maps don't hash well... not sure what to do here. + struct.set_byte_map = nil + struct.map_byte_map = nil + struct.write(proto) + + # puts trans.inspect + + struct2 = CompactProtoTestStruct.new + struct2.instance_variables.each do |ivar| + struct2.instance_variable_set(ivar, nil) + end + + struct2.should_not == struct + + struct2.read(proto) + + struct2.should == struct + end + + it "should make method calls correctly" do + client_out_trans = Thrift::MemoryBuffer.new + client_out_proto = Thrift::CompactProtocol.new(client_out_trans) + + client_in_trans = Thrift::MemoryBuffer.new + client_in_proto = Thrift::CompactProtocol.new(client_in_trans) + + processor = Srv::Processor.new(JankyHandler.new) + + client = Srv::Client.new(client_in_proto, client_out_proto) + client.send_Janky(1) + # puts client_out_trans.inspect_buffer + processor.process(client_out_proto, client_in_proto) + client.recv_Janky.should == 2 + end + + class JankyHandler + def Janky(i32arg) + i32arg * 2 + end + end + + def writer(sym) + sym = sym == :binary ? :string : sym + "write_#{sym.to_s}" + end + + def reader(sym) + sym = sym == :binary ? :string : sym + "read_#{sym.to_s}" + end +end \ No newline at end of file diff --git a/lib/rb/spec/deprecation_spec.rb b/lib/rb/spec/deprecation_spec.rb index 77786d813..007452823 100644 --- a/lib/rb/spec/deprecation_spec.rb +++ b/lib/rb/spec/deprecation_spec.rb @@ -436,6 +436,9 @@ describe "deprecate_module!" do FIELDS = { 1 => {:name => "foo", :type => Thrift::Types::STRING} } + def struct_fields + FIELDS + end end stub_stderr('ThriftStruct') klass.new(:foo => "foo") diff --git a/lib/rb/spec/spec_helper.rb b/lib/rb/spec/spec_helper.rb index a9e13744c..30b9da701 100644 --- a/lib/rb/spec/spec_helper.rb +++ b/lib/rb/spec/spec_helper.rb @@ -32,4 +32,11 @@ Spec::Runner.configure do |configuration| end end -require "thrift_native" \ No newline at end of file +require "thrift/protocol/compact_protocol" +require "thrift_native" + +require File.dirname(__FILE__) + "/../debug_proto_test/gen-rb/Srv" + +module Fixtures + COMPACT_PROTOCOL_TEST_STRUCT = CompactProtoTestStruct.new(:a_binary => [0,1,2,3,4,5,6,7,8].pack('c*')) +end \ No newline at end of file diff --git a/lib/rb/spec/struct_spec.rb b/lib/rb/spec/struct_spec.rb index 1a22f57f7..1eda3c3b4 100644 --- a/lib/rb/spec/struct_spec.rb +++ b/lib/rb/spec/struct_spec.rb @@ -10,16 +10,8 @@ class ThriftStructSpec < Spec::ExampleGroup describe Struct do it "should iterate over all fields properly" do fields = {} - Foo.new.each_field { |fid,type,name,default,optional| fields[fid] = [type,name,default,optional] } - fields.should == { - 1 => [Types::I32, 'simple', 53, nil], - 2 => [Types::STRING, 'words', "words", nil], - 3 => [Types::STRUCT, 'hello', Hello.new(:greeting => 'hello, world!'), nil], - 4 => [Types::LIST, 'ints', [1, 2, 2, 3], nil], - 5 => [Types::MAP, 'complex', nil, nil], - 6 => [Types::SET, 'shorts', Set.new([5, 17, 239]), nil], - 7 => [Types::STRING, 'opt_string', nil, true] - } + Foo.new.each_field { |fid,field_info| fields[fid] = field_info } + fields.should == Foo::FIELDS end it "should initialize all fields to defaults" do