merge string branch and go to new spec, for str type. TODO: ext types and nif types. in favor of #16

This commit is contained in:
UENISHI Kota 2013-09-09 22:03:44 +09:00
commit 17534112a7
14 changed files with 203 additions and 90 deletions

View File

@ -1,4 +1,4 @@
.PHONY: compile xref eunit clean doc check make deps
.PHONY: compile xref eunit clean doc check make deps test
REBAR=./rebar
@ -26,6 +26,9 @@ eunit: xref
test: eunit
eunit: compile
@./rebar skip_deps=true eunit
clean:
@$(REBAR) clean

View File

@ -10,6 +10,15 @@ prequisites for runtime
Erlang runtime system ( http://erlang.org/ ), >= R15B -- otherwise rebar won't work.
Based on [the new msgpack spec 232a0d](https://github.com/msgpack/msgpack/blob/232a0d14c6057000cc4a478f0dfbb5942ac54e9e/spec.md) .
Now this supports string type.
::
1> {ok, "埼玉"} = msgpack:unpack(msgpack:pack("埼玉")).
{ok,[22524,29577]}
rebar.config
------------
@ -38,19 +47,30 @@ Stream deserialization
experimental feature: NIF (de)serializer
----------------------------------------
**Currently NIF is unavailable on both new and old spec.**
since 0.1.1 - only tested in MacOS, Linux
::
src/msgpack.erl:343:<0.131.0>: serialize: 0.405 s
src/msgpack.erl:344:<0.131.0>: deserialize: 0.470 s
src/msgpack.erl:345:<0.131.0>: for 1884 KB test data(msgpack).
src/msgpack.erl:349:<0.131.0>: serialize: 0.019 s
src/msgpack.erl:350:<0.131.0>: deserialize: 0.036 s
src/msgpack.erl:351:<0.131.0>: for 1884 KB test data(msgpack_nif).
src/msgpack.erl:355:<0.131.0>: serialize: 0.043 s
src/msgpack.erl:356:<0.131.0>: deserialize: 0.027 s
src/msgpack.erl:357:<0.131.0>: for 3828 KB test data(t2b/b2t).
test/bench_tests.erl:36:<0.125.0>: serialize: 0.543 s
test/bench_tests.erl:37:<0.125.0>: deserialize: 0.653 s
test/bench_tests.erl:38:<0.125.0>: for 2041 KB test data(jiffy).
test/bench_tests.erl:42:<0.125.0>: serialize: 0.508 s
test/bench_tests.erl:43:<0.125.0>: deserialize: 0.630 s
test/bench_tests.erl:44:<0.125.0>: for 2041 KB test data(jsx).
test/bench_tests.erl:54:<0.125.0>: serialize: 0.063 s
test/bench_tests.erl:55:<0.125.0>: deserialize: 0.053 s
test/bench_tests.erl:56:<0.125.0>: for 3828 KB test data(t2b/b2t).
test/bench_tests.erl:75:<0.125.0>: serialize: 1.332 s
test/bench_tests.erl:87:<0.125.0>: deserialize: 1.601 s
test/bench_tests.erl:88:<0.125.0>: for 2041 KB test data(jiffy x 5).
test/bench_tests.erl:75:<0.125.0>: serialize: 1.243 s
test/bench_tests.erl:87:<0.125.0>: deserialize: 3.233 s
test/bench_tests.erl:88:<0.125.0>: for 2041 KB test data(jsx x 5).
test/bench_tests.erl:75:<0.125.0>: serialize: 0.076 s
test/bench_tests.erl:87:<0.125.0>: deserialize: 0.061 s
test/bench_tests.erl:88:<0.125.0>: for 3828 KB test data(t2b/b2t x 5).
License

View File

@ -28,7 +28,8 @@
interface = jiffy :: jiffy | jsx,
map_unpack_fun = fun msgpack_unpacker:unpack_map_jiffy/4 :: fun(),
impl = erlang :: erlang | nif,
allow_atom = none :: none | pack %% allows atom when packing
allow_atom = none :: none | pack, %% allows atom when packing
enable_str = true :: boolean() %% false for old spec
}).
-define(OPTION, #options_v2).
-type msgpack_option() :: #options_v2{}.

View File

@ -11,15 +11,15 @@
]
}.
{port_sources, ["c_src/*.c"]}.
{port_env, [
%% Make sure to set -fPIC when compiling leveldb
{"CFLAGS", "$CFLAGS -Wall -O3 -fPIC"},
{"CXXFLAGS", "$CXXFLAGS -Wall -O3 -fPIC"},
{"DRV_CFLAGS", "$DRV_CFLAGS -O3 -Wall -I c_src/msgpack-0.5.7/src"},
{"DRV_LDFLAGS", "$DRV_LDFLAGS c_src/msgpack-0.5.7/src/.libs/libmsgpack.a"}
]}.
%% {port_sources, ["c_src/*.c"]}.
%% {port_env, [
%% %% Make sure to set -fPIC when compiling leveldb
%% {"CFLAGS", "$CFLAGS -Wall -O3 -fPIC"},
%% {"CXXFLAGS", "$CXXFLAGS -Wall -O3 -fPIC"},
%% {"DRV_CFLAGS", "$DRV_CFLAGS -O3 -Wall -I c_src/msgpack-0.5.7/src"},
%% {"DRV_LDFLAGS", "$DRV_LDFLAGS c_src/msgpack-0.5.7/src/.libs/libmsgpack.a"}
%% ]}.
{pre_hooks, [{compile, "sh c_src/build.sh"}]}.
%% {pre_hooks, [{compile, "sh c_src/build.sh"}]}.
{post_hooks, [{clean, "rm -rf c_src/msgpack-0.5.7"}]}.
%% {post_hooks, [{clean, "rm -rf c_src/msgpack-0.5.7"}]}.

View File

@ -35,7 +35,8 @@
-module(msgpack).
-export([pack/1, unpack/1, unpack_stream/1, pack/2, unpack/2, unpack_stream/2]).
-export([pack/1, unpack/1, unpack_stream/1,
pack/2, unpack/2, unpack_stream/2]).
-type msgpack_map_jsx() :: [{msgpack_term(), msgpack_term()}] | [{}].
@ -125,6 +126,7 @@ unpack_stream(Bin, [Interface]) ->
throw:Exception -> {error, Exception}
end.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% unit tests
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

View File

@ -51,7 +51,8 @@ unpack(Data) when is_binary(Data) ->
unpack(Badarg) ->
{error, {badarg, Badarg}}.
%% NOTE: nif is disabled until new C version is released.
-undef(TEST).
-ifdef(TEST).
mini_test()->

View File

@ -31,12 +31,17 @@ pack(F, _) when is_float(F) ->
pack_double(F);
pack(nil, _) ->
<< 16#C0:8 >>;
pack(true, _) ->
<< 16#C3:8 >>;
pack(false, _) ->
<< 16#C2:8 >>;
pack(Bin, _) when is_binary(Bin) ->
pack_raw(Bin);
pack(true, _) ->
<< 16#C3:8 >>;
pack(Bin, Opt) when is_binary(Bin) ->
case Opt of
#options_v2{enable_str=true} = Opt -> pack_raw2(Bin);
#options_v2{enable_str=false} = Opt -> pack_raw(Bin);
#options_v1{} = Opt -> pack_raw(Bin)
end;
pack(Atom, #options_v2{allow_atom=pack} = Opt) when is_atom(Atom) ->
pack(erlang:atom_to_binary(Atom, unicode), Opt);
@ -51,8 +56,28 @@ pack(Map, Opt = ?OPTION{interface=jsx}) when Map =:= [{}]->
pack([{_,_}|_] = Map, Opt = ?OPTION{interface=jsx}) ->
pack_map(Map, Opt);
pack(List, #options_v2{enable_str=true}=Opt) when is_list(List) ->
try
case lists:all(fun is_integer/1, List) of
true ->
case pack_string(List, Opt) of
%% NOTE: due to erlang string format, msgpack can't
%% tell the difference between a string and a list of
%% integers. Thus users have to take care not to
%% include invalid unicode characters.
%% Fall back to packing as list(int()) here.
{error, _} -> pack_array(List, Opt);
Bin when is_binary(Bin) -> Bin
end;
false ->
pack_array(List, Opt)
end
catch error:badarg -> pack_array(List, Opt)
end;
pack(List, Opt) when is_list(List) ->
pack_array(List, Opt);
pack(Other, _) ->
throw({badarg, Other}).
@ -102,7 +127,7 @@ pack_double(F) ->
-spec pack_raw(binary()) -> binary().
%% raw bytes
%% raw bytes in old spec
pack_raw(Bin) ->
case byte_size(Bin) of
Len when Len < 32->
@ -113,6 +138,29 @@ pack_raw(Bin) ->
<< 16#DB:8, Len:32/big-unsigned-integer-unit:1, Bin/binary >>
end.
-spec pack_raw2(binary()) -> binary().
%% @doc Pack raw bytes using the "bin" family of the new msgpack spec.
%% The smallest header whose length field can hold byte_size(Bin) is chosen:
%%   16#C4 (bin 8)  - 8-bit length,  payloads up to 255 bytes
%%   16#C5 (bin 16) - 16-bit length, payloads up to 65535 bytes
%%   16#C6 (bin 32) - 32-bit length, anything larger
%% Returns the header followed by the unmodified payload.
pack_raw2(Bin) ->
    case byte_size(Bin) of
        %% bin 8 carries a full 8-bit length; the former `< 32` limit was
        %% the fixraw limit and pushed 32..255 byte payloads into bin 16.
        Len when Len < 16#100 -> % 256
            << 16#C4:8, Len:8/big-unsigned-integer-unit:1, Bin/binary >>;
        Len when Len < 16#10000 -> % 65536
            << 16#C5:8, Len:16/big-unsigned-integer-unit:1, Bin/binary >>;
        Len ->
            << 16#C6:8, Len:32/big-unsigned-integer-unit:1, Bin/binary >>
    end.
%% @doc Pack an Erlang string (a list of integers) as a msgpack string.
%% The list is first converted with unicode:characters_to_binary/1 and the
%% resulting binary is handed to pack_raw/1. The text itself may be in any
%% encoding as far as msgpack is concerned; what matters is that the byte
%% length directly follows the type indicator, so that a reader which cannot
%% interpret the string is still able to skip over it.
%% When the list cannot be converted, {error, broken_unicode} or
%% {error, incomplete_unicode} is returned instead of a binary.
-spec pack_string(list(), msgpack_option()) -> binary() | {error, atom()}.
pack_string(String, _Opt) ->
    case unicode:characters_to_binary(String) of
        Encoded when is_binary(Encoded) ->
            pack_raw(Encoded);
        {incomplete, _Converted, _Remainder} ->
            {error, incomplete_unicode};
        {error, _Converted, _Remainder} ->
            {error, broken_unicode}
    end.
-spec pack_array([msgpack:object()], list()) -> binary() | no_return().
pack_array([], _) ->
<< 2#1001:4, 0:4/integer-unit:1 >>;

View File

@ -34,6 +34,14 @@ unpack_stream(<<16#C2, Rest/binary>>, _) ->
unpack_stream(<<16#C3, Rest/binary>>, _) ->
{true, Rest};
%% Raw bytes
unpack_stream(<<16#C4, L:8/big-unsigned-integer-unit:1, V:L/binary, Rest/binary>>, _) ->
{V, Rest};
unpack_stream(<<16#C5, L:16/big-unsigned-integer-unit:1, V:L/binary, Rest/binary>>, _) ->
{V, Rest};
unpack_stream(<<16#C6, L:32/big-unsigned-integer-unit:1, V:L/binary, Rest/binary>>, _) ->
{V, Rest};
%% Floats
unpack_stream(<<16#CA, V:32/float-unit:1, Rest/binary>>, _) ->
{V, Rest};
@ -60,18 +68,31 @@ unpack_stream(<<16#D2, V:32/big-signed-integer-unit:1, Rest/binary>>, _) ->
unpack_stream(<<16#D3, V:64/big-signed-integer-unit:1, Rest/binary>>, _) ->
{V, Rest};
%% Raw bytes
unpack_stream(<<16#DA, L:16/unsigned-integer-unit:1, V:L/binary, Rest/binary>>, _) ->
{V, Rest};
unpack_stream(<<16#DB, L:32/unsigned-integer-unit:1, V:L/binary, Rest/binary>>, _) ->
{V, Rest};
%% Strings
unpack_stream(<<2#101:3, L:5, V:L/binary, Rest/binary>>,
?OPTION{enable_str=true} = _Opt) ->
{unpack_string(V), Rest};
unpack_stream(<<16#DA, L:16/big-unsigned-integer-unit:1, V:L/binary, Rest/binary>>,
?OPTION{enable_str=true} = _Opt) ->
{unpack_string(V), Rest};
unpack_stream(<<16#DB, L:32/big-unsigned-integer-unit:1, V:L/binary, Rest/binary>>,
?OPTION{enable_str=true} = _Opt) ->
{unpack_string(V), Rest};
%% Arrays
unpack_stream(<<2#1001:4, L:4, Rest/binary>>, Opt) ->
unpack_array(Rest, L, [], Opt);
unpack_stream(<<16#DC, L:16/big-unsigned-integer-unit:1, Rest/binary>>, Opt) ->
unpack_array(Rest, L, [], Opt);
unpack_stream(<<16#DD, L:32/big-unsigned-integer-unit:1, Rest/binary>>, Opt) ->
unpack_array(Rest, L, [], Opt);
%% Maps
unpack_stream(<<2#1000:4, L:4, Rest/binary>>, Opt) ->
Unpacker = Opt?OPTION.map_unpack_fun,
Unpacker(Rest, L, [], Opt);
unpack_stream(<<16#DE, L:16/big-unsigned-integer-unit:1, Rest/binary>>, Opt) ->
Unpacker = Opt?OPTION.map_unpack_fun,
Unpacker(Rest, L, [], Opt);
@ -87,26 +108,16 @@ unpack_stream(<<0:1, V:7, Rest/binary>>, _) -> {V, Rest};
%% negative int
unpack_stream(<<2#111:3, V:5, Rest/binary>>, _) -> {V - 2#100000, Rest};
%% raw bytes
unpack_stream(<<2#101:3, L:5, V:L/binary, Rest/binary>>, _) -> {V, Rest};
%% array
unpack_stream(<<2#1001:4, L:4, Rest/binary>>, Opt) ->
unpack_array(Rest, L, [], Opt);
%% map
unpack_stream(<<2#1000:4, L:4, Rest/binary>>, Opt) ->
Unpacker = Opt?OPTION.map_unpack_fun,
Unpacker(Rest, L, [], Opt);
%% Invalid data
unpack_stream(<<F, R/binary>>, _) when F==16#C1;
F==16#C4; F==16#C5; F==16#C6; F==16#C7; F==16#C8; F==16#C9;
F==16#D4; F==16#D5; F==16#D6; F==16#D7; F==16#D8; F==16#D9 ->
throw({badarg, <<F, R/binary>>});
%% Incomplete data (we've covered every complete/invalid case; anything left is incomplete)
unpack_stream(_, _) ->
throw(incomplete).
unpack_stream(<<16#C1, _R/binary>>, _) -> throw({badarg, 16#C1});
%% for extension types
unpack_stream(<<F, _/binary>>, _) when
F==16#C7; F==16#C8; F==16#C9;
F==16#D4; F==16#D5; F==16#D6; F==16#D7; F==16#D8 -> throw({badarg, F});
unpack_stream(_, _) -> throw(incomplete).
-spec unpack_array(binary(), non_neg_integer(), [msgpack:object()], msgpack_option()) ->
{[msgpack:object()], binary()} | no_return().
@ -144,3 +155,12 @@ unpack_map_jsx(Bin, Len, Acc, Opt) ->
{Key, Rest} = unpack_stream(Bin, Opt),
{Value, Rest2} = unpack_stream(Rest, Opt),
unpack_map_jsx(Rest2, Len-1, [{Key,Value}|Acc], Opt).
%% @doc Decode the payload of a msgpack string into an Erlang string.
%% NOTE: the payload IS validated: it must be convertible in full by
%% unicode:characters_to_list/1. If the conversion reports an error or a
%% truncated (incomplete) sequence, {error, {invalid_string, Binary}} is
%% thrown with the original payload.
-spec unpack_string(binary()) -> string() | no_return().
unpack_string(Binary) ->
    case unicode:characters_to_list(Binary) of
        {error, _S, _Rest} ->
            throw({error, {invalid_string, Binary}});
        %% The tag returned by the unicode module is `incomplete`; it used to
        %% be misspelled here, which let the 3-tuple fall into the catch-all
        %% clause below and be returned as if it were a decoded string.
        {incomplete, _S, _Rest} ->
            throw({error, {invalid_string, Binary}});
        String ->
            String
    end.

View File

@ -43,11 +43,11 @@ benchmark1_test()->
{ok, Data}=?debugTime("deserialize", msgpack:unpack(S, [jsx])),
?debugFmt("for ~p KB test data(jsx).", [byte_size(S) div 1024]).
benchmark2_test()->
Data=[test_data() || _ <- lists:seq(0, ?CNT)],
S=?debugTime(" serialize", msgpack_nif:pack(Data)),
{ok, Data}=?debugTime("deserialize", msgpack_nif:unpack(S)),
?debugFmt("for ~p KB test data(nif).", [byte_size(S) div 1024]).
%% benchmark2_test()->
%% Data=[test_data() || _ <- lists:seq(0, ?CNT)],
%% S=?debugTime(" serialize", msgpack_nif:pack(Data)),
%% {ok, Data}=?debugTime("deserialize", msgpack_nif:unpack(S)),
%% ?debugFmt("for ~p KB test data(nif).", [byte_size(S) div 1024]).
benchmark3_test()->
Data=[test_data() || _ <- lists:seq(0, ?CNT)],
@ -112,16 +112,16 @@ benchmark_p1_test_() ->
msgpack:unpack(Data, [jsx])
end))}.
benchmark_p2_test_() ->
{timeout, 600,
?_assertEqual(ok,
multirunner("nif",
fun(Data) ->
msgpack_nif:pack(Data)
end,
fun(Data) ->
msgpack_nif:unpack(Data)
end))}.
%% benchmark_p2_test_() ->
%% {timeout, 600,
%% ?_assertEqual(ok,
%% multirunner("nif",
%% fun(Data) ->
%% msgpack_nif:pack(Data)
%% end,
%% fun(Data) ->
%% msgpack_nif:unpack(Data)
%% end))}.
benchmark_p3_test_() ->
{timeout, 600,

View File

@ -1,5 +1,8 @@
-module(msgpack_nif_tests).
-undef(NIF).
-ifdef(NIF).
-import(msgpack_nif, [pack/1, unpack/1]).
-include_lib("eunit/include/eunit.hrl").
@ -96,6 +99,8 @@ binary_test_() ->
end}
].
-endif.
%% long_binary_test_()->
%% [
%% {"long binary",

View File

@ -1,6 +1,6 @@
-module(msgpack_proper).
-export([choose_type_jsx/0, choose_type_jiffy/0]).
-export([choose_type/0, choose_type_jsx/0, choose_type_jiffy/0]).
-export([array16_jsx/0, array32_jsx/0,
map16_jsx/0, map32_jsx/0]).
@ -11,6 +11,9 @@
-include_lib("proper/include/proper.hrl").
%% Default generator: choose_type/0 simply delegates to choose_type_jiffy/0,
%% so callers that do not pick an interface explicitly get the jiffy variant.
choose_type() -> choose_type_jiffy().
choose_type_jsx() ->
oneof([positive_fixnum(), negative_fixnum(),
int8(), int16(), int32(), int64(),

View File

@ -1,11 +1,13 @@
-module(msgpack_props).
-include_lib("proper/include/proper.hrl").
-include_lib("eunit/include/eunit.hrl").
-import(msgpack_proper, [choose_type/0]).
-import(msgpack_proper, [choose_type_jsx/0,
choose_type_jiffy/0]).
prop_type() ->
numtests(300,
numtests(128,
?FORALL(Term, choose_type_jsx(),
begin
Binary = msgpack:pack(Term, [jsx]),
@ -22,19 +24,15 @@ prop_type() ->
choose_reserved() ->
oneof([<<16#C1>>,
<<16#C4>>,
<<16#C5>>,
<<16#C6>>,
<<16#C7>>,
<<16#C8>>,
<<16#C9>>,
<<16#D4>>,
<<16#D5>>,
<<16#D6>>,
<<16#D7>>,
<<16#D8>>,
<<16#D9>>]).
oneof([16#C1,
16#C7,
16#C8,
16#C9,
16#D4,
16#D5,
16#D6,
16#D7,
16#D8]).
prop_reserved() ->
numtests(300,

View File

@ -105,12 +105,12 @@ issue_jsx_5_test() ->
}
],
Encoded = msgpack:pack(Term, [jsx]),
Bin0 = <<130,164,116,121,112,101,167,119,111,114,107,101,
114,115,164,100,97,116,97,145,130,168,119,111,114,
107,101,114,105,100,165,115,116,100,46,49,165,115,108,111,116,115,144>>,
Bin0 = <<130,196,4,116,121,112,101,196,7,119,111,114,107,101,114,115,
196,4,100,97,116,97,145,130,196,8,119,111,114,107,101,114,105,100,
196,5,115,116,100,46,49,196,5,115,108,111,116,115,160>>,
?assertEqual(Bin0, Encoded),
{ok, Decoded} = msgpack:unpack(Encoded, [jsx]),
{ok, Decoded} = msgpack:unpack(Bin0, [jsx]),
?assertEqual(Term, Decoded).
@ -124,9 +124,20 @@ issue_jiffy_5_test() ->
}
]},
Encoded = msgpack:pack(Term, [jiffy]),
Bin0 = <<130,164,116,121,112,101,167,119,111,114,107,101,
114,115,164,100,97,116,97,145,130,168,119,111,114,
107,101,114,105,100,165,115,116,100,46,49,165,115,108,111,116,115,144>>,
Bin0 = <<130,196,4,116,121,112,101,196,7,119,111,114,107,101,114,115,
196,4,100,97,116,97,145,130,196,8,119,111,114,107,101,114,105,100,
196,5,115,116,100,46,49,196,5,115,108,111,116,115,160>>,
?assertEqual(Bin0, Encoded),
{ok, Decoded} = msgpack:unpack(Encoded, [jiffy]),
{ok, Decoded} = msgpack:unpack(Bin0, [jiffy]),
?assertEqual(Term, Decoded).
%% Round-trip test for strings: reads the fixture file test/utf8.txt
%% (located relative to the current working directory), converts its
%% contents to a list of code points, and checks that unpacking the packed
%% string yields the very same list.
string_test() ->
    {ok, Cwd} = file:get_cwd(),
    {ok, FixtureBin} = file:read_file(Cwd ++ "/../test/utf8.txt"),
    Expected = unicode:characters_to_list(FixtureBin),
    Packed = msgpack:pack(Expected),
    {ok, Expected} = msgpack:unpack(Packed).

1
test/utf8.txt Normal file
View File

@ -0,0 +1 @@
東京埼玉