diff --git a/Makefile b/Makefile index a590cd7..53d2376 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: compile xref eunit clean doc check make deps +.PHONY: compile xref eunit clean doc check make deps test REBAR=./rebar @@ -26,6 +26,9 @@ eunit: xref test: eunit +eunit: compile + @./rebar skip_deps=true eunit + clean: @$(REBAR) clean diff --git a/README.rst b/README.rst index 2c2b81c..6569ef7 100644 --- a/README.rst +++ b/README.rst @@ -10,6 +10,15 @@ prequisites for runtime Erlang runtime system ( http://erlang.org/ ), >= R15B -- otherwise rebar won't work. Based on [the new msgpack spec 232a0d](https://github.com/msgpack/msgpack/blob/232a0d14c6057000cc4a478f0dfbb5942ac54e9e/spec.md) . +Now this supports string type. + +:: + + 1> {ok, "埼玉"} = msgpack:unpack(msgpack:pack("埼玉")). + {ok,[22524,29577]} + + + rebar.config ------------ @@ -38,19 +47,30 @@ Stream deserialization experimental feature: NIF (de)serializer ---------------------------------------- +**Currently NIF is unavailable on both new and old spec.** + since 0.1.1 - only tested in MacOS, Linux :: - src/msgpack.erl:343:<0.131.0>: serialize: 0.405 s - src/msgpack.erl:344:<0.131.0>: deserialize: 0.470 s - src/msgpack.erl:345:<0.131.0>: for 1884 KB test data(msgpack). - src/msgpack.erl:349:<0.131.0>: serialize: 0.019 s - src/msgpack.erl:350:<0.131.0>: deserialize: 0.036 s - src/msgpack.erl:351:<0.131.0>: for 1884 KB test data(msgpack_nif). - src/msgpack.erl:355:<0.131.0>: serialize: 0.043 s - src/msgpack.erl:356:<0.131.0>: deserialize: 0.027 s - src/msgpack.erl:357:<0.131.0>: for 3828 KB test data(t2b/b2t). + test/bench_tests.erl:36:<0.125.0>: serialize: 0.543 s + test/bench_tests.erl:37:<0.125.0>: deserialize: 0.653 s + test/bench_tests.erl:38:<0.125.0>: for 2041 KB test data(jiffy). + test/bench_tests.erl:42:<0.125.0>: serialize: 0.508 s + test/bench_tests.erl:43:<0.125.0>: deserialize: 0.630 s + test/bench_tests.erl:44:<0.125.0>: for 2041 KB test data(jsx). 
+ test/bench_tests.erl:54:<0.125.0>: serialize: 0.063 s + test/bench_tests.erl:55:<0.125.0>: deserialize: 0.053 s + test/bench_tests.erl:56:<0.125.0>: for 3828 KB test data(t2b/b2t). + test/bench_tests.erl:75:<0.125.0>: serialize: 1.332 s + test/bench_tests.erl:87:<0.125.0>: deserialize: 1.601 s + test/bench_tests.erl:88:<0.125.0>: for 2041 KB test data(jiffy x 5). + test/bench_tests.erl:75:<0.125.0>: serialize: 1.243 s + test/bench_tests.erl:87:<0.125.0>: deserialize: 3.233 s + test/bench_tests.erl:88:<0.125.0>: for 2041 KB test data(jsx x 5). + test/bench_tests.erl:75:<0.125.0>: serialize: 0.076 s + test/bench_tests.erl:87:<0.125.0>: deserialize: 0.061 s + test/bench_tests.erl:88:<0.125.0>: for 3828 KB test data(t2b/b2t x 5). License diff --git a/include/msgpack.hrl b/include/msgpack.hrl index d71b182..c1a3913 100644 --- a/include/msgpack.hrl +++ b/include/msgpack.hrl @@ -28,7 +28,8 @@ interface = jiffy :: jiffy | jsx, map_unpack_fun = fun msgpack_unpacker:unpack_map_jiffy/4 :: fun(), impl = erlang :: erlang | nif, - allow_atom = none :: none | pack %% allows atom when packing + allow_atom = none :: none | pack, %% allows atom when packing + enable_str = true :: boolean() %% false for old spec }). -define(OPTION, #options_v2). -type msgpack_option() :: #options_v2{}. diff --git a/rebar.config b/rebar.config index d93dc23..606abf0 100644 --- a/rebar.config +++ b/rebar.config @@ -11,15 +11,15 @@ ] }. -{port_sources, ["c_src/*.c"]}. -{port_env, [ - %% Make sure to set -fPIC when compiling leveldb - {"CFLAGS", "$CFLAGS -Wall -O3 -fPIC"}, - {"CXXFLAGS", "$CXXFLAGS -Wall -O3 -fPIC"}, - {"DRV_CFLAGS", "$DRV_CFLAGS -O3 -Wall -I c_src/msgpack-0.5.7/src"}, - {"DRV_LDFLAGS", "$DRV_LDFLAGS c_src/msgpack-0.5.7/src/.libs/libmsgpack.a"} - ]}. +%% {port_sources, ["c_src/*.c"]}. 
+%% {port_env, [ +%% %% Make sure to set -fPIC when compiling leveldb +%% {"CFLAGS", "$CFLAGS -Wall -O3 -fPIC"}, +%% {"CXXFLAGS", "$CXXFLAGS -Wall -O3 -fPIC"}, +%% {"DRV_CFLAGS", "$DRV_CFLAGS -O3 -Wall -I c_src/msgpack-0.5.7/src"}, +%% {"DRV_LDFLAGS", "$DRV_LDFLAGS c_src/msgpack-0.5.7/src/.libs/libmsgpack.a"} +%% ]}. -{pre_hooks, [{compile, "sh c_src/build.sh"}]}. +%% {pre_hooks, [{compile, "sh c_src/build.sh"}]}. -{post_hooks, [{clean, "rm -rf c_src/msgpack-0.5.7"}]}. +%% {post_hooks, [{clean, "rm -rf c_src/msgpack-0.5.7"}]}. diff --git a/src/msgpack.erl b/src/msgpack.erl index f7c7297..0937693 100644 --- a/src/msgpack.erl +++ b/src/msgpack.erl @@ -35,7 +35,8 @@ -module(msgpack). --export([pack/1, unpack/1, unpack_stream/1, pack/2, unpack/2, unpack_stream/2]). +-export([pack/1, unpack/1, unpack_stream/1, + pack/2, unpack/2, unpack_stream/2]). -type msgpack_map_jsx() :: [{msgpack_term(), msgpack_term()}] | [{}]. @@ -125,6 +126,7 @@ unpack_stream(Bin, [Interface]) -> throw:Exception -> {error, Exception} end. + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% unit tests %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/src/msgpack_nif.erl b/src/msgpack_nif.erl index db7a320..93c2563 100644 --- a/src/msgpack_nif.erl +++ b/src/msgpack_nif.erl @@ -51,7 +51,8 @@ unpack(Data) when is_binary(Data) -> unpack(Badarg) -> {error, {badarg, Badarg}}. - +%% NOTE: nif is disabled until new C version is released. +-undef(TEST). -ifdef(TEST). 
mini_test()-> diff --git a/src/msgpack_packer.erl b/src/msgpack_packer.erl index 24452d6..11f4458 100644 --- a/src/msgpack_packer.erl +++ b/src/msgpack_packer.erl @@ -31,12 +31,17 @@ pack(F, _) when is_float(F) -> pack_double(F); pack(nil, _) -> << 16#C0:8 >>; -pack(true, _) -> - << 16#C3:8 >>; pack(false, _) -> << 16#C2:8 >>; -pack(Bin, _) when is_binary(Bin) -> - pack_raw(Bin); +pack(true, _) -> + << 16#C3:8 >>; + +pack(Bin, Opt) when is_binary(Bin) -> + case Opt of + #options_v2{enable_str=true} = Opt -> pack_raw2(Bin); + #options_v2{enable_str=false} = Opt -> pack_raw(Bin); + #options_v1{} = Opt -> pack_raw(Bin) + end; pack(Atom, #options_v2{allow_atom=pack} = Opt) when is_atom(Atom) -> pack(erlang:atom_to_binary(Atom, unicode), Opt); @@ -51,8 +56,28 @@ pack(Map, Opt = ?OPTION{interface=jsx}) when Map =:= [{}]-> pack([{_,_}|_] = Map, Opt = ?OPTION{interface=jsx}) -> pack_map(Map, Opt); +pack(List, #options_v2{enable_str=true}=Opt) when is_list(List) -> + try + case lists:all(fun is_integer/1, List) of + true -> + case pack_string(List, Opt) of + %% NOTE: due to erlang string format, msgpack can't + %% tell the differenec between string and list of + %% integers. Thus users have to take care not to + %% include invalid unicode characters. + %% Here to fallback into list(int()). + {error, _} -> pack_array(List, Opt); + Bin when is_binary(Bin) -> Bin + end; + false -> + pack_array(List, Opt) + end + catch error:badarg -> pack_array(List, Opt) + end; + pack(List, Opt) when is_list(List) -> pack_array(List, Opt); + pack(Other, _) -> throw({badarg, Other}). @@ -102,7 +127,7 @@ pack_double(F) -> -spec pack_raw(binary()) -> binary(). -%% raw bytes +%% raw bytes in old spec pack_raw(Bin) -> case byte_size(Bin) of Len when Len < 32-> @@ -113,6 +138,29 @@ pack_raw(Bin) -> << 16#DB:8, Len:32/big-unsigned-integer-unit:1, Bin/binary >> end. +-spec pack_raw2(binary()) -> binary(). 
+%% raw bytes in new spec +pack_raw2(Bin) -> + case byte_size(Bin) of + Len when Len < 32-> + << 16#C4:8, Len:8/big-unsigned-integer-unit:1, Bin/binary>>; + Len when Len < 16#10000 -> % 65536 + << 16#C5:8, Len:16/big-unsigned-integer-unit:1, Bin/binary >>; + Len -> + << 16#C6:8, Len:32/big-unsigned-integer-unit:1, Bin/binary >> + end. + +%% @doc String MAY be unicode. Or may be EUC-JP, SJIS, UTF-1024 or anything. +%% EVERY implementation must show its binary length just after type indicator +%% to skip the damn string if its unreadable. +-spec pack_string(list(), msgpack_option()) -> binary() | {error, atom()}. +pack_string(String, _Opt) -> + case unicode:characters_to_binary(String) of + {error, _Bin, _} -> {error, broken_unicode}; + {incomplete, _Bin, _} -> {error, incomplete_unicode}; + Bin -> pack_raw(Bin) + end. + -spec pack_array([msgpack:object()], list()) -> binary() | no_return(). pack_array([], _) -> << 2#1001:4, 0:4/integer-unit:1 >>; diff --git a/src/msgpack_unpacker.erl b/src/msgpack_unpacker.erl index 461f685..a7cb01e 100644 --- a/src/msgpack_unpacker.erl +++ b/src/msgpack_unpacker.erl @@ -34,6 +34,14 @@ unpack_stream(<<16#C2, Rest/binary>>, _) -> unpack_stream(<<16#C3, Rest/binary>>, _) -> {true, Rest}; +%% Raw bytes +unpack_stream(<<16#C4, L:8/big-unsigned-integer-unit:1, V:L/binary, Rest/binary>>, _) -> + {V, Rest}; +unpack_stream(<<16#C5, L:16/big-unsigned-integer-unit:1, V:L/binary, Rest/binary>>, _) -> + {V, Rest}; +unpack_stream(<<16#C6, L:32/big-unsigned-integer-unit:1, V:L/binary, Rest/binary>>, _) -> + {V, Rest}; + %% Floats unpack_stream(<<16#CA, V:32/float-unit:1, Rest/binary>>, _) -> {V, Rest}; @@ -60,18 +68,31 @@ unpack_stream(<<16#D2, V:32/big-signed-integer-unit:1, Rest/binary>>, _) -> unpack_stream(<<16#D3, V:64/big-signed-integer-unit:1, Rest/binary>>, _) -> {V, Rest}; -%% Raw bytes -unpack_stream(<<16#DA, L:16/unsigned-integer-unit:1, V:L/binary, Rest/binary>>, _) -> - {V, Rest}; -unpack_stream(<<16#DB, L:32/unsigned-integer-unit:1, 
V:L/binary, Rest/binary>>, _) -> - {V, Rest}; +%% Strings +unpack_stream(<<2#101:3, L:5, V:L/binary, Rest/binary>>, + ?OPTION{enable_str=true} = _Opt) -> + {unpack_string(V), Rest}; + +unpack_stream(<<16#DA, L:16/big-unsigned-integer-unit:1, V:L/binary, Rest/binary>>, + ?OPTION{enable_str=true} = _Opt) -> + {unpack_string(V), Rest}; + +unpack_stream(<<16#DB, L:32/big-unsigned-integer-unit:1, V:L/binary, Rest/binary>>, + ?OPTION{enable_str=true} = _Opt) -> + {unpack_string(V), Rest}; %% Arrays +unpack_stream(<<2#1001:4, L:4, Rest/binary>>, Opt) -> + unpack_array(Rest, L, [], Opt); unpack_stream(<<16#DC, L:16/big-unsigned-integer-unit:1, Rest/binary>>, Opt) -> unpack_array(Rest, L, [], Opt); unpack_stream(<<16#DD, L:32/big-unsigned-integer-unit:1, Rest/binary>>, Opt) -> unpack_array(Rest, L, [], Opt); + %% Maps +unpack_stream(<<2#1000:4, L:4, Rest/binary>>, Opt) -> + Unpacker = Opt?OPTION.map_unpack_fun, + Unpacker(Rest, L, [], Opt); unpack_stream(<<16#DE, L:16/big-unsigned-integer-unit:1, Rest/binary>>, Opt) -> Unpacker = Opt?OPTION.map_unpack_fun, Unpacker(Rest, L, [], Opt); @@ -87,26 +108,16 @@ unpack_stream(<<0:1, V:7, Rest/binary>>, _) -> {V, Rest}; %% negative int unpack_stream(<<2#111:3, V:5, Rest/binary>>, _) -> {V - 2#100000, Rest}; -%% raw bytes -unpack_stream(<<2#101:3, L:5, V:L/binary, Rest/binary>>, _) -> {V, Rest}; - -%% array -unpack_stream(<<2#1001:4, L:4, Rest/binary>>, Opt) -> - unpack_array(Rest, L, [], Opt); - -%% map -unpack_stream(<<2#1000:4, L:4, Rest/binary>>, Opt) -> - Unpacker = Opt?OPTION.map_unpack_fun, - Unpacker(Rest, L, [], Opt); %% Invalid data -unpack_stream(<>, _) when F==16#C1; - F==16#C4; F==16#C5; F==16#C6; F==16#C7; F==16#C8; F==16#C9; - F==16#D4; F==16#D5; F==16#D6; F==16#D7; F==16#D8; F==16#D9 -> - throw({badarg, <>}); -%% Incomplete data (we've covered every complete/invalid case; anything left is incomplete) -unpack_stream(_, _) -> - throw(incomplete). 
+unpack_stream(<<16#C1, _R/binary>>, _) -> throw({badarg, 16#C1}); + +%% for extension types +unpack_stream(<<F, _R/binary>>, _) when + F==16#C7; F==16#C8; F==16#C9; + F==16#D4; F==16#D5; F==16#D6; F==16#D7; F==16#D8 -> throw({badarg, F}); + +unpack_stream(_, _) -> throw(incomplete). -spec unpack_array(binary(), non_neg_integer(), [msgpack:object()], msgpack_option()) -> {[msgpack:object()], binary()} | no_return(). @@ -144,3 +155,12 @@ unpack_map_jsx(Bin, Len, Acc, Opt) -> {Key, Rest} = unpack_stream(Bin, Opt), {Value, Rest2} = unpack_stream(Rest, Opt), unpack_map_jsx(Rest2, Len-1, [{Key,Value}|Acc], Opt). + + +%% NOTE: msgpack DOES validate the binary as valid unicode string; invalid or truncated input throws {error, {invalid_string, Binary}}. +unpack_string(Binary) -> + case unicode:characters_to_list(Binary) of + {error, _S, _Rest} -> throw({error, {invalid_string, Binary}}); + {incomplete, _S, _Rest} -> throw({error, {invalid_string, Binary}}); + String -> String + end. diff --git a/test/bench_tests.erl b/test/bench_tests.erl index c68d45a..3fa98ab 100644 --- a/test/bench_tests.erl +++ b/test/bench_tests.erl @@ -43,11 +43,11 @@ benchmark1_test()-> {ok, Data}=?debugTime("deserialize", msgpack:unpack(S, [jsx])), ?debugFmt("for ~p KB test data(jsx).", [byte_size(S) div 1024]). -benchmark2_test()-> - Data=[test_data() || _ <- lists:seq(0, ?CNT)], - S=?debugTime(" serialize", msgpack_nif:pack(Data)), - {ok, Data}=?debugTime("deserialize", msgpack_nif:unpack(S)), - ?debugFmt("for ~p KB test data(nif).", [byte_size(S) div 1024]). +%% benchmark2_test()-> +%% Data=[test_data() || _ <- lists:seq(0, ?CNT)], +%% S=?debugTime(" serialize", msgpack_nif:pack(Data)), +%% {ok, Data}=?debugTime("deserialize", msgpack_nif:unpack(S)), +%% ?debugFmt("for ~p KB test data(nif).", [byte_size(S) div 1024]). benchmark3_test()-> Data=[test_data() || _ <- lists:seq(0, ?CNT)], @@ -112,16 +112,16 @@ benchmark_p1_test_() -> msgpack:unpack(Data, [jsx]) end))}. 
-benchmark_p2_test_() -> - {timeout, 600, - ?_assertEqual(ok, - multirunner("nif", - fun(Data) -> - msgpack_nif:pack(Data) - end, - fun(Data) -> - msgpack_nif:unpack(Data) - end))}. +%% benchmark_p2_test_() -> +%% {timeout, 600, +%% ?_assertEqual(ok, +%% multirunner("nif", +%% fun(Data) -> +%% msgpack_nif:pack(Data) +%% end, +%% fun(Data) -> +%% msgpack_nif:unpack(Data) +%% end))}. benchmark_p3_test_() -> {timeout, 600, diff --git a/test/msgpack_nif_tests.erl b/test/msgpack_nif_tests.erl index 03823aa..97cf5b3 100644 --- a/test/msgpack_nif_tests.erl +++ b/test/msgpack_nif_tests.erl @@ -1,5 +1,8 @@ -module(msgpack_nif_tests). +-undef(NIF). +-ifdef(NIF). + -import(msgpack_nif, [pack/1, unpack/1]). -include_lib("eunit/include/eunit.hrl"). @@ -96,6 +99,8 @@ binary_test_() -> end} ]. +-endif. + %% long_binary_test_()-> %% [ %% {"long binary", diff --git a/test/msgpack_proper.erl b/test/msgpack_proper.erl index 9863519..7accf65 100644 --- a/test/msgpack_proper.erl +++ b/test/msgpack_proper.erl @@ -1,6 +1,6 @@ -module(msgpack_proper). --export([choose_type_jsx/0, choose_type_jiffy/0]). +-export([choose_type/0, choose_type_jsx/0, choose_type_jiffy/0]). -export([array16_jsx/0, array32_jsx/0, map16_jsx/0, map32_jsx/0]). @@ -11,6 +11,9 @@ -include_lib("proper/include/proper.hrl"). +%% default behaviour +choose_type() -> choose_type_jiffy(). + choose_type_jsx() -> oneof([positive_fixnum(), negative_fixnum(), int8(), int16(), int32(), int64(), diff --git a/test/msgpack_props.erl b/test/msgpack_props.erl index 9ebae4c..fa8136d 100644 --- a/test/msgpack_props.erl +++ b/test/msgpack_props.erl @@ -1,11 +1,13 @@ -module(msgpack_props). -include_lib("proper/include/proper.hrl"). +-include_lib("eunit/include/eunit.hrl"). --import(msgpack_proper, [choose_type/0]). +-import(msgpack_proper, [choose_type_jsx/0, + choose_type_jiffy/0]). 
prop_type() -> - numtests(300, + numtests(128, ?FORALL(Term, choose_type_jsx(), begin Binary = msgpack:pack(Term, [jsx]), @@ -22,19 +24,15 @@ prop_type() -> choose_reserved() -> - oneof([<<16#C1>>, - <<16#C4>>, - <<16#C5>>, - <<16#C6>>, - <<16#C7>>, - <<16#C8>>, - <<16#C9>>, - <<16#D4>>, - <<16#D5>>, - <<16#D6>>, - <<16#D7>>, - <<16#D8>>, - <<16#D9>>]). + oneof([16#C1, + 16#C7, + 16#C8, + 16#C9, + 16#D4, + 16#D5, + 16#D6, + 16#D7, + 16#D8]). prop_reserved() -> numtests(300, diff --git a/test/msgpack_test.erl b/test/msgpack_test.erl index d89abd6..e4ebe96 100644 --- a/test/msgpack_test.erl +++ b/test/msgpack_test.erl @@ -105,12 +105,12 @@ issue_jsx_5_test() -> } ], Encoded = msgpack:pack(Term, [jsx]), - Bin0 = <<130,164,116,121,112,101,167,119,111,114,107,101, - 114,115,164,100,97,116,97,145,130,168,119,111,114, - 107,101,114,105,100,165,115,116,100,46,49,165,115,108,111,116,115,144>>, + Bin0 = <<130,196,4,116,121,112,101,196,7,119,111,114,107,101,114,115, + 196,4,100,97,116,97,145,130,196,8,119,111,114,107,101,114,105,100, + 196,5,115,116,100,46,49,196,5,115,108,111,116,115,160>>, ?assertEqual(Bin0, Encoded), - {ok, Decoded} = msgpack:unpack(Encoded, [jsx]), + {ok, Decoded} = msgpack:unpack(Bin0, [jsx]), ?assertEqual(Term, Decoded). @@ -124,9 +124,20 @@ issue_jiffy_5_test() -> } ]}, Encoded = msgpack:pack(Term, [jiffy]), - Bin0 = <<130,164,116,121,112,101,167,119,111,114,107,101, - 114,115,164,100,97,116,97,145,130,168,119,111,114, - 107,101,114,105,100,165,115,116,100,46,49,165,115,108,111,116,115,144>>, + Bin0 = <<130,196,4,116,121,112,101,196,7,119,111,114,107,101,114,115, + 196,4,100,97,116,97,145,130,196,8,119,111,114,107,101,114,105,100, + 196,5,115,116,100,46,49,196,5,115,108,111,116,115,160>>, ?assertEqual(Bin0, Encoded), - {ok, Decoded} = msgpack:unpack(Encoded, [jiffy]), + + {ok, Decoded} = msgpack:unpack(Bin0, [jiffy]), ?assertEqual(Term, Decoded). 
+ + +string_test() -> + {ok, CWD} = file:get_cwd(), + Path = CWD ++ "/../test/utf8.txt", + {ok, UnicodeBin} = file:read_file(Path), + String = unicode:characters_to_list(UnicodeBin), + MsgpackStringBin = msgpack:pack(String), + {ok, String} = msgpack:unpack(MsgpackStringBin). + diff --git a/test/utf8.txt b/test/utf8.txt new file mode 100644 index 0000000..c5a5dd4 --- /dev/null +++ b/test/utf8.txt @@ -0,0 +1 @@ +東京埼玉