mirror of
https://github.com/valitydev/msgpack-erlang.git
synced 2024-11-06 00:35:24 +00:00
experimental string type
This commit is contained in:
parent
7b026c2723
commit
4250db151c
4
Makefile
4
Makefile
@ -1,4 +1,4 @@
|
||||
.PHONY: compile xref eunit clean doc check make deps
|
||||
.PHONY: compile xref eunit clean doc check make deps test
|
||||
|
||||
all: compile
|
||||
|
||||
@ -17,6 +17,8 @@ compile:
|
||||
xref:
|
||||
@./rebar xref
|
||||
|
||||
test: eunit
|
||||
|
||||
eunit: compile
|
||||
@./rebar skip_deps=true eunit
|
||||
|
||||
|
@ -52,6 +52,15 @@ since 0.1.1 - only tested in MacOS, Linux
|
||||
src/msgpack.erl:357:<0.131.0>: for 3828 KB test data(t2b/b2t).
|
||||
|
||||
|
||||
experimental feature: String type
|
||||
---------------------------------
|
||||
|
||||
::
|
||||
|
||||
1> {ok, "埼玉"} = msgpack:unpack(msgpack:pack_string("埼玉")).
|
||||
{ok,[22524,29577]}
|
||||
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
|
114
src/msgpack.erl
114
src/msgpack.erl
@ -35,7 +35,8 @@
|
||||
|
||||
-module(msgpack).
|
||||
|
||||
-export([pack/1, unpack/1, unpack_stream/1]).
|
||||
-export([pack/1, unpack/1, unpack_stream/1,
|
||||
pack_string/1]).
|
||||
|
||||
-type msgpack_map() :: {[{msgpack_term(), msgpack_term()}]}.
|
||||
|
||||
@ -94,6 +95,9 @@ unpack(Badarg) ->
|
||||
{error, {badarg, Badarg}}.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% internal APIs
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
@ -219,6 +223,15 @@ pack_map_([], Acc) -> Acc;
|
||||
pack_map_([{Key,Value}|Tail], Acc) ->
|
||||
pack_map_(Tail, << Acc/binary, (pack_(Key))/binary, (pack_(Value))/binary>>).
|
||||
|
||||
%% @doc Pack a string using the experimental string type.
%% The string type itself does not mandate an encoding: a payload MAY be
%% unicode, or may be EUC-JP, SJIS, UTF-1024 or anything. EVERY
%% implementation must therefore write the binary length right after the
%% type indicator, so that a reader can skip a string it is unable to read.
%%
%% This implementation converts String with unicode:characters_to_binary/1
%% and emits: the type byte 16#C1, the byte length of the converted text as
%% a 32-bit big-endian unsigned integer, then the converted bytes.
%%
%% Raises error:badarg when String cannot be converted, or when the
%% converted text is too long for the 32-bit length field.
-spec pack_string(list()) -> binary().
pack_string(String) ->
    case unicode:characters_to_binary(String) of
        Bin when is_binary(Bin), byte_size(Bin) < 16#100000000 ->
            N = byte_size(Bin),
            << 16#C1:8, N:32/big-unsigned-integer-unit:1, Bin/binary >>;
        _ ->
            %% Either the unicode module returned {error, _, _} or
            %% {incomplete, _, _}, or the payload does not fit the length
            %% field: an N:32 segment would silently truncate the length
            %% and produce a corrupt stream.
            erlang:error(badarg)
    end.
|
||||
|
||||
% Users SHOULD NOT send too long list: this uses lists:reverse/1
|
||||
-spec unpack_map_(binary(), non_neg_integer(), [{msgpack_term(), msgpack_term()}]) ->
|
||||
{{[{msgpack_term(), msgpack_term()}]}, binary()} | no_return().
|
||||
@ -232,48 +245,67 @@ unpack_map_(Bin, Len, Acc) ->
|
||||
-spec unpack_(Bin::binary()) -> {msgpack_term(), binary()} | no_return().
|
||||
unpack_(Bin) ->
|
||||
case Bin of
|
||||
% ATOMS
|
||||
<<16#C0, Rest/binary>> -> {nil, Rest};
|
||||
<<16#C2, Rest/binary>> -> {false, Rest};
|
||||
<<16#C3, Rest/binary>> -> {true, Rest};
|
||||
% Floats
|
||||
<<16#CA, V:32/float-unit:1, Rest/binary>> -> {V, Rest};
|
||||
<<16#CB, V:64/float-unit:1, Rest/binary>> -> {V, Rest};
|
||||
% Unsigned integers
|
||||
<<16#CC, V:8/unsigned-integer, Rest/binary>> -> {V, Rest};
|
||||
<<16#CD, V:16/big-unsigned-integer-unit:1, Rest/binary>> -> {V, Rest};
|
||||
<<16#CE, V:32/big-unsigned-integer-unit:1, Rest/binary>> -> {V, Rest};
|
||||
<<16#CF, V:64/big-unsigned-integer-unit:1, Rest/binary>> -> {V, Rest};
|
||||
% Signed integers
|
||||
<<16#D0, V:8/signed-integer, Rest/binary>> -> {V, Rest};
|
||||
<<16#D1, V:16/big-signed-integer-unit:1, Rest/binary>> -> {V, Rest};
|
||||
<<16#D2, V:32/big-signed-integer-unit:1, Rest/binary>> -> {V, Rest};
|
||||
<<16#D3, V:64/big-signed-integer-unit:1, Rest/binary>> -> {V, Rest};
|
||||
% Raw bytes
|
||||
<<16#DA, L:16/unsigned-integer-unit:1, V:L/binary, Rest/binary>> -> {V, Rest};
|
||||
<<16#DB, L:32/unsigned-integer-unit:1, V:L/binary, Rest/binary>> -> {V, Rest};
|
||||
% Arrays
|
||||
<<16#DC, L:16/big-unsigned-integer-unit:1, Rest/binary>> -> unpack_array_(Rest, L, []);
|
||||
<<16#DD, L:32/big-unsigned-integer-unit:1, Rest/binary>> -> unpack_array_(Rest, L, []);
|
||||
% Maps
|
||||
<<16#DE, L:16/big-unsigned-integer-unit:1, Rest/binary>> -> unpack_map_(Rest, L, []);
|
||||
<<16#DF, L:32/big-unsigned-integer-unit:1, Rest/binary>> -> unpack_map_(Rest, L, []);
|
||||
%% ATOMS
|
||||
<<16#C0, Rest/binary>> -> {nil, Rest};
|
||||
|
||||
% Tag-encoded lengths (kept last, for speed)
|
||||
<<0:1, V:7, Rest/binary>> -> {V, Rest}; % positive int
|
||||
<<2#111:3, V:5, Rest/binary>> -> {V - 2#100000, Rest}; % negative int
|
||||
<<2#101:3, L:5, V:L/binary, Rest/binary>> -> {V, Rest}; % raw bytes
|
||||
<<2#1001:4, L:4, Rest/binary>> -> unpack_array_(Rest, L, []); % array
|
||||
<<2#1000:4, L:4, Rest/binary>> -> unpack_map_(Rest, L, []); % map
|
||||
%% String: when unicode decoding fails or is incomplete, keep the part that decoded
|
||||
%% TODO: make this optional for performance
|
||||
<<16#C1, L:32/big-unsigned-integer-unit:1, V:L/binary, Rest/binary>> ->
|
||||
case unicode:characters_to_list(V) of
|
||||
{error, String, _RestData} -> {String, Rest};
|
||||
{incomplete, String, _ } -> {String, Rest};
|
||||
String -> {String, Rest}
|
||||
end;
|
||||
|
||||
% Invalid data
|
||||
<<F, R/binary>> when F==16#C1;
|
||||
F==16#C4; F==16#C5; F==16#C6; F==16#C7; F==16#C8; F==16#C9;
|
||||
F==16#D4; F==16#D5; F==16#D6; F==16#D7; F==16#D8; F==16#D9 ->
|
||||
throw({badarg, <<F, R/binary>>});
|
||||
% Incomplete data (we've covered every complete/invalid case; anything left is incomplete)
|
||||
_ ->
|
||||
throw(incomplete)
|
||||
%% Boolean
|
||||
<<16#C2, Rest/binary>> -> {false, Rest};
|
||||
<<16#C3, Rest/binary>> -> {true, Rest};
|
||||
|
||||
%% Floats
|
||||
<<16#CA, V:32/float-unit:1, Rest/binary>> -> {V, Rest};
|
||||
<<16#CB, V:64/float-unit:1, Rest/binary>> -> {V, Rest};
|
||||
|
||||
%% Unsigned integers
|
||||
<<16#CC, V:8/unsigned-integer, Rest/binary>> -> {V, Rest};
|
||||
<<16#CD, V:16/big-unsigned-integer-unit:1, Rest/binary>> -> {V, Rest};
|
||||
<<16#CE, V:32/big-unsigned-integer-unit:1, Rest/binary>> -> {V, Rest};
|
||||
<<16#CF, V:64/big-unsigned-integer-unit:1, Rest/binary>> -> {V, Rest};
|
||||
|
||||
%% Signed integers
|
||||
<<16#D0, V:8/signed-integer, Rest/binary>> -> {V, Rest};
|
||||
<<16#D1, V:16/big-signed-integer-unit:1, Rest/binary>> -> {V, Rest};
|
||||
<<16#D2, V:32/big-signed-integer-unit:1, Rest/binary>> -> {V, Rest};
|
||||
<<16#D3, V:64/big-signed-integer-unit:1, Rest/binary>> -> {V, Rest};
|
||||
|
||||
%% Raw bytes
|
||||
<<16#DA, L:16/unsigned-integer-unit:1, V:L/binary, Rest/binary>> -> {V, Rest};
|
||||
<<16#DB, L:32/unsigned-integer-unit:1, V:L/binary, Rest/binary>> -> {V, Rest};
|
||||
|
||||
%% Arrays
|
||||
<<16#DC, L:16/big-unsigned-integer-unit:1, Rest/binary>> -> unpack_array_(Rest, L, []);
|
||||
<<16#DD, L:32/big-unsigned-integer-unit:1, Rest/binary>> -> unpack_array_(Rest, L, []);
|
||||
|
||||
%% Maps
|
||||
<<16#DE, L:16/big-unsigned-integer-unit:1, Rest/binary>> -> unpack_map_(Rest, L, []);
|
||||
<<16#DF, L:32/big-unsigned-integer-unit:1, Rest/binary>> -> unpack_map_(Rest, L, []);
|
||||
|
||||
%% Tag-encoded lengths
|
||||
<<0:1, V:7, Rest/binary>> -> {V, Rest}; % positive int
|
||||
<<2#111:3, V:5, Rest/binary>> -> {V - 2#100000, Rest}; % negative int
|
||||
<<2#101:3, L:5, V:L/binary, Rest/binary>> -> {V, Rest}; % raw bytes
|
||||
<<2#1001:4, L:4, Rest/binary>> -> unpack_array_(Rest, L, []); % array
|
||||
<<2#1000:4, L:4, Rest/binary>> -> unpack_map_(Rest, L, []); % map
|
||||
|
||||
%% Invalid data
|
||||
<<F, R/binary>> when
|
||||
F==16#C4; F==16#C5; F==16#C6; F==16#C7; F==16#C8; F==16#C9;
|
||||
F==16#D4; F==16#D5; F==16#D6; F==16#D7; F==16#D8; F==16#D9 ->
|
||||
throw({badarg, <<F, R/binary>>});
|
||||
|
||||
%% Incomplete data (we've covered every complete/invalid case;
|
||||
%% anything left is incomplete)
|
||||
_ ->
|
||||
throw(incomplete)
|
||||
end.
|
||||
|
||||
|
||||
|
@ -15,8 +15,7 @@ prop_type() ->
|
||||
|
||||
|
||||
choose_reserved() ->
|
||||
oneof([<<16#C1>>,
|
||||
<<16#C4>>,
|
||||
oneof([<<16#C4>>,
|
||||
<<16#C5>>,
|
||||
<<16#C6>>,
|
||||
<<16#C7>>,
|
||||
|
@ -85,3 +85,12 @@ issue_5_test() ->
|
||||
114,115,164,100,97,116,97,145,130,168,119,111,114,
|
||||
107,101,114,105,100,165,115,116,100,46,49,165,115,108,111,116,115,144>>,
|
||||
?assertEqual(Bin0, msgpack:pack(Term)).
|
||||
|
||||
|
||||
%% Round-trip check for the experimental string type: the contents of
%% test/utf8.txt are decoded to a character list, packed with
%% msgpack:pack_string/1, and must unpack to the same list.
string_test() ->
    {ok, CWD} = file:get_cwd(),
    %% The fixture is looked up relative to the parent of the current
    %% working directory (same location as before, built portably).
    Path = filename:join([CWD, "..", "test", "utf8.txt"]),
    {ok, UnicodeBin} = file:read_file(Path),
    String = unicode:characters_to_list(UnicodeBin),
    MsgpackStringBin = msgpack:pack_string(String),
    %% ?assertEqual reports expected/actual values on failure, matching the
    %% assertion style of the other tests in this file.
    ?assertEqual({ok, String}, msgpack:unpack(MsgpackStringBin)).
|
||||
|
1
test/utf8.txt
Normal file
1
test/utf8.txt
Normal file
@ -0,0 +1 @@
|
||||
東京埼玉
|
Loading…
Reference in New Issue
Block a user