From 147c2849af9c28f2ce347b4005e022ac13db9dd8 Mon Sep 17 00:00:00 2001 From: Jim Apple Date: Sat, 18 Mar 2017 12:56:50 -0700 Subject: [PATCH] THRIFT-2026: Eliminate some undefined behavior in C/C++ Clients: glib, C++ Patch: Jim Apple This closes #1214 This patch fixes some undefined behavior were found using Clang's UndefinedBehaviorSanitizer (UBSan). To check for undefined behavior, run /build/docker/scripts/ubsan.sh. This is run during CI builds, as well. The examples of the types of undefined behavior fixed in this commit are: 1. Enumerations exhibit undefined behavior when they have values outside of a range dependent on the values of their enumerators, as specified in C++14's chapter 7.2 ("Enumeration declarations"), paragraph 8. 2. Left shift of negative values, used in zigzag encoding, is undefined behavior. See 5.8 ("Shift operators"), paragraph 2 for C++ and 6.5.7 ("Bitwise shift operators"), paragraph 4 for C99 and C11. --- .travis.yml | 6 ++++ build/docker/scripts/ubsan.sh | 35 +++++++++++++++++++ .../src/thrift/generate/t_c_glib_generator.cc | 4 +-- .../cpp/src/thrift/generate/t_d_generator.cc | 2 +- compiler/cpp/src/thrift/parse/t_base_type.h | 6 ++-- .../c_glib/protocol/thrift_compact_protocol.c | 4 +-- .../src/thrift/protocol/TCompactProtocol.tcc | 4 +-- 7 files changed, 51 insertions(+), 10 deletions(-) create mode 100755 build/docker/scripts/ubsan.sh diff --git a/.travis.yml b/.travis.yml index 70293edef..10c6fd09b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -152,6 +152,12 @@ env: BUILD_ENV="-e CC=gcc -e CXX=g++" DISTRO=debian + # C and C++ undefined behavior. This wraps autotools.sh, but each binary crashes if + # undefined behavior occurs. Skips the known flaky tests. + - TEST_NAME="UBSan" + SCRIPT="ubsan.sh" + BUILD_ARG="--without-haskell --without-nodejs --without-perl --without-python" + matrix: include: # QA jobs for code analytics and metrics diff --git a/build/docker/scripts/ubsan.sh b/build/docker/scripts/ubsan.sh new file mode 100755 index 000000000..6db10f3d8 --- /dev/null +++ b/build/docker/scripts/ubsan.sh @@ -0,0 +1,35 @@ +#!/bin/sh + +set -ex + +# Wraps autotools.sh, but each binary crashes if it exhibits undefined behavior. See +# http://releases.llvm.org/3.8.0/tools/clang/docs/UndefinedBehaviorSanitizer.html + +# Install a more recent clang than default: +sudo apt-get update +sudo apt-get install -y --no-install-recommends clang-3.8 llvm-3.8-dev +export CC=clang-3.8 +export CXX=clang++-3.8 + +# Set the undefined behavior flags. This crashes on all undefined behavior except for +# undefined casting, aka "vptr". +# +# TODO: fix undefined vptr behavior and turn this option back on. +export CFLAGS="-fsanitize=undefined -fno-sanitize-recover=undefined -fno-sanitize=vptr" +# Builds without optimization and with debugging symbols for making crash reports more +# readable. +export CFLAGS="${CFLAGS} -O0 -ggdb3" +export CXXFLAGS="${CFLAGS}" +export UBSAN_OPTIONS=print_stacktrace=1 + +# llvm-symbolizer must be on PATH, but the above installation instals a binary called +# "llvm-symbolizer-3.8", not "llvm-symbolizer". This fixes that with a softlink in a new +# directory. +CLANG_PATH="$(mktemp -d)" +trap "rm -rf ${CLANG_PATH}" EXIT +ln -s "$(whereis llvm-symbolizer-3.8 | rev | cut -d ' ' -f 1 | rev)" \ + "${CLANG_PATH}/llvm-symbolizer" +export PATH="${CLANG_PATH}:${PATH}" +llvm-symbolizer -version + +build/docker/scripts/autotools.sh $* diff --git a/compiler/cpp/src/thrift/generate/t_c_glib_generator.cc b/compiler/cpp/src/thrift/generate/t_c_glib_generator.cc index 7d4e4f03d..7d4818e99 100644 --- a/compiler/cpp/src/thrift/generate/t_c_glib_generator.cc +++ b/compiler/cpp/src/thrift/generate/t_c_glib_generator.cc @@ -4061,9 +4061,9 @@ void t_c_glib_generator::generate_deserialize_field(ofstream& out, // if the type is not required and this is a thrift struct (no prefix), // set the isset variable. if the type is required, then set the // local variable indicating the value was set, so that we can do // validation later. - if (tfield->get_req() != t_field::T_REQUIRED && prefix != "") { + if (prefix != "" && tfield->get_req() != t_field::T_REQUIRED) { indent(out) << prefix << "__isset_" << tfield->get_name() << suffix << " = TRUE;" << endl; - } else if (tfield->get_req() == t_field::T_REQUIRED && prefix != "") { + } else if (prefix != "" && tfield->get_req() == t_field::T_REQUIRED) { indent(out) << "isset_" << tfield->get_name() << " = TRUE;" << endl; } } diff --git a/compiler/cpp/src/thrift/generate/t_d_generator.cc b/compiler/cpp/src/thrift/generate/t_d_generator.cc index 481668146..819254977 100644 --- a/compiler/cpp/src/thrift/generate/t_d_generator.cc +++ b/compiler/cpp/src/thrift/generate/t_d_generator.cc @@ -382,7 +382,7 @@ private: << (*f_iter)->get_name() << " called\");" << endl; t_base_type* rt = (t_base_type*)(*f_iter)->get_returntype(); - if (rt->get_base() != t_base_type::TYPE_VOID) { + if (!rt->is_void()) { indent(out) << "return typeof(return).init;" << endl; } diff --git a/compiler/cpp/src/thrift/parse/t_base_type.h b/compiler/cpp/src/thrift/parse/t_base_type.h index 32523cb9d..71398ba70 100644 --- a/compiler/cpp/src/thrift/parse/t_base_type.h +++ b/compiler/cpp/src/thrift/parse/t_base_type.h @@ -57,15 +57,15 @@ public: void set_string_list(bool val) { string_list_ = val; } - bool is_string_list() const { return (base_ == TYPE_STRING) && string_list_; } + bool is_string_list() const { return string_list_ && (base_ == TYPE_STRING); } void set_binary(bool val) { binary_ = val; } - bool is_binary() const { return (base_ == TYPE_STRING) && binary_; } + bool is_binary() const { return binary_ && (base_ == TYPE_STRING); } void set_string_enum(bool val) { string_enum_ = val; } - bool is_string_enum() const { return base_ == TYPE_STRING && string_enum_; } + bool is_string_enum() const { return string_enum_ && base_ == TYPE_STRING; } void add_string_enum_val(std::string val) { string_enum_vals_.push_back(val); } diff --git a/lib/c_glib/src/thrift/c_glib/protocol/thrift_compact_protocol.c b/lib/c_glib/src/thrift/c_glib/protocol/thrift_compact_protocol.c index a0dd3cc45..cae4749f0 100644 --- a/lib/c_glib/src/thrift/c_glib/protocol/thrift_compact_protocol.c +++ b/lib/c_glib/src/thrift/c_glib/protocol/thrift_compact_protocol.c @@ -120,7 +120,7 @@ thrift_bitwise_cast_gdouble (const guint64 v) static guint64 i64_to_zigzag (const gint64 l) { - return (l << 1) ^ (l >> 63); + return (((guint64)l) << 1) ^ (l >> 63); } /** @@ -130,7 +130,7 @@ i64_to_zigzag (const gint64 l) static guint32 i32_to_zigzag (const gint32 n) { - return (n << 1) ^ (n >> 31); + return (((guint32)n) << 1) ^ (n >> 31); } /** diff --git a/lib/cpp/src/thrift/protocol/TCompactProtocol.tcc b/lib/cpp/src/thrift/protocol/TCompactProtocol.tcc index 6bbf9ef34..d40c33134 100644 --- a/lib/cpp/src/thrift/protocol/TCompactProtocol.tcc +++ b/lib/cpp/src/thrift/protocol/TCompactProtocol.tcc @@ -387,7 +387,7 @@ uint32_t TCompactProtocolT::writeVarint64(uint64_t n) { */ template uint64_t TCompactProtocolT::i64ToZigzag(const int64_t l) { - return (l << 1) ^ (l >> 63); + return (static_cast(l) << 1) ^ (l >> 63); } /** @@ -396,7 +396,7 @@ uint64_t TCompactProtocolT::i64ToZigzag(const int64_t l) { */ template uint32_t TCompactProtocolT::i32ToZigzag(const int32_t n) { - return (n << 1) ^ (n >> 31); + return (static_cast(n) << 1) ^ (n >> 31); } /**