Thrift: Generate structural fingerprints for thrift structs.

Summary:
We are going to write a dense protocol soon that eliminates some metadata.
To prevent version conflicts, we want each structure to have a
structural fingerprint that will change whenever the struct changes
in a way that will affect the dense protocol.
This change computes those fingerprints and puts them in
the generated C++ code.

Reviewed By: aditya, mcslee

Test Plan:
Recompiled thrift.
Thrifted DebugProtoTest with old and new compilers.
Compared output.
Also ran thrift with those "cout"s uncommented,
examined the fingerprint material,
and verified the hashes.

Revert Plan: ok


git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@665227 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Reiss 2007-08-28 20:49:17 +00:00
parent 2375312fa0
commit 18bf22d5ba
15 changed files with 575 additions and 3 deletions

View File

@ -7,6 +7,7 @@ thrift_OBJDIR = obj
thrift_SOURCES = src/thrifty.yy \
src/thriftl.ll \
src/main.cc \
src/md5c.c \
src/generate/t_generator.cc \
src/generate/t_cpp_generator.cc \
src/generate/t_java_generator.cc \

View File

@ -377,6 +377,20 @@ void t_cpp_generator::generate_struct_definition(ofstream& out,
endl;
indent_up();
// Put the fingerprint up top for all to see.
// Only structs for which a fingerprint has been generated get these members:
// an ASCII hex string and a brace-initialized array of the raw bytes.
if (tstruct->has_fingerprint()) {
  out <<
    indent() << "static char* ascii_fingerprint = \"" <<
      tstruct->get_ascii_fingerprint() << "\";" << endl <<
    indent() << "static char binary_fingerprint[] = {";
  // String literals are const; keep the separator pointer const-correct.
  // It is empty before the first byte and "," before every later one.
  const char* comma = "";
  const uint8_t* fingerprint = tstruct->get_binary_fingerprint();
  for (int i = 0; i < t_struct::fingerprint_len; i++) {
    out << comma << "0x" << t_struct::byte_to_hex(fingerprint[i]);
    comma = ",";
  }
  out << "};" << endl << endl;
}
// Get members
vector<t_field*>::const_iterator m_iter;
const vector<t_field*>& members = tstruct->get_members();

View File

@ -513,6 +513,27 @@ void dump_docstrings(t_program* program) {
}
}
/**
 * Walks every struct returned by program->get_structs() and asks each one
 * to compute its fingerprint via generate_fingerprint().
 *
 * @param program Program whose structs are fingerprinted.
 */
void generate_all_fingerprints(t_program* program) {
  const vector<t_struct*>& all_structs = program->get_structs();
  for (vector<t_struct*>::const_iterator it = all_structs.begin();
       it != all_structs.end();
       ++it) {
    (*it)->generate_fingerprint();
  }

  // If you want to generate fingerprints for implicit structures, start here.
  /*
  const vector<t_service*>& services = program->get_services();
  vector<t_service*>::const_iterator v_iter;
  for (v_iter = services.begin(); v_iter != services.end(); ++v_iter) {
    t_service* sv = *v_iter;
  }
  */
}
/**
* Displays the usage message and then exits with an error code.
*/
@ -740,6 +761,9 @@ void generate(t_program* program) {
try {
pverbose("Program: %s\n", program->get_path().c_str());
// Compute fingerprints.
generate_all_fingerprints(program);
if (gen_cpp) {
pverbose("Generating C++\n");
t_cpp_generator* cpp = new t_cpp_generator(program);

45
compiler/cpp/src/md5.h Normal file
View File

@ -0,0 +1,45 @@
/* MD5.H - header file for MD5C.C
*
* This file has been modified for use in Thrift.
*/
/* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
rights reserved.
License to copy and use this software is granted provided that it
is identified as the "RSA Data Security, Inc. MD5 Message-Digest
Algorithm" in all material mentioning or referencing this software
or this function.
License is also granted to make and use derivative works provided
that such works are identified as "derived from the RSA Data
Security, Inc. MD5 Message-Digest Algorithm" in all material
mentioning or referencing the derived work.
RSA Data Security, Inc. makes no representations concerning either
the merchantability of this software or the suitability of this
software for any particular purpose. It is provided "as is"
without express or implied warranty of any kind.
These notices must be retained in any copies of any part of this
documentation and/or software.
*/
#include <stdint.h>
/* Include guard: MD5_CTX is a typedef of an anonymous struct, so including
   this header twice in one translation unit would otherwise be a
   redefinition error. */
#ifndef THRIFT_MD5_H
#define THRIFT_MD5_H

/* MD5 context. Holds the running state of one digest computation. */
typedef struct {
  uint32_t state[4];        /* state (ABCD) */
  uint32_t count[2];        /* number of bits, modulo 2^64 (lsb first) */
  unsigned char buffer[64]; /* input buffer */
} MD5_CTX;

/* The implementation is compiled as C; give the entry points C linkage
   when this header is included from C++. */
#ifdef __cplusplus
extern "C" {
#endif

/* Begin a new digest computation in the given context. */
void MD5Init(MD5_CTX *);
/* Feed a block of input bytes (pointer, length in bytes) into the context. */
void MD5Update(MD5_CTX *, unsigned char *, unsigned int);
/* Finish the computation and write the 16-byte digest. */
void MD5Final(unsigned char [16], MD5_CTX *);

#ifdef __cplusplus
}
#endif

#endif /* THRIFT_MD5_H */

354
compiler/cpp/src/md5c.c Normal file
View File

@ -0,0 +1,354 @@
/* MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
*
* This file has been modified for use in Thrift.
*/
/* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
rights reserved.
License to copy and use this software is granted provided that it
is identified as the "RSA Data Security, Inc. MD5 Message-Digest
Algorithm" in all material mentioning or referencing this software
or this function.
License is also granted to make and use derivative works provided
that such works are identified as "derived from the RSA Data
Security, Inc. MD5 Message-Digest Algorithm" in all material
mentioning or referencing the derived work.
RSA Data Security, Inc. makes no representations concerning either
the merchantability of this software or the suitability of this
software for any particular purpose. It is provided "as is"
without express or implied warranty of any kind.
These notices must be retained in any copies of any part of this
documentation and/or software.
*/
#include "md5.h"
/* GLOBAL.H - RSAREF types and constants
*/
/* These definitions are written directly in this file (between the
GLOBAL.H / END GLOBAL.H markers) rather than pulled in by an #include. */
/* POINTER defines a generic pointer type */
typedef unsigned char *POINTER;
/* UINT2 defines a two byte word */
typedef uint16_t UINT2;
/* UINT4 defines a four byte word */
typedef uint32_t UINT4;
/* PROTO_LIST expands to its argument unchanged, so a declaration written
as `f PROTO_LIST ((args))` becomes the ordinary prototype `f (args)`. */
#define PROTO_LIST(list) list
/* END GLOBAL.H
*/
/* Constants for MD5Transform routine.
*/
/* Left-rotation amounts passed as the `s` argument of the round macros:
S1x are used with FF (round 1), S2x with GG (round 2), S3x with HH
(round 3) and S4x with II (round 4). */
#define S11 7
#define S12 12
#define S13 17
#define S14 22
#define S21 5
#define S22 9
#define S23 14
#define S24 20
#define S31 4
#define S32 11
#define S33 16
#define S34 23
#define S41 6
#define S42 10
#define S43 15
#define S44 21
/* Forward declarations of the file-local helpers defined further down.
PROTO_LIST leaves the parenthesized parameter list unchanged, so these are
ordinary prototypes. */
static void MD5Transform PROTO_LIST ((UINT4 [4], unsigned char [64]));
static void Encode PROTO_LIST
((unsigned char *, UINT4 *, unsigned int));
static void Decode PROTO_LIST
((UINT4 *, unsigned char *, unsigned int));
static void MD5_memcpy PROTO_LIST ((POINTER, POINTER, unsigned int));
static void MD5_memset PROTO_LIST ((POINTER, int, unsigned int));
/* Padding block: a single 0x80 byte followed by zero bytes. MD5Final passes
the first padLen bytes of this array to MD5Update before appending the
message length. */
static unsigned char PADDING[64] = {
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/* F, G, H and I are basic MD5 functions.
*/
/* Each combines three words bitwise. Note that F and G apply `~` to a bare
macro argument (x and z respectively); the round macros below always pass
parenthesized arguments, so this is safe for the uses in this file. */
#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~z)))
/* ROTATE_LEFT rotates x left n bits.
*/
/* The right-shift count is 32-(n), i.e. x is treated as a 32-bit word. */
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
Rotation is separate from addition to prevent recomputation.
*/
/* Each macro updates its first argument in place:
a = b + ROTATE_LEFT(a + fn(b, c, d) + x + ac, s)
where fn is F, G, H or I, x is a message word, s a rotation amount and
ac an additive constant. Arguments are evaluated more than once. */
#define FF(a, b, c, d, x, s, ac) { \
(a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
#define GG(a, b, c, d, x, s, ac) { \
(a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
#define HH(a, b, c, d, x, s, ac) { \
(a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
#define II(a, b, c, d, x, s, ac) { \
(a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
/* MD5 initialization. Begins an MD5 operation, writing a new context.
 *
 * context: context to initialize. Its bit count is reset to zero and its
 *          four state words are loaded with the MD5 starting constants.
 *          The input buffer is not touched.
 *
 * Written with a prototype-style parameter list; old-style (K&R)
 * definitions are obsolescent in C and are not valid C++.
 */
void MD5Init (MD5_CTX *context)
{
  context->count[0] = context->count[1] = 0;

  /* Load magic initialization constants. */
  context->state[0] = 0x67452301;
  context->state[1] = 0xefcdab89;
  context->state[2] = 0x98badcfe;
  context->state[3] = 0x10325476;
}
/* MD5 block update operation. Continues an MD5 message-digest
 * operation, processing another message block, and updating the
 * context.
 *
 * context:  running digest state (must have been set up by MD5Init).
 * input:    bytes to add to the message.
 * inputLen: number of bytes at input.
 *
 * Complete 64-byte blocks are run through MD5Transform; any leftover bytes
 * are kept in context->buffer for the next call.
 *
 * Written with a prototype-style parameter list; old-style (K&R)
 * definitions are obsolescent in C and are not valid C++.
 */
void MD5Update (MD5_CTX *context, unsigned char *input, unsigned int inputLen)
{
  unsigned int i, index, partLen;

  /* Compute number of bytes mod 64 */
  index = (unsigned int)((context->count[0] >> 3) & 0x3F);

  /* Update number of bits. count[0] holds the low word; if adding the new
     bit count wraps it around, carry one into count[1]. */
  if ((context->count[0] += ((UINT4)inputLen << 3))
      < ((UINT4)inputLen << 3))
    context->count[1]++;
  /* The top three bits of inputLen are lost by the << 3 above; add them
     to the high word. */
  context->count[1] += ((UINT4)inputLen >> 29);

  /* Bytes needed to fill the partially filled buffer. */
  partLen = 64 - index;

  /* Transform as many times as possible. */
  if (inputLen >= partLen) {
    /* Complete the buffered block first ... */
    MD5_memcpy
      ((POINTER)&context->buffer[index], (POINTER)input, partLen);
    MD5Transform (context->state, context->buffer);

    /* ... then consume whole blocks straight from the input. */
    for (i = partLen; i + 63 < inputLen; i += 64)
      MD5Transform (context->state, &input[i]);

    index = 0;
  }
  else
    i = 0;

  /* Buffer remaining input */
  MD5_memcpy
    ((POINTER)&context->buffer[index], (POINTER)&input[i],
     inputLen-i);
}
/* MD5 finalization. Ends an MD5 message-digest operation, writing the
 * message digest and zeroizing the context.
 *
 * digest:  receives the 16-byte message digest.
 * context: running digest state; overwritten with zeros on return, so it
 *          must be re-initialized with MD5Init before being reused.
 *
 * Written with a prototype-style parameter list; old-style (K&R)
 * definitions are obsolescent in C and are not valid C++.
 */
void MD5Final (unsigned char digest[16], MD5_CTX *context)
{
  unsigned char bits[8];
  unsigned int index, padLen;

  /* Save number of bits (must be captured before the padding below
     changes context->count). */
  Encode (bits, context->count, 8);

  /* Pad out to 56 mod 64. */
  index = (unsigned int)((context->count[0] >> 3) & 0x3f);
  padLen = (index < 56) ? (56 - index) : (120 - index);
  MD5Update (context, PADDING, padLen);

  /* Append length (before padding) */
  MD5Update (context, bits, 8);

  /* Store state in digest */
  Encode (digest, context->state, 16);

  /* Zeroize sensitive information. */
  MD5_memset ((POINTER)context, 0, sizeof (*context));
}
/* MD5 basic transformation. Transforms state based on block.
*/
/* state: the four running state words; updated in place.
block: 64 input bytes, decoded into sixteen 32-bit words x[0..15].
The working copies a, b, c, d go through four rounds of sixteen steps each
(FF, GG, HH, II) and are then added back into state. */
static void MD5Transform (state, block)
UINT4 state[4];
unsigned char block[64];
{
UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
Decode (x, block, 64);
/* Round 1 */
FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
/* Round 2 */
GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
/* Round 3 */
HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
/* Round 4 */
II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
/* Fold the result of the four rounds back into the running state. */
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
/* Zeroize sensitive information.
*/
MD5_memset ((POINTER)x, 0, sizeof (x));
}
/* Encodes input (UINT4) into output (unsigned char). Assumes len is
 * a multiple of 4.
 *
 * output: receives len bytes.
 * input:  len/4 words to serialize.
 * len:    number of output bytes to produce.
 *
 * Each word is written least-significant byte first, independent of the
 * host byte order.
 *
 * Written with a prototype-style parameter list; old-style (K&R)
 * definitions are obsolescent in C and are not valid C++.
 */
static void Encode (unsigned char *output, UINT4 *input, unsigned int len)
{
  unsigned int i, j;

  for (i = 0, j = 0; j < len; i++, j += 4) {
    output[j] = (unsigned char)(input[i] & 0xff);
    output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
    output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
    output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
  }
}
/* Decodes input (unsigned char) into output (UINT4). Assumes len is
 * a multiple of 4.
 *
 * output: receives len/4 words.
 * input:  len bytes to assemble.
 * len:    number of input bytes to consume.
 *
 * Each word is assembled least-significant byte first (the inverse of
 * Encode), independent of the host byte order.
 *
 * Written with a prototype-style parameter list; old-style (K&R)
 * definitions are obsolescent in C and are not valid C++.
 */
static void Decode (UINT4 *output, unsigned char *input, unsigned int len)
{
  unsigned int i, j;

  for (i = 0, j = 0; j < len; i++, j += 4)
    output[i] = ((UINT4)input[j]) | (((UINT4)input[j+1]) << 8) |
      (((UINT4)input[j+2]) << 16) | (((UINT4)input[j+3]) << 24);
}
/* Copies len bytes from input to output, front to back.
 *
 * Note: Replace "for loop" with standard memcpy if possible.
 *
 * Written with a prototype-style parameter list; old-style (K&R)
 * definitions are obsolescent in C and are not valid C++.
 */
static void MD5_memcpy (POINTER output, POINTER input, unsigned int len)
{
  unsigned int i;

  for (i = 0; i < len; i++)
    output[i] = input[i];
}
/* Sets each of the len bytes at output to (char)value.
 *
 * Note: Replace "for loop" with standard memset if possible.
 *
 * Written with a prototype-style parameter list; old-style (K&R)
 * definitions are obsolescent in C and are not valid C++.
 */
static void MD5_memset (POINTER output, int value, unsigned int len)
{
  unsigned int i;

  for (i = 0; i < len; i++)
    ((char *)output)[i] = (char)value;
}

View File

@ -7,6 +7,7 @@
#ifndef T_BASE_TYPE_H
#define T_BASE_TYPE_H
#include <cstdlib>
#include "t_type.h"
/**
@ -84,7 +85,22 @@ class t_base_type : public t_type {
bool is_base_type() const {
return true;
}
/**
 * Returns the fingerprint material for this base type: a fixed token
 * chosen by the value of base_ ("void", "string", "bool", "byte", "i16",
 * "i32", "i64" or "double").
 *
 * Throws a C string (const char*) if base_ is none of the handled values.
 */
virtual std::string get_fingerprint_material() const {
  switch (base_) {
    case TYPE_VOID   : return "void";
    case TYPE_STRING : return "string";
    case TYPE_BOOL   : return "bool";
    case TYPE_BYTE   : return "byte";
    case TYPE_I16    : return "i16";
    case TYPE_I32    : return "i32";
    // Letter 'i', matching "i16"/"i32" above (not the digits "164").
    case TYPE_I64    : return "i64";
    case TYPE_DOUBLE : return "double";
    default:
      throw "BUG: Can't get fingerprint material for this base type.";
  }
}
private:
t_base base_;

View File

@ -36,6 +36,10 @@ class t_enum : public t_type {
return true;
}
// Every enum contributes the same fixed token to a fingerprint; the
// enum's name and constants are not part of the material.
virtual std::string get_fingerprint_material() const {
  return std::string("enum");
}
private:
std::vector<t_enum_value*> constants_;
};

View File

@ -8,6 +8,7 @@
#define T_FIELD_H
#include <string>
#include <boost/lexical_cast.hpp>
// Forward declare for xsd_attrs
class t_struct;
@ -112,6 +113,14 @@ class t_field {
has_doc_ = true;
}
// Not an override of t_type::get_fingerprint_material (t_field is not a
// t_type), but it plays the same role for a field.
// The material is "<key>:<type material>", with "opt-" inserted before the
// type material when the field's requiredness is OPTIONAL.
std::string get_fingerprint_material() const {
  std::string material = boost::lexical_cast<std::string>(key_);
  material += ":";
  if (req_ == OPTIONAL) {
    material += "opt-";
  }
  material += type_->get_fingerprint_material();
  return material;
}
private:
t_type* type_;
std::string name_;

View File

@ -27,6 +27,10 @@ class t_list : public t_container {
return true;
}
// Material is the element type's material wrapped as "list<...>".
virtual std::string get_fingerprint_material() const {
  std::string material("list<");
  material += elem_type_->get_fingerprint_material();
  material += ">";
  return material;
}
private:
t_type* elem_type_;
};

View File

@ -33,6 +33,11 @@ class t_map : public t_container {
return true;
}
// Material is "map<K,V>", where K and V are the materials of the key type
// and the value type respectively.
virtual std::string get_fingerprint_material() const {
  std::string material("map<");
  material += key_type_->get_fingerprint_material();
  material += ",";
  material += val_type_->get_fingerprint_material();
  material += ">";
  return material;
}
private:
t_type* key_type_;
t_type* val_type_;

View File

@ -43,6 +43,11 @@ class t_service : public t_type {
return extends_;
}
// Services should never be used in fingerprints, so asking for a service's
// material is treated as a bug: this always throws a C string and never
// returns.
virtual std::string get_fingerprint_material() const {
  const char* message = "BUG: Can't get fingerprint material for service.";
  throw message;
}
private:
std::vector<t_function*> functions_;
t_service* extends_;

View File

@ -27,6 +27,10 @@ class t_set : public t_container {
return true;
}
// Material is the element type's material wrapped as "set<...>".
virtual std::string get_fingerprint_material() const {
  std::string material("set<");
  material += elem_type_->get_fingerprint_material();
  material += ">";
  return material;
}
private:
t_type* elem_type_;
};

View File

@ -9,10 +9,14 @@
#include <vector>
#include <string>
#include <cstring>
#include "t_type.h"
#include "t_field.h"
// What's worse? This, or making a src/parse/non_inlined.cc?
#include "md5.h"
// Forward declare that puppy
class t_program;
@ -27,12 +31,18 @@ class t_struct : public t_type {
// Constructs an unnamed struct belonging to the given program.
// The fingerprint starts out all zero, which has_fingerprint() reports as
// "no fingerprint"; generate_fingerprint() fills it in later.
t_struct(t_program* program) :
  t_type(program),
  is_xception_(false),
  xsd_all_(false)
{
  memset(fingerprint_, 0, sizeof(fingerprint_));
}
// Constructs a named struct belonging to the given program.
// The fingerprint starts out all zero, which has_fingerprint() reports as
// "no fingerprint"; generate_fingerprint() fills it in later.
t_struct(t_program* program, const std::string& name) :
  t_type(program, name),
  is_xception_(false),
  xsd_all_(false)
{
  memset(fingerprint_, 0, sizeof(fingerprint_));
}
void set_name(const std::string& name) {
name_ = name;
@ -66,12 +76,80 @@ class t_struct : public t_type {
return is_xception_;
}
virtual std::string get_fingerprint_material() const {
std::string rv = "{";
std::vector<t_field*>::const_iterator m_iter;
for (m_iter = members_.begin(); m_iter != members_.end(); ++m_iter) {
rv += (**m_iter).get_fingerprint_material();
rv += ";";
}
rv += "}";
return rv;
}
// Fingerprint should change whenever (and only when)
// the encoding via TDenseProtocol changes.
// Length of the binary fingerprint in bytes; it sizes the fingerprint_
// array that generate_fingerprint() fills from MD5Final's 16-byte digest.
static const int fingerprint_len = 16;
// Call this before trying get_*_fingerprint().
void generate_fingerprint() {
std::string material = get_fingerprint_material();
MD5_CTX ctx;
MD5Init(&ctx);
MD5Update(&ctx, (unsigned char*)(material.data()), material.size());
MD5Final(fingerprint_, &ctx);
//std::cout << get_name() << std::endl;
//std::cout << material << std::endl;
//std::cout << get_ascii_fingerprint() << std::endl << std::endl;
}
// Reports whether a fingerprint is present, i.e. whether any byte of
// fingerprint_ is non-zero. (The constructors zero the array.)
bool has_fingerprint() const {
  bool any_nonzero = false;
  for (int pos = 0; pos < fingerprint_len && !any_nonzero; pos++) {
    any_nonzero = (fingerprint_[pos] != 0);
  }
  return any_nonzero;
}
// Returns a pointer to the first of the fingerprint_len raw fingerprint
// bytes held by this struct.
const uint8_t* get_binary_fingerprint() const {
  return &fingerprint_[0];
}
// Returns the fingerprint as text: each of the fingerprint_len bytes is
// rendered by byte_to_hex() and the results are concatenated in order.
std::string get_ascii_fingerprint() const {
  const uint8_t* bytes = get_binary_fingerprint();
  std::string hex;
  int pos = 0;
  while (pos < fingerprint_len) {
    hex += byte_to_hex(bytes[pos]);
    ++pos;
  }
  return hex;
}
// Converts a 4-bit value to its hexadecimal digit: '0'-'9' for 0-9 and
// uppercase 'A'-'F' for 10-15.
// This function will break (maybe badly) unless 0 <= num <= 15.
static char nybble_to_xdigit(int num) {
  if (num < 10) {
    return '0' + num;
  } else {
    return 'A' + num - 10;
  }
}
// Renders one byte as two hexadecimal digits, high nybble first, using
// nybble_to_xdigit() for each digit.
static std::string byte_to_hex(uint8_t byte) {
  const char high = nybble_to_xdigit(byte >> 4);
  const char low = nybble_to_xdigit(byte & 0x0f);
  std::string hex;
  hex.push_back(high);
  hex.push_back(low);
  return hex;
}
private:
std::vector<t_field*> members_;
bool is_xception_;
bool xsd_all_;
uint8_t fingerprint_[fingerprint_len];
};
#endif

View File

@ -50,6 +50,11 @@ class t_type : public t_doc {
return program_;
}
// Return a string that uniquely identifies this type
// from any other thrift type in the world, as far as
// TDenseProtocol is concerned.
// Pure virtual: every concrete type must supply its own material.
virtual std::string get_fingerprint_material() const = 0;
protected:
t_type() {}

View File

@ -39,6 +39,10 @@ class t_typedef : public t_type {
return true;
}
virtual std::string get_fingerprint_material() const {
return type_->get_fingerprint_material();
}
private:
t_type* type_;
std::string symbolic_;