THRIFT-153. Proper handling of strings with escapes (in IDL)

- Recognize and parse escape characters in .thrift files.
- Escape strings used as constants in generated source files.


git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@758922 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Reiss 2009-03-26 23:32:36 +00:00
parent 4a05434d0b
commit 82e6fc0266
16 changed files with 105 additions and 26 deletions

View File

@ -1794,7 +1794,7 @@ string t_cocoa_generator::render_const_value(string name,
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
render << "@\"" + value->get_string() + "\"";
render << "@\"" << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
render << ((value->get_integer() > 0) ? "YES" : "NO");

View File

@ -538,7 +538,7 @@ string t_cpp_generator::render_const_value(ofstream& out, string name, t_type* t
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
render << "\"" + value->get_string() + "\"";
render << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
render << ((value->get_integer() > 0) ? "true" : "false");

View File

@ -330,7 +330,7 @@ std::string t_csharp_generator::render_const_value(ofstream& out, string name, t
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
render << "\"" + value->get_string() + "\"";
render << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
render << ((value->get_integer() > 0) ? "true" : "false");

View File

@ -315,7 +315,7 @@ string t_erl_generator::render_const_value(t_type* type, t_const_value* value) {
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
out << "\"" << value->get_string() << "\"";
out << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "true" : "false");

View File

@ -59,6 +59,19 @@ void t_generator::generate_program() {
close_generator();
}
/**
 * Builds a copy of `in` in which every character that has an entry in the
 * escape_ table is replaced by that entry's replacement text. Characters
 * without an entry are copied through unchanged.
 *
 * @param in The raw string to escape.
 * @return The escaped string.
 */
string t_generator::escape_string(const string &in) const {
  string escaped;
  string::const_iterator cur = in.begin();
  while (cur != in.end()) {
    const std::map<char, std::string>::const_iterator hit = escape_.find(*cur);
    if (hit == escape_.end()) {
      // No replacement registered for this character: keep it verbatim.
      escaped.push_back(*cur);
    } else {
      escaped.append(hit->second);
    }
    ++cur;
  }
  return escaped;
}
void t_generator::generate_consts(vector<t_const*> consts) {
vector<t_const*>::iterator c_iter;
for (c_iter = consts.begin(); c_iter != consts.end(); ++c_iter) {

View File

@ -27,6 +27,11 @@ class t_generator {
indent_ = 0;
program_ = program;
program_name_ = get_program_name(program);
escape_['\n'] = "\\n";
escape_['\r'] = "\\r";
escape_['\t'] = "\\t";
escape_['"'] = "\\\"";
escape_['\\'] = "\\\\";
}
virtual ~t_generator() {}
@ -45,6 +50,16 @@ class t_generator {
const std::string& line_prefix,
const std::string& contents,
const std::string& comment_end);
/**
* Escape a string so that it can be used as a literal in generated sources.
*/
virtual std::string escape_string(const std::string &in) const;
/**
 * Convenience wrapper: fetches the string held by `constval` and passes it
 * through escape_string().
 */
std::string get_escaped_string(t_const_value* constval) {
  const std::string& raw = constval->get_string();
  return escape_string(raw);
}
protected:
/**
@ -184,6 +199,11 @@ class t_generator {
*/
std::string out_dir_base_;
/**
* Map of characters to escape in string literals.
*/
std::map<char, std::string> escape_;
private:
/**
* Current code indentation level

View File

@ -303,7 +303,7 @@ string t_hs_generator::render_const_value(t_type* type, t_const_value* value) {
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
out << "\"" << value->get_string() << "\"";
out << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "True" : "False");

View File

@ -32,6 +32,12 @@ class t_html_generator : public t_generator {
: t_generator(program)
{
out_dir_base_ = "gen-html";
escape_.clear();
escape_['&'] = "&amp;";
escape_['<'] = "&lt;";
escape_['>'] = "&gt;";
escape_['"'] = "&quot;";
escape_['\''] = "&apos;";
}
void generate_program();
@ -396,7 +402,7 @@ void t_html_generator::print_const_value(t_const_value* tvalue) {
f_out_ << tvalue->get_double();
break;
case t_const_value::CV_STRING:
f_out_ << "\"" << tvalue->get_string() << "\"";
f_out_ << '"' << get_escaped_string(tvalue) << '"';
break;
case t_const_value::CV_MAP:
{

View File

@ -519,7 +519,7 @@ string t_java_generator::render_const_value(ofstream& out, string name, t_type*
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
render << "\"" + value->get_string() + "\"";
render << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
render << ((value->get_integer() > 0) ? "true" : "false");

View File

@ -361,7 +361,7 @@ string t_ocaml_generator::render_const_value(t_type* type, t_const_value* value)
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
out << "\"" << value->get_string() << "\"";
out << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "true" : "false");

View File

@ -31,6 +31,8 @@ class t_perl_generator : public t_oop_generator {
: t_oop_generator(program)
{
out_dir_base_ = "gen-perl";
escape_['$'] = "\\$";
escape_['@'] = "\\@";
}
/**
@ -328,7 +330,7 @@ string t_perl_generator::render_const_value(t_type* type, t_const_value* value)
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
out << "'" << value->get_string() << "'";
out << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "1" : "0");
@ -1796,5 +1798,4 @@ string t_perl_generator ::type_to_enum(t_type* type) {
throw "INVALID TYPE IN type_to_enum: " + type->get_name();
}
THRIFT_REGISTER_GENERATOR(perl, "Perl", "");

View File

@ -51,6 +51,7 @@ class t_php_generator : public t_oop_generator {
}
out_dir_base_ = (binary_inline_ ? "gen-phpi" : "gen-php");
escape_['$'] = "\\$";
}
/**
@ -371,7 +372,7 @@ string t_php_generator::render_const_value(t_type* type, t_const_value* value) {
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
out << "'" << value->get_string() << "'";
out << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "true" : "false");

View File

@ -387,7 +387,7 @@ string t_py_generator::render_const_value(t_type* type, t_const_value* value) {
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
out << "'" << value->get_string() << "'";
out << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "True" : "False");

View File

@ -338,7 +338,7 @@ string t_rb_generator::render_const_value(t_type* type, t_const_value* value) {
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
out << "%q\"" << value->get_string() << '"';
out << "%q\"" << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "true" : "false");

View File

@ -348,7 +348,7 @@ string t_st_generator::render_const_value(t_type* type, t_const_value* value) {
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
out << "'" << value->get_string() << "'";
out << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "true" : "false");

View File

@ -14,6 +14,7 @@
%{
#include <string>
#include <errno.h>
#include "main.h"
@ -58,10 +59,8 @@ doctext ("/**"([^*/]|[^*]"/"|"*"[^/])*"*"*"*/")
comment ("//"[^\n]*)
unixcomment ("#"[^\n]*)
symbol ([:;\,\{\}\(\)\=<>\[\]])
dliteral ("\""[^"]*"\"")
sliteral ("'"[^']*"'")
st_identifier ([a-zA-Z-][\.a-zA-Z_0-9-]*)
literal_begin (['\"])
%%
@ -222,17 +221,56 @@ st_identifier ([a-zA-Z-][\.a-zA-Z_0-9-]*)
return tok_st_identifier;
}
{dliteral} {
yylval.id = strdup(yytext+1);
yylval.id[strlen(yylval.id)-1] = '\0';
return tok_literal;
{literal_begin} {
/*
 * Scans a quoted string literal by hand. The pattern only matches the
 * opening quote (single or double); the rest of the literal is pulled
 * character by character with yyinput() so that backslash escapes can
 * be decoded while reading. On success the decoded text is stored in
 * yylval.id and tok_literal is returned.
 */
/* The closing quote must be the same character that opened the literal. */
char mark = yytext[0];
std::string result;
for(;;)
{
int ch = yyinput();
switch (ch) {
case EOF:
/* Input ended before the closing quote: report and abort. */
yyerror("End of file while read string at %d\n", yylineno);
exit(1);
case '\n':
/*
 * A raw newline inside a literal is rejected.
 * NOTE(review): the line number is reported minus one, presumably
 * because yylineno was already advanced by the consumed newline;
 * confirm against the scanner options.
 */
yyerror("End of line while read string at %d\n", yylineno - 1);
exit(1);
case '\\':
/* Backslash: the next character selects the escape to decode. */
ch = yyinput();
switch (ch) {
case 'r':
result.push_back('\r');
continue;
case 'n':
result.push_back('\n');
continue;
case 't':
result.push_back('\t');
continue;
case '"':
result.push_back('"');
continue;
case '\'':
result.push_back('\'');
continue;
case '\\':
result.push_back('\\');
continue;
default:
/*
 * Any other character after a backslash (including end of input)
 * is an error. Unlike the cases above this path returns -1 to the
 * caller instead of calling exit.
 */
yyerror("Bad escape character\n");
return -1;
}
/* Not reached: every branch of the inner switch continues or returns. */
break;
default:
if (ch == mark) {
/*
 * Closing quote found: hand a heap copy of the decoded text to the
 * parser. NOTE(review): the strdup result is presumably owned by
 * whoever consumes yylval.id; confirm where it is released.
 */
yylval.id = strdup(result.c_str());
return tok_literal;
} else {
/* Ordinary character, including a quote of the other kind. */
result.push_back(ch);
}
}
}
}
{sliteral} {
yylval.id = strdup(yytext+1);
yylval.id[strlen(yylval.id)-1] = '\0';
return tok_literal;
}
{doctext} {
/* This does not show up in the parse tree. */