1286 lines
34 KiB
C++
1286 lines
34 KiB
C++
|
|
#include <pro.h>
|
|
|
|
#include "java.hpp"
|
|
#include "oututil.hpp"
|
|
#include "upgrade.hpp"
|
|
|
|
#ifdef TEST_FMTSTR
|
|
//-------------------------------------------------------------------------
|
|
// Test build only (TEST_FMTSTR): appends printf-style formatted text,
// described by 'format' and the argument list 'va', to 'outbuf'.
AS_PRINTF(2, 0) void out_java_t::out_vprintf(const char *format, va_list va)
{
  outbuf.cat_vsprnt(format, va);
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
void out_java_t::out_char(char c)
|
|
{
|
|
outbuf.append(c);
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
void out_java_t::out_line(const char *str)
|
|
{
|
|
outbuf.append(str);
|
|
}
|
|
#endif
|
|
|
|
//-------------------------------------------------------------------------
|
|
//
|
|
//-----------------------------------------------------------------------
|
|
// Tells whether 'cp' is one of the two tags 'j_class' or 'j_typeref'.
inline bool is_class_or_typeref(wchar32_t cp)
{
  if ( cp == j_class )
    return true;
  return cp == j_typeref;
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Printable names of the primitive types. get_base_typename() indexes this
// table by position, so the order of the entries must not change.
const TXS tp_decl[] =
{
  TXS_DECLARE("void"),    // [0] ATTENTION: only for fmtStr
  TXS_DECLARE("byte"),    // [1]
  TXS_DECLARE("char"),    // [2]
  TXS_DECLARE("double"),  // [3]
  TXS_DECLARE("float"),   // [4]
  TXS_DECLARE("int"),     // [5]
  TXS_DECLARE("long"),    // [6]
  TXS_DECLARE("short"),   // [7]
  TXS_DECLARE("boolean")  // [8]
};
|
|
|
|
//-----------------------------------------------------------------------
|
|
// BaseType:
|
|
// B
|
|
// C
|
|
// D
|
|
// F
|
|
// I
|
|
// J
|
|
// S
|
|
// Z
|
|
// ...to which we'll add the support for V=void
|
|
// Maps a base-type tag to its entry in 'tp_decl'.
//   tag     - tag character to look up
//   or_void - when true, 'j_void_ret' is accepted too and maps to "void"
// Returns NULL for any tag that is not a base type (and for 'j_void_ret'
// when 'or_void' is false).
const TXS *get_base_typename(uchar tag, bool or_void)
{
  size_t idx = 0;
  switch ( tag )
  {
    case j_void_ret:
      if ( !or_void )
        return NULL;
      idx = 0;
      break;
    case j_byte:   idx = 1; break;
    case j_char:   idx = 2; break;
    case j_double: idx = 3; break;
    case j_float:  idx = 4; break;
    case j_int:    idx = 5; break;
    case j_long:   idx = 6; break;
    case j_short:  idx = 7; break;
    case j_bool:   idx = 8; break;
    default:
      return NULL;
  }
  return &tp_decl[idx];
}
|
|
|
|
//-----------------------------------------------------------------------
|
|
#ifndef TEST_FMTSTR
|
|
// #define DUMP_FORMATTED 1
|
|
#ifdef DUMP_FORMATTED
|
|
// Debug-dump names for the formatting modes; fmt_to_string() indexes this
// table with the (masked) fmt_t value.
static const char *const _fmt_strings[] =
{
  "debug",     "string",    "quoted",   "dscr",
  "prefsgn",   "retdscr",   "paramstr", "throws",
  "clssign",   "signature", "cast",     "classname",
  "fullname",  "name",
};
|
|
// Returns the debug-dump name of 'mode' once the FMT_ENC_RESERVED bits
// have been masked off, or "<UNKNOWN>" when the masked value lies past
// fmt_UnqualifiedName.
static const char *fmt_to_string(fmt_t mode)
{
  const fmt_t stripped = fmt_t(int(mode) & ~FMT_ENC_RESERVED);
  return stripped > fmt_UnqualifiedName ? "<UNKNOWN>" : _fmt_strings[stripped];
}
|
|
#endif
|
|
#endif // TEST_FMTSTR
|
|
|
|
|
|
//-------------------------------------------------------------------------
|
|
struct format_helper_t
|
|
{
|
|
java_t ±
|
|
const ushort *tp;
|
|
uint32 nutf16;
|
|
int strcnt;
|
|
ssize_t size;
|
|
const uint32 off_ReturnType;
|
|
const uint32 off_ThrowsSignature_and_TypeSignature;
|
|
out_java_t *oj;
|
|
_PRMPT_ *putproc;
|
|
fmt_t mode;
|
|
#ifdef DUMP_FORMATTED
|
|
qstring header;
|
|
qwstring input;
|
|
qstring utf8_input;
|
|
qstring collected;
|
|
#endif
|
|
|
|
format_helper_t(
|
|
java_t &_pm,
|
|
const ushort *_tp,
|
|
uint32 _nutf16,
|
|
uint32 _off_ReturnType,
|
|
uint32 _off_ThrowsSignature_and_TypeSignature,
|
|
ssize_t _size,
|
|
fmt_t _mode,
|
|
out_java_t *_oj,
|
|
_PRMPT_ _putproc);
|
|
|
|
int format();
|
|
|
|
private:
|
|
bool parse_ClassSignature();
|
|
bool parse_FormalTypeParameters();
|
|
bool parse_FormalTypeParameter();
|
|
bool parse_SuperclassSignature();
|
|
bool parse_SuperinterfaceSignature();
|
|
bool parse_Identifier();
|
|
bool parse_ClassBound();
|
|
bool parse_InterfaceBound();
|
|
bool parse_FieldTypeSignature();
|
|
bool parse_ClassTypeSignature();
|
|
bool parse_ArrayTypeSignature();
|
|
bool parse_TypeVariableSignature();
|
|
bool parse_SimpleClassTypeSignature();
|
|
bool parse_ClassTypeSignatureSuffix();
|
|
bool parse_TypeSignature();
|
|
bool parse_TypeArgument();
|
|
bool parse_TypeArguments();
|
|
bool parse_TypeArguments_opt();
|
|
bool parse_BaseType();
|
|
bool parse_ReturnType();
|
|
bool parse_ThrowsSignature();
|
|
bool parse_MethodParams();
|
|
bool parse_FieldType();
|
|
bool parse_ArrayType();
|
|
bool parse_ObjectType();
|
|
bool parse_UnqualifiedName();
|
|
bool parse_FieldDescriptor();
|
|
bool parse_MethodDescriptor();
|
|
bool parse_ReturnDescriptor();
|
|
bool parse_ParameterDescriptor();
|
|
|
|
bool is_BaseType(wchar16_t v) const;
|
|
#define PCD_TYPE_ARGUMENTS_ALLOWED 0x1 // '<' and '>' are allowed
|
|
#define PCD_EXPECT_SIGNATURE_BOUNDARIES 0x2 // expects 'L' and ';'
|
|
bool parse_class_desc(uint32 flags);
|
|
|
|
bool ensure_remaining(ssize_t needed);
|
|
wchar16_t lookahead_utf16() const;
|
|
bool discard_utf16_expect(wchar16_t expected);
|
|
bool consume_utf16_expect(wchar16_t expected);
|
|
bool next_cp(wchar32_t *out);
|
|
void maybe_report(bool failure=false) const;
|
|
bool out_cp(wchar32_t cp);
|
|
bool out_param_sep() { return out_line(", ", 2); }
|
|
bool out_array_dim() { return out_line("[]", 2); }
|
|
bool out_line(const char *s, size_t len);
|
|
bool out_escaped_char(uchar cs);
|
|
bool out_utf16_escaped_halfword(wchar16_t hw);
|
|
bool out_unicode_escaped_cp(wchar32_t cp);
|
|
bool out_octal_char(uchar cs);
|
|
enum sfx_type_t
|
|
{
|
|
sfxt_extends = 0,
|
|
sfxt_super,
|
|
sfxt_implements,
|
|
sfxt_throws,
|
|
};
|
|
bool out_sfx(sfx_type_t sfxt);
|
|
|
|
void _out_byte_to_ctx(uchar cs);
|
|
void _out_line_to_ctx(const char *utf8);
|
|
void _badidb(const char *from) const;
|
|
};
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Stores the arguments verbatim and starts with a string count of zero.
// With DUMP_FORMATTED, also records a textual description of the arguments
// and keeps copies of the input (raw UTF-16 and UTF-8) for maybe_report().
format_helper_t::format_helper_t(
        java_t &_pm,
        const ushort *_tp,
        uint32 _nutf16,
        uint32 _off_ReturnType,
        uint32 _off_ThrowsSignature_and_TypeSignature,
        ssize_t _size,
        fmt_t _mode,
        out_java_t *_oj,
        _PRMPT_ _putproc)
  : pm(_pm),
    tp(_tp),
    nutf16(_nutf16),
    strcnt(0),
    size(_size),
    off_ReturnType(_off_ReturnType),
    off_ThrowsSignature_and_TypeSignature(_off_ThrowsSignature_and_TypeSignature),
    oj(_oj),
    putproc(_putproc),
    mode(_mode)
{
#ifdef DUMP_FORMATTED
  // describe the arguments
  header.sprnt("nutf16=%u, ", _nutf16);
  header.cat_sprnt("posit=%u, ", _off_ReturnType);
  header.cat_sprnt("possgn=%u, ", _off_ThrowsSignature_and_TypeSignature);
  header.cat_sprnt("mode=%u (%s)", _mode, fmt_to_string(_mode));

  // snapshot the raw input
  if ( nutf16 > 0 )
  {
    input.resize(_nutf16);
    memcpy(input.begin(), tp, nutf16 * sizeof(ushort));
  }
  utf16_utf8(&utf8_input, (const wchar16_t *) tp, nutf16);
#endif
}
|
|
|
|
// Reports a corrupted database, tagging the report with the calling function.
#define BADIDB() _badidb(__FUNCTION__)

// Evaluates 'Expr' and makes the enclosing function return false if it
// yields false. Wrapped in do/while so that it behaves as one statement.
#define CHECKED(Expr) do { if ( !(Expr) ) return false; } while ( false )
|
|
|
|
//-------------------------------------------------------------------------
|
|
// ClassSignature:
|
|
// FormalTypeParameters opt SuperclassSignature SuperinterfaceSignature*
|
|
bool format_helper_t::parse_ClassSignature()
|
|
{
|
|
if ( lookahead_utf16() == j_sign )
|
|
CHECKED(parse_FormalTypeParameters());
|
|
CHECKED(out_sfx(sfxt_extends));
|
|
CHECKED(parse_SuperclassSignature());
|
|
for ( uint32 impl_cnt = 0; lookahead_utf16() == j_class; ++impl_cnt )
|
|
{
|
|
if ( impl_cnt == 0 )
|
|
CHECKED(out_sfx(sfxt_implements));
|
|
else
|
|
CHECKED(out_param_sep());
|
|
CHECKED(parse_SuperinterfaceSignature());
|
|
}
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// FormalTypeParameters:
|
|
// < FormalTypeParameter+ >
|
|
bool format_helper_t::parse_FormalTypeParameters()
|
|
{
|
|
CHECKED(consume_utf16_expect(j_sign));
|
|
for ( uint32 cnt = 0; ; ++cnt )
|
|
{
|
|
if ( cnt++ > 0 )
|
|
CHECKED(out_param_sep());
|
|
CHECKED(parse_FormalTypeParameter());
|
|
if ( lookahead_utf16() == j_endsign )
|
|
break;
|
|
}
|
|
CHECKED(consume_utf16_expect(j_endsign));
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// FormalTypeParameter:
|
|
// Identifier ClassBound InterfaceBound*
|
|
bool format_helper_t::parse_FormalTypeParameter()
|
|
{
|
|
CHECKED(parse_Identifier());
|
|
CHECKED(parse_ClassBound());
|
|
for ( uint32 interface_cnt = 0; nutf16 > 0; ++interface_cnt )
|
|
{
|
|
if ( lookahead_utf16() != j_tag )
|
|
break;
|
|
CHECKED(parse_InterfaceBound());
|
|
}
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// SuperclassSignature:
|
|
// ClassTypeSignature
|
|
bool format_helper_t::parse_SuperclassSignature()
|
|
{
|
|
CHECKED(parse_ClassTypeSignature());
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// SuperinterfaceSignature:
|
|
// ClassTypeSignature
|
|
bool format_helper_t::parse_SuperinterfaceSignature()
|
|
{
|
|
CHECKED(parse_ClassTypeSignature());
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// In the following, the terminal symbol Identifier is used to denote the name of a type, field, local
|
|
// variable, parameter, method, or type variable, as generated by a Java compiler. Such a name must
|
|
// not contain any of the ASCII characters . ; [ / < > : (that is, the characters forbidden in method
|
|
// names (§4.2.2) and also colon) but may contain characters that must not appear in an identifier in
|
|
// the Java programming language (JLS §3.8).
|
|
bool format_helper_t::parse_Identifier()
|
|
{
|
|
while ( nutf16 > 0 )
|
|
{
|
|
wchar16_t la = lookahead_utf16();
|
|
switch ( la )
|
|
{
|
|
case j_field_dlm:
|
|
case j_endclass:
|
|
case j_array:
|
|
case j_clspath_dlm:
|
|
case j_sign:
|
|
case j_endsign:
|
|
case j_tag:
|
|
return true;
|
|
}
|
|
wchar32_t cp;
|
|
if ( next_cp(&cp) && is_cp_graphical(cp) )
|
|
CHECKED(out_cp(cp));
|
|
else
|
|
CHECKED(out_unicode_escaped_cp(cp)); // partial codepoint, broken surrogate, ...
|
|
}
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// ClassBound:
|
|
// : FieldTypeSignatureopt
|
|
bool format_helper_t::parse_ClassBound()
|
|
{
|
|
CHECKED(discard_utf16_expect(j_tag));
|
|
if ( lookahead_utf16() != j_tag )
|
|
{
|
|
CHECKED(out_sfx(sfxt_extends));
|
|
CHECKED(parse_FieldTypeSignature());
|
|
}
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// InterfaceBound:
|
|
// : FieldTypeSignature
|
|
bool format_helper_t::parse_InterfaceBound()
|
|
{
|
|
CHECKED(discard_utf16_expect(j_tag));
|
|
CHECKED(out_sfx(sfxt_implements));
|
|
CHECKED(parse_FieldTypeSignature());
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// FieldTypeSignature:
|
|
// ClassTypeSignature
|
|
// ArrayTypeSignature
|
|
// TypeVariableSignature
|
|
bool format_helper_t::parse_FieldTypeSignature()
|
|
{
|
|
wchar16_t la = lookahead_utf16();
|
|
switch ( la )
|
|
{
|
|
case j_class:
|
|
CHECKED(parse_ClassTypeSignature());
|
|
break;
|
|
case j_array:
|
|
CHECKED(parse_ArrayTypeSignature());
|
|
break;
|
|
case j_typeref:
|
|
CHECKED(parse_TypeVariableSignature());
|
|
break;
|
|
case 0:
|
|
default:
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
bool format_helper_t::parse_ClassTypeSignature()
|
|
{
|
|
return parse_class_desc(PCD_TYPE_ARGUMENTS_ALLOWED|PCD_EXPECT_SIGNATURE_BOUNDARIES);
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
bool format_helper_t::parse_ArrayTypeSignature()
|
|
{
|
|
CHECKED(discard_utf16_expect(j_array));
|
|
CHECKED(parse_TypeSignature());
|
|
CHECKED(out_array_dim());
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
bool format_helper_t::parse_TypeVariableSignature()
|
|
{
|
|
CHECKED(discard_utf16_expect(j_typeref));
|
|
CHECKED(parse_Identifier());
|
|
CHECKED(discard_utf16_expect(j_endclass));
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// SimpleClassTypeSignature:
|
|
// Identifier TypeArgumentsopt
|
|
bool format_helper_t::parse_SimpleClassTypeSignature()
|
|
{
|
|
CHECKED(parse_Identifier());
|
|
CHECKED(parse_TypeArguments_opt());
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// ClassTypeSignatureSuffix:
|
|
// . SimpleClassTypeSignature
|
|
bool format_helper_t::parse_ClassTypeSignatureSuffix()
|
|
{
|
|
CHECKED(consume_utf16_expect(j_field_dlm));
|
|
CHECKED(parse_SimpleClassTypeSignature());
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// TypeSignature:
|
|
// FieldTypeSignature
|
|
// BaseType
|
|
bool format_helper_t::parse_TypeSignature()
|
|
{
|
|
wchar16_t la = lookahead_utf16();
|
|
if ( is_BaseType(la) )
|
|
CHECKED(parse_BaseType());
|
|
else
|
|
CHECKED(parse_FieldTypeSignature());
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// TypeArgument:
|
|
// WildcardIndicator opt FieldTypeSignature
|
|
// *
|
|
bool format_helper_t::parse_TypeArgument()
|
|
{
|
|
wchar16_t la = lookahead_utf16();
|
|
switch ( la )
|
|
{
|
|
case j_wild:
|
|
CHECKED(discard_utf16_expect(la));
|
|
CHECKED(out_cp('?'));
|
|
break;
|
|
case j_wild_e:
|
|
case j_wild_s:
|
|
CHECKED(discard_utf16_expect(la));
|
|
CHECKED(out_cp('?'));
|
|
CHECKED(out_sfx(la == j_wild_s ? sfxt_super : sfxt_extends));
|
|
// fallthrough
|
|
default:
|
|
CHECKED(parse_FieldTypeSignature());
|
|
break;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// TypeArguments:
|
|
// < TypeArgument+ >
|
|
bool format_helper_t::parse_TypeArguments()
|
|
{
|
|
CHECKED(consume_utf16_expect(j_sign));
|
|
uint32 cnt = 0;
|
|
do
|
|
{
|
|
if ( cnt++ > 0 )
|
|
CHECKED(out_param_sep());
|
|
CHECKED(parse_TypeArgument());
|
|
} while ( lookahead_utf16() != j_endsign );
|
|
CHECKED(consume_utf16_expect(j_endsign));
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
bool format_helper_t::parse_TypeArguments_opt()
|
|
{
|
|
if ( lookahead_utf16() == j_sign )
|
|
CHECKED(parse_TypeArguments());
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
bool format_helper_t::parse_BaseType()
|
|
{
|
|
wchar32_t cp;
|
|
CHECKED(next_cp(&cp));
|
|
if ( cp >= 0x10000 )
|
|
return false;
|
|
const TXS *tname = get_base_typename(cp, /*or_void=*/ false);
|
|
if ( tname == NULL )
|
|
return false;
|
|
CHECKED(out_line(tname->str, tname->size));
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// ReturnType:
|
|
// TypeSignature
|
|
// VoidDescriptor
|
|
bool format_helper_t::parse_ReturnType()
|
|
{
|
|
if ( lookahead_utf16() == j_void_ret )
|
|
{
|
|
const TXS *tname = get_base_typename(j_void_ret, /*or_void=*/ true);
|
|
CHECKED(out_line(tname->str, tname->size));
|
|
CHECKED(discard_utf16_expect(j_void_ret));
|
|
}
|
|
else
|
|
{
|
|
CHECKED(parse_TypeSignature());
|
|
}
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// ThrowsSignature:
|
|
// ^ ClassTypeSignature
|
|
// ^ TypeVariableSignature
|
|
bool format_helper_t::parse_ThrowsSignature()
|
|
{
|
|
for ( uint32 cnt = 0; nutf16 > 0; ++cnt )
|
|
{
|
|
CHECKED(discard_utf16_expect(j_throw));
|
|
if ( cnt == 0 )
|
|
CHECKED(out_sfx(sfxt_throws));
|
|
else
|
|
CHECKED(out_param_sep());
|
|
if ( lookahead_utf16() == j_class )
|
|
CHECKED(parse_ClassTypeSignature());
|
|
else
|
|
CHECKED(parse_TypeVariableSignature());
|
|
}
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
bool format_helper_t::parse_MethodParams()
|
|
{
|
|
CHECKED(consume_utf16_expect(j_parm_list_start));
|
|
for ( uint32 cnt = 0; nutf16 > 0 && lookahead_utf16() != j_parm_list_end; ++cnt )
|
|
{
|
|
if ( cnt > 0 )
|
|
CHECKED(out_param_sep());
|
|
CHECKED(parse_TypeSignature());
|
|
}
|
|
CHECKED(consume_utf16_expect(j_parm_list_end));
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// FieldType:
|
|
// BaseType
|
|
// ObjectType
|
|
// ArrayType
|
|
bool format_helper_t::parse_FieldType()
|
|
{
|
|
const wchar16_t la = lookahead_utf16();
|
|
switch ( la )
|
|
{
|
|
case j_class:
|
|
CHECKED(parse_ObjectType());
|
|
break;
|
|
case j_array:
|
|
CHECKED(parse_ArrayType());
|
|
break;
|
|
default:
|
|
if ( !is_BaseType(la) )
|
|
return false;
|
|
CHECKED(parse_BaseType());
|
|
break;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// ArrayType:
|
|
// [ ComponentType
|
|
//
|
|
// where:
|
|
// ComponentType:
|
|
// FieldType
|
|
bool format_helper_t::parse_ArrayType()
|
|
{
|
|
CHECKED(discard_utf16_expect(j_array));
|
|
CHECKED(parse_FieldType());
|
|
CHECKED(out_array_dim());
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// ObjectType:
|
|
// L ClassName ;
|
|
//
|
|
// where: "The ClassName represents a binary class or interface name encoded in internal form (§4.2.1)."
|
|
bool format_helper_t::parse_ObjectType()
|
|
{
|
|
return parse_class_desc(PCD_TYPE_ARGUMENTS_ALLOWED|PCD_EXPECT_SIGNATURE_BOUNDARIES);
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Names of methods, fields, and local variables are stored as unqualified
|
|
// names. An unqualified name must not contain any of the ASCII
|
|
// characters . ; [ / (that is, period or semicolon or left square bracket
|
|
// or forward slash). Method names are further constrained so that,
|
|
// with the exception of the special method names <init> and <clinit>
|
|
// (§2.9), they must not contain the ASCII characters < or > (that is,
|
|
// left angle bracket or right angle bracket).
|
|
bool format_helper_t::parse_UnqualifiedName()
|
|
{
|
|
while ( nutf16 > 0 )
|
|
{
|
|
wchar32_t cp;
|
|
if ( next_cp(&cp) && is_cp_graphical(cp) )
|
|
{
|
|
switch ( cp )
|
|
{
|
|
case j_field_dlm:
|
|
case j_endclass:
|
|
case j_array:
|
|
case j_clspath_dlm:
|
|
return false;
|
|
default:
|
|
CHECKED(out_cp(cp));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
CHECKED(out_unicode_escaped_cp(cp)); // partial codepoint, broken surrogate, ...
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
bool format_helper_t::parse_FieldDescriptor()
|
|
{
|
|
return parse_FieldType();
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
bool format_helper_t::parse_MethodDescriptor()
|
|
{
|
|
CHECKED(consume_utf16_expect(j_parm_list_start));
|
|
for ( uint32 cnt = 0; lookahead_utf16() != j_parm_list_end; ++cnt )
|
|
{
|
|
if ( cnt > 0 )
|
|
CHECKED(out_param_sep());
|
|
CHECKED(parse_ParameterDescriptor());
|
|
}
|
|
CHECKED(consume_utf16_expect(j_parm_list_end));
|
|
CHECKED(parse_ReturnDescriptor());
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
bool format_helper_t::parse_ReturnDescriptor()
|
|
{
|
|
if ( lookahead_utf16() == j_void_ret )
|
|
{
|
|
const TXS *tname = get_base_typename(j_void_ret, /*or_void=*/ true);
|
|
CHECKED(out_line(tname->str, tname->size));
|
|
CHECKED(discard_utf16_expect(j_void_ret));
|
|
}
|
|
else
|
|
{
|
|
CHECKED(parse_FieldType());
|
|
}
|
|
return true;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
bool format_helper_t::parse_ParameterDescriptor()
|
|
{
|
|
return parse_FieldType();
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Tells whether the UTF-16 unit 'v' is a base-type tag. "void" does not
// count, and neither does any value that does not fit in 8 bits.
bool format_helper_t::is_BaseType(wchar16_t v) const
{
  if ( v >= 0x100 )
    return false;
  return get_base_typename(v, /*or_void=*/ false) != NULL;
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// ClassTypeSignature:
|
|
// L PackageSpecifier opt SimpleClassTypeSignature ClassTypeSignatureSuffix* ;
|
|
//
|
|
// where:
|
|
// PackageSpecifier:
|
|
// Identifier / PackageSpecifier*
|
|
//
|
|
// and:
|
|
// SimpleClassTypeSignature:
|
|
// Identifier TypeArgumentsopt
|
|
//
|
|
// and:
|
|
// TypeArguments:
|
|
// < TypeArgument+ >
|
|
//
|
|
// Note that there is some ambiguity wrt the optional 'PackageSpecifier':
|
|
// it's impossible to know whether what we are parsing is the identifier
|
|
// of a PackageSpecifier, or that of a SimpleClassTypeSignature.
|
|
//
|
|
//
|
|
// Must also support binary class/interface names:
|
|
//
|
|
// 4.2.1. Binary Class and Interface Names
|
|
//
|
|
// Class and interface names that appear in class file structures
|
|
// are always represented in a fully qualified form known as binary
|
|
// names (JLS §13.1). Such names are always represented as CONSTANT_Utf8_info
|
|
// structures (§4.4.7) and thus may be drawn, where not further constrained,
|
|
// from the entire Unicode codespace.
|
|
// Class and interface names are referenced from those
|
|
// CONSTANT_NameAndType_info structures (§4.4.6) which have such names as
|
|
// part of their descriptor (§4.3), and from all CONSTANT_Class_info
|
|
// structures (§4.4.1).
|
|
//
|
|
// For historical reasons, the syntax of binary names that appear in
|
|
// class file structures differs from the syntax of binary names documented
|
|
// in JLS §13.1. In this internal form, the ASCII periods (.) that normally
|
|
// separate the identifiers which make up the binary name are replaced
|
|
// by ASCII forward slashes (/). The identifiers themselves must be
|
|
// unqualified names (§4.2.2).
|
|
//
|
|
// For example, the normal binary name of class Thread is java.lang.Thread.
|
|
// In the internal form used in descriptors in the class file format, a
|
|
// reference to the name of class Thread is implemented using a
|
|
// CONSTANT_Utf8_info structure representing the string java/lang/Thread.
|
|
// Formats a class description.
//   flags - combination of PCD_* bits:
//     PCD_EXPECT_SIGNATURE_BOUNDARIES: the input must start with 'j_class'
//       and end with 'j_endclass'; both are dropped from the output
//     PCD_TYPE_ARGUMENTS_ALLOWED: type arguments are accepted
// Path delimiters are emitted as '.'.
// Returns false on malformed input or when the output is exhausted.
bool format_helper_t::parse_class_desc(uint32 flags)
{
  const bool allow_type_args = (flags & PCD_TYPE_ARGUMENTS_ALLOWED) != 0;
  const bool with_boundaries = (flags & PCD_EXPECT_SIGNATURE_BOUNDARIES) != 0;

  if ( with_boundaries )
    CHECKED(discard_utf16_expect(j_class));

  // First stage: identifiers separated by path delimiters.
  bool finished = false;   // the description has been read completely
  bool in_suffix = false;  // a 'ClassTypeSignatureSuffix' comes next
  while ( !finished && !in_suffix )
  {
    CHECKED(parse_Identifier());
    const wchar16_t la = lookahead_utf16();
    if ( la == j_clspath_dlm )
    {
      CHECKED(out_cp('.'));
      CHECKED(discard_utf16_expect(j_clspath_dlm));
    }
    else if ( la == j_sign )
    {
      if ( !allow_type_args )
        return false;
      CHECKED(parse_TypeArguments());
      finished = true;
    }
    else if ( la == j_field_dlm )
    {
      in_suffix = true;
    }
    else if ( la == j_endclass )
    {
      finished = true;
    }
    else if ( la == 0 && !with_boundaries )
    {
      finished = true; // end of input is fine for a bare name
    }
    else
    {
      return false;
    }
  }

  // Second stage: 'ClassTypeSignatureSuffix*', up to the closing 'j_endclass'.
  while ( !finished )
  {
    const wchar16_t la = lookahead_utf16();
    if ( la == j_endclass )
      finished = true;
    else if ( la == j_field_dlm )
      CHECKED(parse_ClassTypeSignatureSuffix());
    else
      return false; // unexpected codepoint
  }

  if ( with_boundaries )
    CHECKED(discard_utf16_expect(j_endclass));
  return true;
}
|
|
|
|
#undef CHECKED
|
|
|
|
//-------------------------------------------------------------------------
|
|
int format_helper_t::format()
|
|
{
|
|
bool ok = false;
|
|
if ( mode == fmt_ClassSignature )
|
|
{
|
|
switch ( lookahead_utf16() )
|
|
{
|
|
case j_class:
|
|
ok = parse_ClassTypeSignature();
|
|
break;
|
|
case j_sign:
|
|
ok = parse_ClassSignature();
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
else if ( mode == fmt_method_FormalTypeParameters
|
|
|| mode == fmt_method_TypeSignature
|
|
|| mode == fmt_method_ReturnType
|
|
|| mode == fmt_method_ThrowsSignature )
|
|
{
|
|
// MethodTypeSignature:
|
|
// FormalTypeParameters opt (TypeSignature*) ReturnType ThrowsSignature*
|
|
if ( !off_ReturnType || off_ReturnType >= nutf16 || tp[off_ReturnType-1] != j_parm_list_end )
|
|
BADIDB();
|
|
|
|
ushort off_TypeSignature = ushort(off_ThrowsSignature_and_TypeSignature);
|
|
ushort off_ThrowsSignature = off_ThrowsSignature_and_TypeSignature >> 16;
|
|
if ( off_ThrowsSignature_and_TypeSignature > 0 )
|
|
{
|
|
if ( off_TypeSignature >= off_ReturnType
|
|
|| tp[off_TypeSignature] != j_parm_list_start )
|
|
{
|
|
BADIDB();
|
|
}
|
|
if ( off_ThrowsSignature > 0 )
|
|
{
|
|
if ( off_ThrowsSignature <= off_ReturnType
|
|
|| off_ThrowsSignature >= nutf16
|
|
|| tp[off_ThrowsSignature] != j_throw )
|
|
{
|
|
BADIDB();
|
|
}
|
|
}
|
|
}
|
|
switch ( mode )
|
|
{
|
|
case fmt_method_FormalTypeParameters:
|
|
if ( off_TypeSignature == 0 )
|
|
return 0;
|
|
if ( *tp != j_sign )
|
|
BADIDB();
|
|
nutf16 = off_TypeSignature;
|
|
ok = parse_FormalTypeParameters();
|
|
break;
|
|
case fmt_method_TypeSignature:
|
|
nutf16 = off_ReturnType - off_TypeSignature;
|
|
tp += off_TypeSignature;
|
|
ok = parse_MethodParams();
|
|
break;
|
|
case fmt_method_ReturnType:
|
|
if ( off_ThrowsSignature > 0 )
|
|
nutf16 = off_ThrowsSignature;
|
|
nutf16 -= off_ReturnType;
|
|
tp += off_ReturnType;
|
|
ok = parse_ReturnType();
|
|
break;
|
|
case fmt_method_ThrowsSignature:
|
|
if ( off_ThrowsSignature == 0 )
|
|
return 0;
|
|
nutf16 -= off_ThrowsSignature;
|
|
tp += off_ThrowsSignature;
|
|
ok = parse_ThrowsSignature();
|
|
break;
|
|
|
|
default: INTERR(10332);
|
|
}
|
|
if ( ok && mode != fmt_method_TypeSignature )
|
|
ok = out_cp(' ');
|
|
}
|
|
else if ( mode == fmt_FieldDescriptor_nospace )
|
|
{
|
|
switch ( lookahead_utf16() )
|
|
{
|
|
case j_class:
|
|
ok = parse_ClassTypeSignature();
|
|
break;
|
|
case j_sign:
|
|
ok = parse_ClassSignature();
|
|
break;
|
|
case j_parm_list_start:
|
|
ok = parse_MethodDescriptor();
|
|
break;
|
|
default:
|
|
ok = parse_TypeSignature();
|
|
break;
|
|
}
|
|
}
|
|
else if ( mode == fmt_FieldDescriptor )
|
|
{
|
|
ok = parse_FieldDescriptor() && out_cp(' ');
|
|
}
|
|
else if ( mode == fmt_fullname )
|
|
{
|
|
switch ( lookahead_utf16() )
|
|
{
|
|
case j_parm_list_start:
|
|
ok = parse_MethodDescriptor();
|
|
break;
|
|
case j_class:
|
|
ok = parse_ClassTypeSignature();
|
|
break;
|
|
case j_array:
|
|
ok = parse_ArrayType();
|
|
break;
|
|
default:
|
|
ok = parse_class_desc(0);
|
|
break;
|
|
}
|
|
}
|
|
else if ( mode == fmt_ClassName
|
|
|| mode == fmt_ClassName_or_Array )
|
|
{
|
|
ok = lookahead_utf16() == j_array
|
|
? parse_ArrayType()
|
|
: parse_class_desc(0);
|
|
}
|
|
else if ( mode == fmt_UnqualifiedName )
|
|
{
|
|
ok = parse_UnqualifiedName();
|
|
}
|
|
else if ( mode == fmt_debug || mode == fmt_string || mode == fmt_string_single_quotes )
|
|
{
|
|
uchar quotation = '"';
|
|
switch ( mode )
|
|
{
|
|
case fmt_debug:
|
|
if ( !pm.is_multiline_debug() )
|
|
mode = fmt_string; // optimize
|
|
break;
|
|
case fmt_string:
|
|
if ( pm.is_fmt_string_as_fmt_debug() )
|
|
mode = fmt_debug;
|
|
break;
|
|
case fmt_string_single_quotes:
|
|
quotation = '\'';
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
ok = out_cp(quotation);
|
|
while ( ok && nutf16 > 0 )
|
|
{
|
|
wchar32_t cp;
|
|
if ( !next_cp(&cp) )
|
|
{
|
|
ok = out_unicode_escaped_cp(cp); // partial codepoint, broken surrogate, ...
|
|
}
|
|
else
|
|
{
|
|
if ( cp >= 0x100 )
|
|
{
|
|
if ( !is_cp_graphical(cp) )
|
|
ok = out_unicode_escaped_cp(cp);
|
|
else
|
|
ok = out_cp(cp);
|
|
continue;
|
|
}
|
|
else if ( cp >= CHP_MAX )
|
|
{
|
|
ok = out_octal_char(cp);
|
|
continue;
|
|
}
|
|
else if ( cp >= ' ' )
|
|
{
|
|
if ( cp == '\\' || cp == '"' )
|
|
ok = out_escaped_char(cp);
|
|
else
|
|
ok = out_cp(cp);
|
|
continue;
|
|
}
|
|
else if ( cp < 0xD )
|
|
{
|
|
if ( cp < 8 || cp == 0xB )
|
|
goto checkdig;
|
|
{
|
|
static const char casc[(0xD-8)+1] = { 'b', 't', 'n', '?', 'f', 'r' };
|
|
cp = casc[cp-8]; //lint !e676 possibly indexing before the beginning of an allocation
|
|
if ( cp == 'n'
|
|
&& mode == fmt_debug
|
|
&& nutf16
|
|
&& size > 2 )
|
|
{
|
|
size = 2;
|
|
}
|
|
}
|
|
ok = out_escaped_char(cp);
|
|
continue;
|
|
}
|
|
checkdig:
|
|
if ( nutf16 > 0 && *tp <= '7' && *tp >= '0' )
|
|
{
|
|
ok = out_octal_char(cp);
|
|
}
|
|
else
|
|
{
|
|
if ( cp <= 7 )
|
|
{
|
|
ok = out_escaped_char(cp + '0');
|
|
}
|
|
else
|
|
{
|
|
char _buf[MAXSTR];
|
|
int _buflen = qsnprintf(_buf, sizeof(_buf), "\\%o", cp);
|
|
ok = out_line(_buf, _buflen);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if ( ok )
|
|
ok = out_cp(quotation);
|
|
}
|
|
maybe_report(/*failure=*/ !ok);
|
|
return strcnt;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Charges 'needed' units against the output budget 'size'.
// When the budget is too small, 'putproc' is called to obtain a new one:
// a zero result means that no more output is accepted (returns false),
// anything else bumps 'strcnt'.
bool format_helper_t::ensure_remaining(ssize_t needed)
{
  if ( size >= needed )
  {
    size -= needed;
    return true;
  }

  size = putproc(pm, oj);
  if ( size == 0 )
    return false;
  ++strcnt;
  size -= needed;
  return true;
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Returns the next UTF-16 unit without consuming it, or 0 at end of input.
wchar16_t format_helper_t::lookahead_utf16() const
{
  if ( nutf16 == 0 )
    return 0;
  return *tp;
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Consumes the next UTF-16 unit if, and only if, it equals 'expected'.
// Nothing is written to the output. Returns whether the unit was consumed.
bool format_helper_t::discard_utf16_expect(wchar16_t expected)
{
  if ( nutf16 > 0 && *tp == expected )
  {
    ++tp;
    --nutf16;
    return true;
  }
  return false;
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Consumes the next UTF-16 unit if it equals 'expected' and copies it to
// the output. Returns false when the unit does not match, or when it was
// consumed but could not be written (the result of out_cp() used to be
// dropped, unlike every other output call in the parser).
bool format_helper_t::consume_utf16_expect(wchar16_t expected)
{
  if ( !discard_utf16_expect(expected) )
    return false;
  return out_cp(expected);
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Reads one code point from the input into '*out'.
// A lead surrogate followed by a tail surrogate is combined and both units
// are consumed; otherwise exactly one unit is consumed.
// Returns false for a lone tail surrogate and for a lead surrogate that is
// not followed by a tail one; '*out' then holds the offending unit.
// The caller must guarantee nutf16 > 0: it is not checked here.
bool format_helper_t::next_cp(wchar32_t *out)
{
  wchar32_t unit = *tp;
  ++tp;
  --nutf16;

  bool valid;
  if ( is_tail_surrogate(unit) )
  {
    valid = false;                 // tail without a lead
  }
  else if ( !is_lead_surrogate(unit) )
  {
    valid = true;                  // plain BMP unit
  }
  else if ( nutf16 == 0 )
  {
    valid = false;                 // lead at the very end of the input
  }
  else
  {
    const wchar16_t second = *tp;
    valid = is_tail_surrogate(second);
    if ( valid )
    {
      unit = utf16_surrogates_to_cp(unit, second);
      ++tp;
      --nutf16;
    }
  }
  *out = unit;
  return valid;
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
void format_helper_t::maybe_report(bool failure) const
|
|
{
|
|
#ifdef DUMP_FORMATTED
|
|
if ( collected != utf8_input && !utf8_input.empty() )
|
|
{
|
|
qstring notag_collected;
|
|
tag_remove(¬ag_collected, collected);
|
|
if ( under_debugger && failure )
|
|
BPT;
|
|
msg("\n#%s %s", failure ? "FAILING_INPUT" : "FMTSTRING", header.c_str());
|
|
|
|
qstring utf8_input_user, notag_collected_user;
|
|
qstr2user(&utf8_input_user, utf8_input);
|
|
qstr2user(¬ag_collected_user, notag_collected);
|
|
qstring serialized_input;
|
|
for ( size_t i = 0, n = input.length(); i < n; ++i )
|
|
{
|
|
ushort cw = input[i];
|
|
if ( cw >= ' ' && cw < 0x7f && qisprint(char(cw)) && cw != '\\' )
|
|
serialized_input.append(char(cw));
|
|
else
|
|
serialized_input.cat_sprnt("\\u%04X", cw);
|
|
}
|
|
msg("#\x01%s\x01%s\x01%s\n",
|
|
utf8_input_user.c_str(),
|
|
serialized_input.c_str(),
|
|
notag_collected_user.c_str());
|
|
}
|
|
#else
|
|
qnotused(failure);
|
|
#endif // DUMP_FORMATTED
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Writes the code point 'cp' to the output as UTF-8, charging one unit
// against the output budget. A code point that cannot be encoded is
// replaced by CP_REPLCHAR. Returns false if the output is exhausted.
bool format_helper_t::out_cp(wchar32_t cp)
{
  // One extra byte for the terminator: the bytes are handed to
  // _out_line_to_ctx() as a C string.
  char utf8[MAX_UTF8_SEQ_LEN + 1];
  ssize_t nbytes = put_utf8_char(utf8, cp);
  if ( nbytes < 0 )
    nbytes = put_utf8_char(utf8, CP_REPLCHAR);
  if ( nbytes < 0 ) // PARANOYA
    nbytes = 0;
  // put_utf8_char() is not shown in this file; terminate explicitly rather
  // than rely on it having done so.
  utf8[nbytes] = '\0';
  bool ok = ensure_remaining(1);
  if ( ok )
    _out_line_to_ctx(utf8);
  return ok;
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
bool format_helper_t::out_line(const char *s, size_t len)
|
|
{
|
|
bool ok = ensure_remaining(len);
|
|
if ( ok )
|
|
_out_line_to_ctx(s);
|
|
return ok;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Writes a backslash followed by the byte 'cs' (2 units of budget).
// Returns false if the output is exhausted.
bool format_helper_t::out_escaped_char(uchar cs)
{
  if ( !ensure_remaining(2) )
    return false;
  _out_byte_to_ctx('\\');
  _out_byte_to_ctx(cs);
  return true;
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Writes the UTF-16 unit 'hw' as a \uXXXX escape (4 uppercase hex digits).
// Returns false if the output is exhausted.
bool format_helper_t::out_utf16_escaped_halfword(wchar16_t hw)
{
  char text[32];
  const int text_len = qsnprintf(text, sizeof(text), "\\u%04X", hw);
  if ( !ensure_remaining(text_len) )
    return false;
  _out_line_to_ctx(text);
  return true;
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Writes 'cp' as \uXXXX escapes: a single one for values below 0x10000,
// otherwise one for each half of the UTF-16 surrogate pair.
// Returns false if the output is exhausted.
bool format_helper_t::out_unicode_escaped_cp(wchar32_t cp)
{
  if ( cp >= 0x10000 )
  {
    const wchar16_t leading = 0xD800 + (((cp - 0x10000) >> 10) & 0x3FF);
    const wchar16_t tailing = 0xDC00 + (cp & 0x3FF);
    if ( !out_utf16_escaped_halfword(leading) )
      return false;
    return out_utf16_escaped_halfword(tailing);
  }
  return out_utf16_escaped_halfword(cp);
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Writes the byte 'cs' as a backslash followed by at least 3 octal digits.
// Returns false if the output is exhausted.
bool format_helper_t::out_octal_char(uchar cs)
{
  char text[32];
  const int text_len = qsnprintf(text, sizeof(text), "\\%.3o", cs);
  if ( !ensure_remaining(text_len) )
    return false;
  _out_line_to_ctx(text);
  return true;
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Writes the keyword selected by 'sfxt', surrounded by spaces.
// Returns false if the output is exhausted.
bool format_helper_t::out_sfx(sfx_type_t sfxt)
{
  // indexed by sfx_type_t
  static const TXS sfx[4] =
  {
    TXS_DECLARE(" extends "),
    TXS_DECLARE(" super "),
    TXS_DECLARE(" implements "),
    TXS_DECLARE(" throws ")
  };

  const TXS &keyword = sfx[sfxt];
  if ( !ensure_remaining(keyword.size) )
    return false;
  _out_line_to_ctx(keyword.str);
  return true;
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// Sends one byte to the output context, without any budget accounting.
// With DUMP_FORMATTED the byte is also recorded in 'collected'.
void format_helper_t::_out_byte_to_ctx(uchar cs)
{
  oj->out_char(cs);
#ifdef DUMP_FORMATTED
  collected.append(cs);
#endif
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
void format_helper_t::_out_line_to_ctx(const char *utf8)
|
|
{
|
|
oj->out_line(utf8);
|
|
#ifdef DUMP_FORMATTED
|
|
collected.append(utf8);
|
|
#endif
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
void format_helper_t::_badidb(const char *from) const //lint !e715 not referenced
|
|
{
|
|
qnotused(from);
|
|
DESTROYED(from);
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
int java_t::format_utf16_string(
|
|
const ushort *tp,
|
|
uint32 nutf16,
|
|
uint32 off_ReturnType,
|
|
uint32 off_ThrowsSignature_and_TypeSignature,
|
|
ssize_t size,
|
|
fmt_t mode,
|
|
out_java_t *oj,
|
|
_PRMPT_ putproc)
|
|
{
|
|
format_helper_t helper(*this, tp, nutf16, off_ReturnType, off_ThrowsSignature_and_TypeSignature, size, mode, oj, putproc);
|
|
return helper.format();
|
|
}
|