diff --git a/compiler2/Type_codegen.cc b/compiler2/Type_codegen.cc index a0716185a67b99091ffb5be21c6a1c495295a163..7da4282e0836d01ec3230e8fa46e5dfdaa1db8c3 100644 --- a/compiler2/Type_codegen.cc +++ b/compiler2/Type_codegen.cc @@ -712,7 +712,10 @@ void Type::generate_code_rawdescriptor(output_struct *target) str = mputprintf(str, "%s,", my_scope->get_scope_mod_gen() ->add_padding_pattern(string(rawattrib->padding_pattern)).c_str()); else str = mputstr(str, "NULL,"); - str = mputprintf(str, "%d};\n", rawattrib->length_restrition); + str = mputprintf(str, "%d,", rawattrib->length_restrition); + str = mputprintf(str, "CharCoding::%s};\n", + rawattrib->stringformat == CharCoding::UTF_8 ? "UTF_8" : + (rawattrib->stringformat == CharCoding::UTF16 ? "UTF16" : "UNKNOWN")); target->source.global_vars = mputstr(target->source.global_vars, str); Free(str); } diff --git a/compiler2/ttcn3/RawAST.cc b/compiler2/ttcn3/RawAST.cc index 4975ecf32cf9c0a6b085df3778c094455fe7e5b1..c0aff1306e9b5583110003562e7ce18fa8d76bf3 100644 --- a/compiler2/ttcn3/RawAST.cc +++ b/compiler2/ttcn3/RawAST.cc @@ -65,6 +65,7 @@ RawAST::RawAST(RawAST *other,bool int_type){ toplevel.bitorder=other->toplevel.bitorder; length_restrition=other->length_restrition; intx = other->intx; + stringformat = other->stringformat; } else init_rawast(int_type); } @@ -104,6 +105,7 @@ void RawAST::init_rawast(bool int_type){ presence.keyList=NULL; topleveleind=0; intx = false; + stringformat = CharCoding::UNKNOWN; } RawAST::~RawAST(){ @@ -193,6 +195,8 @@ void RawAST::print_RawAST(){ } } printf("%sIntX encoding\n\r", intx ? "" : "not "); + printf("String format: %s\n\r", stringformat == CharCoding::UTF_8 ? "UTF-8" : + (stringformat == CharCoding::UTF16 ? 
"UTF-16" : "unknown")); } void copy_rawAST_to_struct(RawAST *from, raw_attrib_struct *to){ diff --git a/compiler2/ttcn3/RawAST.hh b/compiler2/ttcn3/RawAST.hh index 753e3a57833293e38accf0be2e9c927830078fa4..502928f81a6a4dcb0da1a43e5072aa1a7a0b54d2 100644 --- a/compiler2/ttcn3/RawAST.hh +++ b/compiler2/ttcn3/RawAST.hh @@ -21,6 +21,7 @@ #include "rawASTspec.h" #include "../Setting.hh" #include "../Identifier.hh" +#include "../../common/CharCoding.hh" class XerAttributes; class TextAST; @@ -115,6 +116,8 @@ public: rawAST_toplevel toplevel; /**< Toplevel attributes */ int length_restrition; bool intx; /**< IntX encoding for integers */ + CharCoding::CharCodingType stringformat; /**< String serialization type for + * universal charstrings */ /** Default constructor. * Calls \c init_rawast(false). * \todo should be merged with the next one */ diff --git a/compiler2/ttcn3/rawAST.l b/compiler2/ttcn3/rawAST.l index 1fdb1f774cfcbe4fbb13cb2b3e1653d0863489f4..95745828831c19afd41ca9f437d9271f106fcd82 100644 --- a/compiler2/ttcn3/rawAST.l +++ b/compiler2/ttcn3/rawAST.l @@ -299,6 +299,10 @@ TOPLEVEL { BEGIN(rawcodec); RETURN(XToplevelKeyword); } IntX { RETURN(XIntXKeyword); } bit { RETURN(XBitKeyword); } unsigned { RETURN(XUnsignedKeyword); } +"UTF-8" { RETURN(XUTF8Keyword); } +"UTF-16" { RETURN(XUTF16Keyword); } +"IEEE754 float" { RETURN(XIEEE754FloatKeyword); } +"IEEE754 double" { RETURN(XIEEE754DoubleKeyword); } <rawcodec>{ yes { yylval.enumval = XDEFYES; RETURN(XYes); } diff --git a/compiler2/ttcn3/rawAST.y b/compiler2/ttcn3/rawAST.y index 945b4bc933a52e0b12c65752a1beb1a2398fd5b2..514a43d295764bf600358e5325b3ca0810eee149 100644 --- a/compiler2/ttcn3/rawAST.y +++ b/compiler2/ttcn3/rawAST.y @@ -163,6 +163,10 @@ static void yyprint(FILE *file, int type, const YYSTYPE& value); %token XIntXKeyword %token XBitKeyword %token XUnsignedKeyword +%token XUTF8Keyword +%token XUTF16Keyword +%token XIEEE754FloatKeyword +%token XIEEE754DoubleKeyword /* XER attributes */ %token XKWall "all" 
@@ -478,6 +482,10 @@ XSingleEncodingDef : XPaddingDef { rawstruct->intx = true; raw_f = true; } | XBitDef { raw_f = true; } + | XUTFDef + { raw_f = true; } + | XIEEE754Def + { raw_f = true; } /* TEXT encoder keywords */ | XBeginDef { text_f=true; } @@ -870,6 +878,14 @@ XBitDef: } ; +XUTFDef: + XUTF8Keyword { rawstruct->stringformat = CharCoding::UTF_8; } +| XUTF16Keyword { rawstruct->stringformat = CharCoding::UTF16; } + +XIEEE754Def: + XIEEE754FloatKeyword { rawstruct->fieldlength = 32; } +| XIEEE754DoubleKeyword { rawstruct->fieldlength = 64; } + /* Text encoder */ XBeginDef: XBeginKeyword '(' XEncodeToken ')' { diff --git a/core/RAW.cc b/core/RAW.cc index db313601ea01cd89671e17dff96a53fdf5dd4208..de81363ef885c81613695783d54583ad699bab46 100644 --- a/core/RAW.cc +++ b/core/RAW.cc @@ -437,11 +437,11 @@ int min_of_ints(int num_of_int,...) * | | padding_pattern_length * | | padding_pattern * length,comp ,byteorder,align ,ord_field,ord_octet,ext_bit ,hexorder,fieldorder,top_bit, | | length_restriction*/ -const TTCN_RAWdescriptor_t INTEGER_raw_= {8,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1}; -const TTCN_RAWdescriptor_t BOOLEAN_raw_= {1,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1}; -const TTCN_RAWdescriptor_t BITSTRING_raw_= {0,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1}; -const TTCN_RAWdescriptor_t OCTETSTRING_raw_= {0,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1}; -const TTCN_RAWdescriptor_t HEXSTRING_raw_= {0,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1}; -const TTCN_RAWdescriptor_t CHARSTRING_raw_= {0,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1}; -const 
TTCN_RAWdescriptor_t FLOAT_raw_= {64,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1}; -const TTCN_RAWdescriptor_t UNIVERSAL_CHARSTRING_raw_ = {0,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1}; +const TTCN_RAWdescriptor_t INTEGER_raw_= {8,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1,CharCoding::UNKNOWN}; +const TTCN_RAWdescriptor_t BOOLEAN_raw_= {1,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1,CharCoding::UNKNOWN}; +const TTCN_RAWdescriptor_t BITSTRING_raw_= {0,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1,CharCoding::UNKNOWN}; +const TTCN_RAWdescriptor_t OCTETSTRING_raw_= {0,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1,CharCoding::UNKNOWN}; +const TTCN_RAWdescriptor_t HEXSTRING_raw_= {0,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1,CharCoding::UNKNOWN}; +const TTCN_RAWdescriptor_t CHARSTRING_raw_= {0,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1,CharCoding::UNKNOWN}; +const TTCN_RAWdescriptor_t FLOAT_raw_= {64,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1,CharCoding::UNKNOWN}; +const TTCN_RAWdescriptor_t UNIVERSAL_CHARSTRING_raw_ = {0,SG_NO,ORDER_LSB,ORDER_LSB,ORDER_LSB,ORDER_LSB,EXT_BIT_NO,ORDER_LSB,ORDER_LSB,TOP_BIT_INHERITED,0,0,0,8,0,NULL,-1,CharCoding::UNKNOWN}; diff --git a/core/RAW.hh b/core/RAW.hh index 3557424ca862da6b99f1902c849ab848cd4870a5..79ae911fe2081312b3acd47be9054d5109310124 100644 --- a/core/RAW.hh +++ b/core/RAW.hh @@ -20,6 +20,7 @@ #include "Types.h" 
#include "Encdec.hh" +#include "CharCoding.hh" struct bignum_st; typedef bignum_st BIGNUM; @@ -114,6 +115,7 @@ struct TTCN_RAWdescriptor_t{ int padding_pattern_length; const unsigned char* padding_pattern; int length_restrition; + CharCoding::CharCodingType stringformat; }; enum calc_type { diff --git a/core/Universal_charstring.cc b/core/Universal_charstring.cc index 4d9957f067b38f89a1de1d2865f6b0363fc9faf0..df2e416dee9ce14353d1b5564b03e78e27c64ded 100644 --- a/core/Universal_charstring.cc +++ b/core/Universal_charstring.cc @@ -2309,7 +2309,18 @@ int UNIVERSAL_CHARSTRING::RAW_encode(const TTCN_Typedescriptor_t& p_td, return cstr.RAW_encode(p_td, myleaf); } TTCN_Buffer buff; - encode_utf8(buff); + switch (p_td.raw->stringformat) { + case CharCoding::UNKNOWN: // default is UTF-8 + case CharCoding::UTF_8: + encode_utf8(buff); + break; + case CharCoding::UTF16: + encode_utf16(buff, CharCoding::UTF16); + break; + default: + TTCN_EncDec_ErrorContext::error(TTCN_EncDec::ET_INTERNAL, + "Invalid string serialization type."); + } int buff_len = buff.get_len(); int bl = buff_len * 8; // bit length int align_length = p_td.raw->fieldlength ? p_td.raw->fieldlength - bl : 0; @@ -2343,11 +2354,29 @@ int UNIVERSAL_CHARSTRING::RAW_decode(const TTCN_Typedescriptor_t& p_td, break; } } - if (charstring) { - cstr = buff_str; - } - else { - decode_utf8(buff_str.val_ptr->n_chars, (const unsigned char*)buff_str.val_ptr->chars_ptr); + switch (p_td.raw->stringformat) { + case CharCoding::UNKNOWN: // default is UTF-8 + case CharCoding::UTF_8: + if (charstring) { + cstr = buff_str; + } + else { + decode_utf8(buff_str.val_ptr->n_chars, (const unsigned char*)buff_str.val_ptr->chars_ptr); + } + break; + case CharCoding::UTF16: + if (!charstring) { + decode_utf16(buff_str.val_ptr->n_chars, + (const unsigned char*)buff_str.val_ptr->chars_ptr, CharCoding::UTF16); + } + else { + TTCN_EncDec_ErrorContext::error(TTCN_EncDec::ET_INVAL_MSG, + "Invalid string format. 
Buffer contains only ASCII characters."); + } + break; + default: + TTCN_EncDec_ErrorContext::error(TTCN_EncDec::ET_INTERNAL, + "Invalid string serialization type."); } } return dec_len; diff --git a/regression_test/RAW/Annex_E_variants/Annex_E_variants.ttcn b/regression_test/RAW/Annex_E_variants/Annex_E_variants.ttcn index 2cf3c69f61a80b8869a0c2052aa48d4e6fdcf814..c2715132e6daadb40f7e988252074823c458541a 100644 --- a/regression_test/RAW/Annex_E_variants/Annex_E_variants.ttcn +++ b/regression_test/RAW/Annex_E_variants/Annex_E_variants.ttcn @@ -14,11 +14,17 @@ // This module contains tests for the variant attributes defined in Annex E of the // TTCN-3 language standard. // The encoding of the types with the new variants are compared to the encoding of -// types declared using traditional RAW variant attributes. +// types declared using traditional RAW variant attributes (whenever possible). // New attributes and their equivalents: // "N bit" == "FIELDLENGTH(N), COMP(signbit), BYTEORDER(last)" // "unsigned N bit" == "FIELDLENGTH(N), COMP(nosign), BYTEORDER(last)" +// "IEEE754 float" == "FORMAT(IEEE754 float)" +// "IEEE754 double" == "FORMAT(IEEE754 double)" + +// Other new attributes and their meaning: +// "UTF-8", "UTF-16" - universal charstring encoding types, they are equivalent to calling +// oct2unichar or unichar2oct module Annex_E_variants { // E.2.1.0 Signed and unsigned single byte integers @@ -89,6 +95,19 @@ type charstring CStr64Eq with { variant "FIELDLENGTH(8), BYTEORDER(last)" }; type universal charstring UStr80 with { variant "80 bit" }; // 80 bits = 10 UTF-8 characters type universal charstring UStr80Eq with { variant "FIELDLENGTH(10), BYTEORDER(last)" }; +// E.2.1.4 IEEE 754 (TM) floats +type float IEEE754Float with { variant "IEEE754 float" }; +type float IEEE754FloatEq with { variant "FORMAT(IEEE754 float)" }; + +type float IEEE754Double with { variant "IEEE754 double" }; +type float IEEE754DoubleEq with { variant "FORMAT(IEEE754 double)" }; + +// 
E.2.2.0 UTF-8 characters string +type universal charstring UTF8String with { variant "UTF-8" }; + +// E.2.2.1 and E.2.2.2 UTF-16 characters string +type universal charstring UTF16String with { variant "UTF-16" }; + // Component type type component CT {} @@ -515,6 +534,90 @@ testcase tc_ustr80() runs on CT setverdict(pass); } +testcase tc_ieee754_float() runs on CT +{ + var IEEE754Float x := -391.582; + var IEEE754FloatEq x_eq := x; + var bitstring enc := encvalue(x); + var bitstring enc_exp := encvalue(x_eq); + if (enc != enc_exp) { + setverdict(fail, "Expected: ", enc_exp, ", got: ", enc); + } + else { + var IEEE754Float dec; + var float eps := 0.0001; + if (decvalue(enc, dec) != 0) { + setverdict(fail, "Could not decode ", enc); + } + else if (dec - x > eps or dec - x < -eps) { + setverdict(fail, "Expected: ", x, ", got: ", dec); + } + } + setverdict(pass); +} + +testcase tc_ieee754_double() runs on CT +{ + var IEEE754Double x := 1165.141997; + var IEEE754DoubleEq x_eq := x; + var bitstring enc := encvalue(x); + var bitstring enc_exp := encvalue(x_eq); + if (enc != enc_exp) { + setverdict(fail, "Expected: ", enc_exp, ", got: ", enc); + } + else { + var IEEE754Double dec; + var float eps := 0.000001; + if (decvalue(enc, dec) != 0) { + setverdict(fail, "Could not decode ", enc); + } + else if (dec - x > eps or dec - x < -eps) { + setverdict(fail, "Expected: ", x, ", got: ", dec); + } + } + setverdict(pass); +} + +testcase tc_utf8() runs on CT +{ + var UTF8String x := "árvíztűrő tükörfúrógép"; + var bitstring enc := encvalue(x); + var bitstring enc_exp := oct2bit(unichar2oct(x, "UTF-8")); + if (enc != enc_exp) { + setverdict(fail, "Expected: ", enc_exp, ", got: ", enc); + } + else { + var UTF8String dec; + if (decvalue(enc, dec) != 0) { + setverdict(fail, "Could not decode ", enc); + } + else if (dec != x) { + setverdict(fail, "Expected: ", x, ", got: ", dec); + } + } + setverdict(pass); +} + +testcase tc_utf16() runs on CT +{ + var UTF16String x := "árvíztűrő 
tükörfúrógép"; + var bitstring enc := encvalue(x); + var bitstring enc_exp := oct2bit(unichar2oct(x, "UTF-16")); + if (enc != enc_exp) { + setverdict(fail, "Expected: ", enc_exp, ", got: ", enc); + } + else { + var UTF16String dec; + if (decvalue(enc, dec) != 0) { + setverdict(fail, "Could not decode ", enc); + } + else if (dec != x) { + setverdict(fail, "Expected: ", x, ", got: ", dec); + } + } + setverdict(pass); +} + // Control part control { execute(tc_byte()); @@ -537,6 +640,10 @@ control { execute(tc_ostr32()); execute(tc_cstr64()); execute(tc_ustr80()); + execute(tc_ieee754_float()); + execute(tc_ieee754_double()); + execute(tc_utf8()); + execute(tc_utf16()); } }