-
Adam Knapp authored
Change-Id: Iacba53d8499439eba82045c30a1e0f2e0edf16fe Signed-off-by:
Adam Knapp <adam.knapp@sigmatechnology.se>
Adam Knapp authoredChange-Id: Iacba53d8499439eba82045c30a1e0f2e0edf16fe Signed-off-by:
Adam Knapp <adam.knapp@sigmatechnology.se>
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
TextAST.cc 15.93 KiB
/******************************************************************************
* Copyright (c) 2000-2021 Ericsson Telecom AB
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.html
*
* Contributors:
* Balasko, Jeno
* Raduly, Csaba
* Szabo, Janos Zoltan – initial implementation
* Szalai, Gabor
*
******************************************************************************/
#include "TextAST.hh"
#include <stdio.h>
#include "common/pattern.hh"
#include "common/pattern_p.hh"
void TextAST::init_TextAST()
{
begin_val=NULL;
end_val=NULL;
separator_val=NULL;
nof_field_params=0;
field_params=NULL;
true_params=NULL;
false_params=NULL;
coding_params.leading_zero=false;
coding_params.repeatable=false;
coding_params.min_length=-1;
coding_params.max_length=-1;
coding_params.convert=0;
coding_params.just=1;
decoding_params.leading_zero=false;
decoding_params.repeatable=false;
decoding_params.min_length=-1;
decoding_params.max_length=-1;
decoding_params.convert=0;
decoding_params.just=1;
decode_token=NULL;
case_sensitive=true;
}
TextAST::TextAST(const TextAST *other_val)
{
init_TextAST();
if(other_val){
//printf("HALI\n\r");
//other_val->print_TextAST();
copy_textAST_matching_values(&begin_val,other_val->begin_val);
copy_textAST_matching_values(&end_val,other_val->end_val);
copy_textAST_matching_values(&separator_val,other_val->separator_val);
nof_field_params=other_val->nof_field_params;
if(nof_field_params) field_params=
(textAST_enum_def**)Malloc(nof_field_params*sizeof(textAST_enum_def*));
for(size_t a=0;a<nof_field_params;a++){
if(other_val->field_params[a]){
field_params[a]=(textAST_enum_def*)Malloc(sizeof(textAST_enum_def));
field_params[a]->name=
new Common::Identifier(*(other_val->field_params[a]->name));
textAST_matching_values *b=&(field_params[a]->value);
copy_textAST_matching_values(&b,&other_val->field_params[a]->value);
} else field_params[a]=NULL;
}
//print_TextAST();
copy_textAST_matching_values(&true_params,other_val->true_params);
copy_textAST_matching_values(&false_params,other_val->false_params);
coding_params.leading_zero=other_val->coding_params.leading_zero;
coding_params.repeatable=other_val->coding_params.repeatable;
coding_params.min_length=other_val->coding_params.min_length;
coding_params.max_length=other_val->coding_params.max_length;
coding_params.convert=other_val->coding_params.convert;
coding_params.just=other_val->coding_params.just;
decoding_params.leading_zero=other_val->decoding_params.leading_zero;
decoding_params.repeatable=other_val->decoding_params.repeatable;
decoding_params.min_length=other_val->decoding_params.min_length;
decoding_params.max_length=other_val->decoding_params.max_length;
decoding_params.convert=other_val->decoding_params.convert;
decoding_params.just=other_val->decoding_params.just;
if(other_val->decode_token)
decode_token=mcopystr(other_val->decode_token);
case_sensitive=other_val->case_sensitive;
}
}
TextAST::~TextAST()
{
if(begin_val){
Free(begin_val->encode_token);
Free(begin_val->decode_token);
Free(begin_val);
}
if(end_val){
Free(end_val->encode_token);
Free(end_val->decode_token);
Free(end_val);
}
if(separator_val){
Free(separator_val->encode_token);
Free(separator_val->decode_token);
Free(separator_val);
}
if(field_params){
for(size_t a=0;a<nof_field_params;a++){
//TODO: lets extract field_params[a] for speed
if(field_params[a]){
delete field_params[a]->name;
Free(field_params[a]->value.encode_token);
Free(field_params[a]->value.decode_token);
}
Free(field_params[a]);
}
Free(field_params);
}
if(true_params){
Free(true_params->encode_token);
Free(true_params->decode_token);
Free(true_params);
}
if(false_params){
Free(false_params->encode_token);
Free(false_params->decode_token);
Free(false_params);
}
Free(decode_token);
}
void TextAST::print_TextAST() const
{
printf("\n\rBegin:");
if(begin_val){
printf("\n\r Encode token:");
if(begin_val->encode_token) printf(" %s\n\r",begin_val->encode_token);
else printf(" NULL\n\r");
printf(" Decode token:");
if(begin_val->decode_token) printf(" %s\n\r",begin_val->decode_token);
else printf(" NULL\n\r");
if(begin_val->case_sensitive) printf(" case_sensitive\n\r");
else printf(" case_insensitive\n\r");
}
else printf("NULL\n\r");
printf("End:");
if(end_val){
printf("\n\r Encode token:");
if(end_val->encode_token) printf(" %s\n\r",end_val->encode_token);
else printf(" NULL\n\r");
printf(" Decode token:");
if(end_val->decode_token) printf(" %s\n\r",end_val->decode_token);
else printf(" NULL\n\r");
if(end_val->case_sensitive) printf(" case_sensitive\n\r");
else printf(" case_insensitive\n\r");
}
else printf("NULL\n\r");
printf("Separator:");
if(separator_val){
printf("\n\r Encode token:");
if(separator_val->encode_token) printf(" %s\n\r",separator_val->encode_token);
else printf(" NULL\n\r");
printf(" Decode token:");
if(separator_val->decode_token) printf(" %s\n\r",separator_val->decode_token);
else printf(" NULL\n\r");
if(separator_val->case_sensitive) printf(" case_sensitive\n\r");
else printf(" case_insensitive\n\r");
}
else printf("NULL\n\r");
printf("Select token:");
if(decode_token) printf("%s",decode_token);
if(case_sensitive) printf(" case_sensitive\n\r");
else printf(" case_insensitive\n\r");
printf("True:");
if(true_params){
printf("\n\r Encode token:");
if(true_params->encode_token) printf(" %s\n\r",true_params->encode_token);
else printf(" NULL\n\r");
printf(" Decode token:");
if(true_params->decode_token) printf(" %s\n\r",true_params->decode_token);
else printf(" NULL\n\r");
if(true_params->case_sensitive) printf(" case_sensitive\n\r");
else printf(" case_insensitive\n\r");
}
else printf("NULL\n\r");
printf("False:");
if(true_params){
printf("\n\r Encode token:");
if(false_params->encode_token) printf(" %s\n\r",false_params->encode_token);
else printf(" NULL\n\r");
printf(" Decode token:");
if(false_params->decode_token) printf(" %s\n\r",false_params->decode_token);
else printf(" NULL\n\r");
if(false_params->case_sensitive) printf(" case_sensitive\n\r");
else printf(" case_insensitive\n\r");
}
else printf("NULL\n\r");
printf("Number of fields:%lu\n\r",nof_field_params);
for(size_t a=0;a<nof_field_params;a++){
printf("Field %lu:\n\r",a);
if(field_params[a]){
printf(" Name: %s\n\r",field_params[a]->name->get_name().c_str());
printf(" Encode token:");
if(field_params[a]->value.encode_token) printf(" %s\n\r",field_params[a]->value.encode_token);
else printf(" NULL\n\r");
printf(" Decode token:");
if(field_params[a]->value.decode_token) printf(" %s\n\r",field_params[a]->value.decode_token);
else printf(" NULL\n\r");
if(field_params[a]->value.case_sensitive) printf(" case_sensitive\n\r");
else printf(" case_insensitive\n\r");
} else printf(" NULL\n\r");
}
printf("Coding params:\n\r Leading 0:");
if(coding_params.leading_zero) printf(" true\n\r");
else printf(" false\n\r");
printf(" Length: %i - %i\n\r",coding_params.min_length,coding_params.max_length);
printf(" Convert: %i\n\r Just:%i\n\r",coding_params.convert,coding_params.just);
printf("Decoding params:\n\r Leading 0:");
if(decoding_params.leading_zero) printf(" true\n\r");
else printf(" false\n\r");
printf(" Length: %i - %i\n\r",decoding_params.min_length,decoding_params.max_length);
printf(" Convert: %i\n\r Just:%i\n\r",decoding_params.convert,decoding_params.just);
}
size_t TextAST::get_field_param_index(const Common::Identifier *name)
{
for(size_t a=0;a<nof_field_params;a++){
if(*field_params[a]->name==*name) return a;
}
field_params=(textAST_enum_def **)Realloc(field_params,(nof_field_params+1)*sizeof(textAST_enum_def *));
field_params[nof_field_params]=(textAST_enum_def*)Malloc(sizeof(textAST_enum_def));
field_params[nof_field_params]->name= new Common::Identifier(*name);
field_params[nof_field_params]->value.encode_token=NULL;
field_params[nof_field_params]->value.decode_token=NULL;
field_params[nof_field_params]->value.case_sensitive=true;
nof_field_params++;
return nof_field_params-1;
}
void copy_textAST_matching_values(textAST_matching_values **to,
const textAST_matching_values *from)
{
if(from==NULL) return;
if(*to==NULL)
*to=(textAST_matching_values*)Malloc(sizeof(textAST_matching_values));
if(from->encode_token) (*to)->encode_token=mcopystr(from->encode_token);
else (*to)->encode_token=NULL;
if(from->decode_token && !from->generated_decode_token)
(*to)->decode_token=mcopystr(from->decode_token);
else (*to)->decode_token=NULL;
(*to)->case_sensitive=from->case_sensitive;
(*to)->generated_decode_token=false;
}
char *process_decode_token(const char *decode_token,
const Common::Location& loc)
{
enum { INITIAL, BS, BS_N, BS_Q } state = INITIAL;
// points to the last backslash found
size_t bs_idx = 0;
char *ret_val = memptystr();
for (size_t i = 0; decode_token[i] != '\0'; i++) {
switch (state) {
case INITIAL:
switch (decode_token[i]) {
case '\\':
state = BS;
bs_idx = i;
break;
case '{':
// {reference}: find the matching bracket and skip the entire reference
for (size_t j = i + 1; ; j++) {
if (decode_token[j] == '\0') {
loc.error("Invalid reference `%s' in matching pattern",
decode_token + i);
i = j - 1;
break;
} else if (decode_token[j] == '}') {
loc.error("Reference `%s' is not allowed in matching pattern",
string(j + 1 - i, decode_token + i).c_str());
i = j;
break;
}
}
break;
case '\'':
case '"':
// replace '' -> ' and "" -> "
ret_val = mputc(ret_val, decode_token[i]);
if (decode_token[i + 1] == decode_token[i]) i++;
break;
default:
ret_val = mputc(ret_val, decode_token[i]);
}
break;
case BS:
switch (decode_token[i]) {
case 'N':
state = BS_N;
break;
case 'q':
state = BS_Q;
break;
case '\'':
case '"':
// replace \' -> ' and \" -> "
ret_val = mputc(ret_val, decode_token[i]);
state = INITIAL;
break;
default:
// keep other escape sequences
ret_val = mputc(ret_val, '\\');
ret_val = mputc(ret_val, decode_token[i]);
state = INITIAL;
}
break;
case BS_N:
switch (decode_token[i]) {
case ' ':
case '\t':
case '\r':
case '\n':
case '\v':
case '\f':
// ignore whitespace following \N
break;
case '{':
// \N{reference}: find the matching bracket and skip the entire
// reference
for (size_t j = i + 1; ; j++) {
if (decode_token[j] == '\0') {
loc.error("Invalid character set reference `%s' in matching "
"pattern", decode_token + bs_idx);
i = j - 1;
break;
} else if (decode_token[j] == '}') {
loc.error("Character set reference `%s' is not allowed in matching "
"pattern", string(j + 1 - bs_idx, decode_token + bs_idx).c_str());
i = j;
break;
}
}
state = INITIAL;
break;
default:
loc.error("Invalid character set reference `%s' in matching pattern",
string(i + 1 - bs_idx, decode_token + bs_idx).c_str());
state = INITIAL;
}
break;
case BS_Q:
switch (decode_token[i]) {
case ' ':
case '\t':
case '\r':
case '\n':
case '\v':
case '\f':
// ignore whitespace following \q
break;
case '{':
// copy the rest of the quadruple literally
ret_val = mputstr(ret_val, "\\q{");
state = INITIAL;
break;
default:
loc.error("Invalid quadruple notation `%s' in matching pattern",
string(i + 1 - bs_idx, decode_token + bs_idx).c_str());
state = INITIAL;
}
}
}
// final check of state
switch (state) {
case INITIAL:
break;
case BS:
loc.error("Invalid trailing backslash in matching pattern");
break;
case BS_N:
loc.error("Invalid character set reference `%s' in matching pattern",
decode_token + bs_idx);
break;
case BS_Q:
loc.error("Invalid quadruple notation `%s' in matching pattern",
decode_token + bs_idx);
}
return ret_val;
}
extern bool has_meta(); // in pattern_la.l
extern int pattern_yylex();
extern void init_pattern_yylex(YYSTYPE *p);
struct yy_buffer_state;
extern yy_buffer_state* pattern_yy_scan_string(const char*);
extern int pattern_yylex_destroy();
char *make_posix_str_code(const char *pat, bool cs)
{
if (pat == NULL || pat[0] == '\0') return mcopystr("I");
char *ret_val;
ret_val = mcopystr(cs ? "I" : "N"); // Case sensitive? _I_gen / _N_em
// Run the pattern lexer over the pattern to check for meta-characters.
// TTCN_pattern_to_regexp() would be overkill.
yy_buffer_state *flex_buffer = pattern_yy_scan_string(pat);
if (flex_buffer == NULL) {} //a fatal error was already generated
else {
YYSTYPE yylval;
init_pattern_yylex(&yylval);
while (pattern_yylex()) {} // 0 means end-of-string
pattern_yylex_destroy();
}
// If the matching is case sensitive and there are no meta-characters,
// matching for fixed strings can be done without regular expressions.
const bool maybe_fixed = cs && !has_meta();
char *mod_token = mprintf("(%s)*", pat);
// disable all warnings and "not supported" messages while translating the
// pattern to POSIX regexp again (these messages were already reported
// during semantic analysis)
unsigned orig_verb_level = verb_level;
verb_level &= ~(1|2);
char *posix_str = TTCN_pattern_to_regexp(mod_token);
verb_level = orig_verb_level;
Free(mod_token);
if (posix_str == NULL) FATAL_ERROR("make_posix_str_code()");
size_t len = mstrlen(posix_str);
if (maybe_fixed) { // maybe we can do fixed strings
*ret_val = 'F'; // "fixed"
// posix_str contains: ^(pat).*$ but all we need is pat
ret_val = mputstrn(ret_val, posix_str + 2, len - 6);
}
else { // no fixed-string optimization possible
size_t skip = 0;
// TEXT decoder optimization
if (len > 3) {
if (!memcmp(posix_str + (len - 3), ".*$", 3)) {
// The POSIX RE is set to match a bunch of characters before the end.
// This happens if the TTCN pattern wanted to match anywhere, not just
// at the end (TTCN patterns are anchored at the beginning and end).
// Instead of looking for (0 or more) characters and ignoring them,
// just don't look for them at all. RE are not anchored by default.
posix_str = mtruncstr(posix_str, len -= 3);
}
if (len > 3) { // len might have changed while truncating, above
// Un-anchor the cheap way, at the beginning.
skip = memcmp(posix_str, "^.*", 3) ? 0 : 3;
}
}
ret_val = mputstr(ret_val, posix_str + skip);
}
Free(posix_str);
return ret_val;
}
char *convert_charstring_to_pattern(const char *str)
{
char *ret_val = memptystr();
for (size_t i = 0; str[i]; i++) {
switch (str[i]) {
case '?':
case '*':
case '\\':
case '[':
case ']':
case '{':
case '}':
case '|':
case '(':
case ')':
case '#':
case '+':
// special characters of TTCN-3 patterns; escape with a backslash
ret_val = mputc(ret_val, '\\');
// no break
default:
ret_val = mputc(ret_val, str[i]);
}
}
return ret_val;
}
void init_textAST_matching_values(textAST_matching_values *val){
val->encode_token=NULL;
val->decode_token=NULL;
val->case_sensitive=true;
val->generated_decode_token=false;
}