From 8c732512ca284f2a586694c7c33f1a0a4a68cef7 Mon Sep 17 00:00:00 2001 From: Alexei Sheplyakov Date: Sun, 14 Sep 2008 07:13:01 +0400 Subject: [PATCH] Wipe out the old (bison/flex generated) parser. Bison generated parser has a number of problems: 1. Bad performance. Parsing a sum seems to be O(N^{2 + a}) (a > 0). For example, parsing a sum (actually, a univariate polynomial) of 32768 terms takes about 90 sec. on my box, parsing a sum of 10^6 terms takes "eternity". 2. The user is expected to provide list of all symbols in the expression. Often this is very annoying (and useless), sometimes it is not possible at all. 3. Parser is not reentrant (bison *can* produce reentrant parsers, but that won't solve other problems). 4. Parser is difficult to extend. Since the new parser handles almost everything (useful) as the old one, let's remove the latter. --- ginac/Makefile.am | 8 +- ginac/ex.cpp | 1 - ginac/input_lexer.h | 69 -------------- ginac/input_lexer.ll | 211 ------------------------------------------ ginac/input_parser.yy | 201 ---------------------------------------- 5 files changed, 3 insertions(+), 487 deletions(-) delete mode 100644 ginac/input_lexer.h delete mode 100644 ginac/input_lexer.ll delete mode 100644 ginac/input_parser.yy diff --git a/ginac/Makefile.am b/ginac/Makefile.am index 2614c457..db09d388 100644 --- a/ginac/Makefile.am +++ b/ginac/Makefile.am @@ -8,8 +8,8 @@ libginac_la_SOURCES = add.cpp archive.cpp basic.cpp clifford.cpp color.cpp \ integral.cpp lst.cpp matrix.cpp mul.cpp ncmul.cpp normal.cpp numeric.cpp \ operators.cpp power.cpp registrar.cpp relational.cpp remember.cpp \ pseries.cpp print.cpp symbol.cpp symmetry.cpp tensor.cpp \ - utils.cpp wildcard.cpp input_parser.yy input_lexer.ll \ - input_lexer.h remember.h tostring.h utils.h compiler.h \ + utils.cpp wildcard.cpp \ + remember.h tostring.h utils.h compiler.h \ parser/parse_binop_rhs.cpp \ parser/parser.cpp \ parser/parse_context.cpp \ @@ -31,9 +31,7 @@ ginacinclude_HEADERS = ginac.h add.h archive.h assertion.h basic.h class_info.h parser/parser.hpp \ parser/parse_context.hpp -AM_LFLAGS = -Pginac_yy -olex.yy.c -AM_YFLAGS = -p ginac_yy -d -EXTRA_DIST = function.pl input_parser.h version.h.in \ +EXTRA_DIST = function.pl version.h.in \ parser/default_reader.tpl parser/builtin_fcns.def # Files produced by autogen(1) from templates diff --git a/ginac/ex.cpp b/ginac/ex.cpp index 7becb3e1..25d5607f 100644 --- a/ginac/ex.cpp +++ b/ginac/ex.cpp @@ -32,7 +32,6 @@ #include "power.h" #include "lst.h" #include "relational.h" -#include "input_lexer.h" #include "utils.h" namespace GiNaC { diff --git a/ginac/input_lexer.h b/ginac/input_lexer.h deleted file mode 100644 index 8cffe6be..00000000 --- a/ginac/input_lexer.h +++ /dev/null @@ -1,69 +0,0 @@ -/** @file input_lexer.h - * - * Lexical analyzer definition for reading expressions. - * This file must be processed with flex. */ - -/* - * GiNaC Copyright (C) 1999-2008 Johannes Gutenberg University Mainz, Germany - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef __GINAC_INPUT_LEXER_H__ -#define __GINAC_INPUT_LEXER_H__ - -extern "C" { -#include -} - -#include "config.h" - -// yacc stack type -#define YYSTYPE ex - -// lex functions/variables -extern int ginac_yyerror(char *s); -extern int ginac_yylex(void); -extern void ginac_yyrestart(FILE *f); -extern char *ginac_yytext; - -namespace GiNaC { - -class ex; - -/** Set the input string to be parsed by ginac_yyparse() (used internally). */ -extern void set_lexer_string(const std::string &s); - -/** Get name of symbol/index (used internally). */ -extern std::string get_symbol_name(const ex & s); - -/** Set the list of predefined symbols for the lexer (used internally). */ -extern void set_lexer_symbols(ex l); - -/** Check whether lexer symbol was predefined (vs. created by the lexer, e.g. function names). */ -extern bool is_lexer_symbol_predefined(const ex &s); - -/** The expression parser function (used internally). */ -extern int ginac_yyparse(void); - -/** The expression returned by the parser (used internally). */ -extern ex parsed_ex; - -/** Get error message from the parser. */ -extern std::string get_parser_error(void); - -} // namespace GiNaC - -#endif // ndef __GINAC_INPUT_LEXER_H__ diff --git a/ginac/input_lexer.ll b/ginac/input_lexer.ll deleted file mode 100644 index 0a7bbec0..00000000 --- a/ginac/input_lexer.ll +++ /dev/null @@ -1,211 +0,0 @@ -/** @file input_lexer.ll - * - * Lexical analyzer definition for reading expressions. - * This file must be processed with flex. */ - -/* - * GiNaC Copyright (C) 1999-2008 Johannes Gutenberg University Mainz, Germany - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - - -/* - * Definitions - */ - -%pointer - -%{ -#include -#include -#include -#include - -#include "input_lexer.h" -#include "ex.h" -#include "constant.h" -#include "fail.h" -#include "numeric.h" -#include "symbol.h" -#include "lst.h" -#include "idx.h" - -using namespace GiNaC; -namespace GiNaC { - -#include "input_parser.h" - -} // namespace GiNaC - -// Table of all used symbols/indices -struct sym_def { - sym_def() : predefined(false) {} - sym_def(const ex &s, bool predef) : sym(s), predefined(predef) {} - ~sym_def() {} - - sym_def(const sym_def &other) {sym = other.sym; predefined = other.predefined;} - const sym_def &operator=(const sym_def &other) - { - if (this != &other) { - sym = other.sym; - predefined = other.predefined; - } - return *this; - } - - ex sym; - bool predefined; // true = user supplied symbol, false = lexer generated symbol -}; -typedef std::map sym_tab; -static sym_tab syms; - -// lex input function -static int lexer_input(char *buf, int max_size); -#define YY_INPUT(buf, result, max_size) (result = lexer_input(buf, max_size)) -%} - - /* Abbreviations */ -D [0-9] -E [elEL][-+]?{D}+ -A [a-zA-Z_] -AN [0-9a-zA-Z_] - - -/* - * Lexical rules - */ - -%% -[ \t\n]+ /* skip whitespace */ - - /* special values */ -Pi ginac_yylval = Pi; return T_LITERAL; -Euler ginac_yylval = Euler; return T_LITERAL; -Catalan ginac_yylval = Catalan; return T_LITERAL; -FAIL ginac_yylval = *new fail(); return T_LITERAL; -I ginac_yylval = I; return T_NUMBER; -Digits ginac_yylval = (long)Digits; return T_DIGITS; - - /* comparison */ -"==" return T_EQUAL; -"!=" return T_NOTEQ; -"<=" return T_LESSEQ; -">=" return T_GREATEREQ; - - /* numbers */ -{D}+ | -"#"{D}+"R"{AN}+ | -"#b"([01])+ | -"#o"[0-7]+ | -"#x"[0-9a-fA-F]+ | -{D}+"."{D}*({E})? | -{D}*"."{D}+({E})? | -{D}+{E} ginac_yylval = numeric(yytext); return T_NUMBER; - - /* symbols */ -{A}{AN}* { - sym_tab::const_iterator i = syms.find(yytext); - if (i == syms.end()) { - symbol tmp(yytext); - ginac_yylval = tmp; - syms[yytext] = sym_def(tmp, false); - } else - ginac_yylval = (*i).second.sym; - return T_SYMBOL; - } - - /* end of input */ -<> return T_EOF; - - /* everything else */ -. return *yytext; - -%% - - -/* - * Routines - */ - -// The string from which we will read -static std::string lexer_string; - -// The current position within the string -static int curr_pos = 0; - -// Input function that reads from string -static int lexer_input(char *buf, int max_size) -{ - int actual = lexer_string.length() - curr_pos; - if (actual > max_size) - actual = max_size; - if (actual <= 0) - return YY_NULL; - lexer_string.copy(buf, actual, curr_pos); - curr_pos += actual; - return actual; -} - -// EOF encountered, terminate the scanner -int ginac_yywrap() -{ - return 1; -} - -namespace GiNaC { - -// Set the input string -void set_lexer_string(const std::string &s) -{ - lexer_string = s; - curr_pos = 0; -} - -// Get name of symbol/index -std::string get_symbol_name(const ex & s) -{ - if (is_a(s)) - return ex_to(s).get_name(); - else if (is_a(s) && is_a(s.op(0))) - return ex_to(s.op(0)).get_name(); - else - throw (std::runtime_error("get_symbol_name(): unexpected expression type")); -} - -// Set the list of predefined symbols/indices -void set_lexer_symbols(ex l) -{ - syms.clear(); - if (!is_exactly_a(l)) - return; - for (unsigned i=0; i(o) || (is_a(o) && is_a(o.op(0)))) - syms[get_symbol_name(o)] = sym_def(o, true); - } -} - -// Check whether symbol/index was predefined -bool is_lexer_symbol_predefined(const ex &s) -{ - sym_tab::const_iterator i = syms.find(get_symbol_name(s)); - if (i == syms.end()) - return false; - else - return (*i).second.predefined; -} - -} // namespace GiNaC diff --git a/ginac/input_parser.yy b/ginac/input_parser.yy deleted file mode 100644 index b28e63a1..00000000 --- a/ginac/input_parser.yy +++ /dev/null @@ -1,201 +0,0 @@ -/** @file input_parser.yy - * - * Input grammar definition for reading expressions. - * This file must be processed with yacc/bison. */ - -/* - * GiNaC Copyright (C) 1999-2008 Johannes Gutenberg University Mainz, Germany - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - - -/* - * Definitions - */ - -%{ -#include - -#include "ex.h" -#include "input_lexer.h" -#include "relational.h" -#include "operators.h" -#include "symbol.h" -#include "lst.h" -#include "power.h" -#include "exprseq.h" -#include "idx.h" -#include "indexed.h" -#include "matrix.h" -#include "inifcns.h" - -namespace GiNaC { - -#define YYERROR_VERBOSE 1 - -// Parsed output expression -ex parsed_ex; - -// Last error message returned by parser -static std::string parser_error; - -// Prototypes -ex attach_index(const ex & base, ex i, bool covariant); -%} - -/* Tokens (T_LITERAL means a literal value returned by the parser, but not - of class numeric or symbol (e.g. a constant or the FAIL object)) */ -%token T_EOF T_NUMBER T_SYMBOL T_LITERAL T_DIGITS T_EQUAL T_NOTEQ T_LESSEQ T_GREATEREQ - -/* Operator precedence and associativity */ -%right '=' -%left T_EQUAL T_NOTEQ -%left '<' '>' T_LESSEQ T_GREATEREQ -%left '+' '-' -%left '*' '/' '%' -%nonassoc NEG -%right '^' -%left '.' '~' -%nonassoc '!' - -%start input - - -/* - * Grammar rules - */ - -%% -input : exp T_EOF { - try { - parsed_ex = $1; - YYACCEPT; - } catch (std::exception &err) { - parser_error = err.what(); - YYERROR; - } - } - ; - -exp : T_NUMBER {$$ = $1;} - | T_SYMBOL { - if (is_lexer_symbol_predefined($1)) - $$ = $1.eval(); - else - throw (std::runtime_error("unknown symbol '" + get_symbol_name($1) + "'")); - } - | T_LITERAL {$$ = $1;} - | T_DIGITS {$$ = $1;} - | T_SYMBOL '(' exprseq ')' { - std::string n = get_symbol_name($1); - if (n == "sqrt") { - if ($3.nops() != 1) - throw (std::runtime_error("too many arguments to sqrt()")); - $$ = sqrt($3.op(0)); - } else if (n == "pow" || n == "power") { - if ($3.nops() != 2) - throw std::invalid_argument("wrong number of arguments to pow()"); - $$ = power($3.op(0), $3.op(0)); - } else { - unsigned i = function::find_function(n, $3.nops()); - $$ = function(i, ex_to($3)).eval(1); - } - } - | exp T_EQUAL exp {$$ = $1 == $3;} - | exp T_NOTEQ exp {$$ = $1 != $3;} - | exp '<' exp {$$ = $1 < $3;} - | exp T_LESSEQ exp {$$ = $1 <= $3;} - | exp '>' exp {$$ = $1 > $3;} - | exp T_GREATEREQ exp {$$ = $1 >= $3;} - | exp '+' exp {$$ = $1 + $3;} - | exp '-' exp {$$ = $1 - $3;} - | exp '*' exp {$$ = $1 * $3;} - | exp '/' exp {$$ = $1 / $3;} - | '-' exp %prec NEG {$$ = -$2;} - | '+' exp %prec NEG {$$ = $2;} - | exp '^' exp {$$ = pow($1, $3);} - | exp '.' exp {$$ = attach_index($1, $3, true);} - | exp '~' exp {$$ = attach_index($1, $3, false);} - | exp '!' {$$ = factorial($1);} - | '(' exp ')' {$$ = $2;} - | '{' list_or_empty '}' {$$ = $2;} - | '[' matrix ']' {$$ = lst_to_matrix(ex_to($2));} - ; - -exprseq : exp {$$ = exprseq($1);} - | exprseq ',' exp {exprseq es(ex_to($1)); $$ = es.append($3);} - ; - -list_or_empty: /* empty */ {$$ = *new lst;} - | list {$$ = $1;} - ; - -list : exp {$$ = lst($1);} - | list ',' exp {lst l(ex_to($1)); $$ = l.append($3);} - ; - -matrix : '[' row ']' {$$ = lst($2);} - | matrix ',' '[' row ']' {lst l(ex_to($1)); $$ = l.append($4);} - ; - -row : exp {$$ = lst($1);} - | row ',' exp {lst l(ex_to($1)); $$ = l.append($3);} - ; - - -/* - * Routines - */ - -%% -// Attach index to expression -ex attach_index(const ex & base, ex i, bool covariant) -{ - // Toggle index variance if necessary - if (is_a(i)) { - const varidx &vi = ex_to(i); - if (vi.is_covariant() != covariant) - i = vi.toggle_variance(); - } else if (!covariant) - throw (std::runtime_error("index '" + get_symbol_name(i) + "' is not a varidx and cannot be contravariant")); - - // Add index to an existing indexed object, or create a new indexed - // object if there are no indices yet - if (is_a(base)) { - const ex &b = base.op(0); - exvector iv; - for (unsigned n=1; n