[GiNaC-list] GiNaC::parser parses unary operators in the wrong order
Vitaly Magerya
vmagerya at gmail.com
Mon Jun 12 17:59:43 CEST 2023
On 20/02/2023, I wrote:
> Hi, all. When parsing expressions with sign in the exponent, GiNaC's
> parses confuses the priorities somehow. Here are some examples:
>
> 3^2+1 is parsed as (3^2)+1, which is correct;
> 3^+2+1 is parsed as 3^(+2+1), which is wrong;
> 3^-2+1 is parsed as 3^(-2+1), which is wrong;
> 3^+2/3 is parsed as 3^(+2/3), which is wrong;
> etc.
So after some more testing I found that GiNaC doesn't only
misparse the exponentiation this way, but multiplication too.
Things like "3*+2+1" are parsed wrong too. The handling of the
unary operators is broken if they appear after other operators.
As a separate problem, parsing nested exponentiation, like
"2^3^4", results in GiNaC throwing an exception, saying that
"power should have exactly 2 operands", which is false: for
example ginsh handles "2^3^4" easily and correctly.
So, with some work I was able to fix both of these problems. I'm
attaching a proposed solution, with tests included. Would it be
possible to commit this?
I would additionally like to request a point release after this
fix; this parsing problem potentially affects our usage of GiNaC,
and we'd rather stay with a release version than build our own
release tarballs.
-------------- next part --------------
diff --git a/check/exam_parser.cpp b/check/exam_parser.cpp
index 497d9160..5149553e 100644
--- a/check/exam_parser.cpp
+++ b/check/exam_parser.cpp
@@ -98,6 +98,104 @@ static int check4(ostream& err_str)
}
}
+// Check that two strings parse to equal expressions.
+static int check_eq(ostream &err_str, parser &reader, const char *expr1, const char *expr2)
+{
+ const string srep1(expr1);
+ const string srep2(expr2);
+ ex e1, e2;
+ try{ e1 = reader(srep1); } catch (const exception &e) {
+ err_str << "\"" << srep1 << "\" failed to parse: "
+ << e.what() << endl;
+ return 1;
+ }
+ try{ e2 = reader(srep2); } catch (const exception &e) {
+ err_str << "\"" << srep2 << "\" failed to parse: "
+ << e.what() << endl;
+ return 1;
+ }
+ if (!(e1-e2).expand().is_zero()) {
+ err_str << "\"" << srep1 << "\" was misparsed as \""
+ << e1 << "\"" << endl;
+ return 1;
+ }
+ return 0;
+}
+
+// Tests for the interaction of the '^' operator with
+// the unary '+' and the unary '-' operators
+static int check5(ostream& err_str)
+{
+ parser reader;
+ return
+ +check_eq(err_str, reader, "3^2+1", "10")
+ +check_eq(err_str, reader, "3^+2-1", "8")
+ +check_eq(err_str, reader, "3^-2+1", "10/9")
+ +check_eq(err_str, reader, "3^-2/5", "1/45")
+ +check_eq(err_str, reader, "3^-2*5", "5/9")
+ +check_eq(err_str, reader, "3^-2-5", "-44/9")
+ +check_eq(err_str, reader, "3^(-2)+1", "10/9")
+ +check_eq(err_str, reader, "(3)^(-2)+1", "10/9")
+ +check_eq(err_str, reader, "+3^2+1", "10")
+ +check_eq(err_str, reader, "+3^+2+1", "10")
+ +check_eq(err_str, reader, "+3^-2+1", "10/9")
+ +check_eq(err_str, reader, "+3^-2/5", "1/45")
+ +check_eq(err_str, reader, "+3^-2*5", "5/9")
+ +check_eq(err_str, reader, "+3^-2-5", "-44/9")
+ +check_eq(err_str, reader, "-3^2+1", "-8")
+ +check_eq(err_str, reader, "-3^+2+1", "-8")
+ +check_eq(err_str, reader, "-3^-2+1", "8/9")
+ +check_eq(err_str, reader, "-3^-2/3", "-1/27")
+ +check_eq(err_str, reader, "1+2^3^4", "1+(2^81)")
+ +check_eq(err_str, reader, "2^3^4+1", "1+(2^81)")
+ +check_eq(err_str, reader, "2^+3^4+1", "1+(2^81)")
+ +check_eq(err_str, reader, "2^3^+4+1", "1+(2^81)");
+}
+
+// Tests for the interaction of the '*' operator with
+// the unary '+' and the unary '-' operators
+static int check6(ostream& err_str)
+{
+ parser reader;
+ return
+ +check_eq(err_str, reader, "3*+2-1", "5")
+ +check_eq(err_str, reader, "3*2+1", "7")
+ +check_eq(err_str, reader, "3*+2+1", "7")
+ +check_eq(err_str, reader, "3*-2+1", "-5")
+ +check_eq(err_str, reader, "3*-2/5", "-6/5")
+ +check_eq(err_str, reader, "3*-2*5", "-30")
+ +check_eq(err_str, reader, "3*-2-5", "-11")
+ +check_eq(err_str, reader, "3*(-2)+1", "-5")
+ +check_eq(err_str, reader, "(3)*(-2)+1", "-5")
+ +check_eq(err_str, reader, "+3*2+1", "7")
+ +check_eq(err_str, reader, "+3*+2+1", "7")
+ +check_eq(err_str, reader, "+3*-2+1", "-5")
+ +check_eq(err_str, reader, "+3*-2/5", "-6/5")
+ +check_eq(err_str, reader, "+3*-2*5", "-30")
+ +check_eq(err_str, reader, "+3*-2-5", "-11")
+ +check_eq(err_str, reader, "-3*2+1", "-5")
+ +check_eq(err_str, reader, "-3*+2+1", "-5")
+ +check_eq(err_str, reader, "-3*-2+1", "7")
+ +check_eq(err_str, reader, "-3*-2/3", "2")
+ +check_eq(err_str, reader, "1+2*3*4", "25")
+ +check_eq(err_str, reader, "2*3*4+1", "25")
+ +check_eq(err_str, reader, "2*+3*4+1", "25")
+ +check_eq(err_str, reader, "2*3*+4+1", "25");
+}
+
+// Tests for nested unary + and unary -
+static int check7(ostream& err_str)
+{
+ parser reader;
+ return
+ +check_eq(err_str, reader, "+1", "1")
+ +check_eq(err_str, reader, "++1", "1")
+ +check_eq(err_str, reader, "+-+1", "-1")
+ +check_eq(err_str, reader, "+-+-1", "1")
+ +check_eq(err_str, reader, "+-+-+1", "1")
+ +check_eq(err_str, reader, "100++--+1+10", "111");
+}
+
int main(int argc, char** argv)
{
cout << "examining old parser bugs" << flush;
@@ -107,6 +205,9 @@ int main(int argc, char** argv)
errors += check2(err_str); cout << '.' << flush;
errors += check3(err_str); cout << '.' << flush;
errors += check4(err_str); cout << '.' << flush;
+ errors += check5(err_str); cout << '.' << flush;
+ errors += check6(err_str); cout << '.' << flush;
+ errors += check7(err_str); cout << '.' << flush;
if (errors) {
cout << "Yes, unfortunately:" << endl;
cout << err_str.str();
diff --git a/ginac/parser/parse_binop_rhs.cpp b/ginac/parser/parse_binop_rhs.cpp
index 1b3d3254..a56a485c 100644
--- a/ginac/parser/parse_binop_rhs.cpp
+++ b/ginac/parser/parse_binop_rhs.cpp
@@ -114,6 +114,18 @@ ex parser::parse_binop_rhs(int expr_prec, ex& lhs)
}
}
+/// unary_expr: [+-] expression
+ex parser::parse_unary_expr()
+{
+ // Parse a binary expression with the priority of exponentiation
+ // or higher. Ignore the overall sign, because parse_primary()
+ // handles it for us.
+ get_next_tok(); // Skip [+-]
+ ex lhs = parse_primary();
+ ex e = parse_binop_rhs(get_tok_prec('^'), lhs);
+ return e;
+}
+
extern const numeric* _num_1_p;
static ex make_minus_expr(const exvector& args)
@@ -137,6 +149,16 @@ static ex make_divide_expr(const exvector& args)
return dynallocate<mul>(args[0], rest);
}
+static ex make_power_expr(const exvector& args)
+{
+ size_t n = args.size();
+ ex p = pow(args[n - 2], args[n - 1]);
+ for (size_t i = n - 2; i > 0; i--) {
+ p = pow(args[i - 1], p);
+ }
+ return p;
+}
+
static ex make_binop_expr(const int binop, const exvector& args)
{
switch (binop) {
@@ -149,11 +171,7 @@ static ex make_binop_expr(const int binop, const exvector& args)
case '/':
return make_divide_expr(args);
case '^':
- if (args.size() != 2)
- throw std::invalid_argument(
- std::string(__func__)
- + ": power should have exactly 2 operands");
- return pow(args[0], args[1]);
+ return make_power_expr(args);
default:
throw std::invalid_argument(
std::string(__func__)
diff --git a/ginac/parser/parser.cpp b/ginac/parser/parser.cpp
index 24f5136a..c584706f 100644
--- a/ginac/parser/parser.cpp
+++ b/ginac/parser/parser.cpp
@@ -27,6 +27,7 @@
#include "mul.h"
#include "constant.h"
#include "function.h"
+#include "operators.h"
#include <cstdint> // for uintptr_t
#include <sstream>
@@ -144,27 +145,6 @@ ex parser::parse_lst_expr()
return list;
}
-extern const ex _ex0;
-
-/// unary_expr: [+-] expression
-ex parser::parse_unary_expr()
-{
- // Unlike most other parse_* method this one does NOT consume
- // current token so parse_binop_rhs() knows what kind of operator
- // is being parsed.
-
- // There are different kinds of expressions which need to be handled:
- // -a+b
- // -(a)
- // +a
- // +(a)
- // Delegate the work to parse_binop_rhs(), otherwise we end up
- // duplicating it here.
- ex lhs = _ex0; // silly trick
- ex e = parse_binop_rhs(0, lhs);
- return e;
-}
-
/// primary: identifier_expr | number_expr | paren_expr | unary_expr
ex parser::parse_primary()
{
@@ -178,6 +158,7 @@ ex parser::parse_primary()
case '{':
return parse_lst_expr();
case '-':
+ return -parse_unary_expr();
case '+':
return parse_unary_expr();
case lexer::token_type::literal:
More information about the GiNaC-list
mailing list