[GiNaC-list] GiNaC::parser parses unary operators in the wrong order

Vitaly Magerya vmagerya at gmail.com
Mon Jun 12 17:59:43 CEST 2023


On 20/02/2023, I wrote:
> Hi, all. When parsing expressions with sign in the exponent, GiNaC's
> parses confuses the priorities somehow. Here are some examples:
>
>     3^2+1 is parsed as (3^2)+1, which is correct;
>     3^+2+1 is parsed as 3^(+2+1), which is wrong;
>     3^-2+1 is parsed as 3^(-2+1), which is wrong;
>     3^+2/3 is parsed as 3^(+2/3), which is wrong;
>     etc.

So after some more testing I found that GiNaC doesn't only
misparse the exponentiation this way, but multiplication too.
Things like "3*+2+1" are parsed wrong too. The handling of the
unary operators is broken if they appear after other operators.

As a separate problem, parsing nested exponentiation, like
"2^3^4", results in GiNaC throwing an exception, saying that
"power should have exactly 2 operands", which is false: for
example ginsh handles "2^3^4" easily and correctly.

So, with some work I was able to fix both of these problems. I'm
attaching a proposed solution, with tests included. Would it be
possible to commit this?

I would additionally like to request a point release after this
fix; this parsing problem potentially affects our usage of GiNaC,
and we'd rather stay with a release version than build our own
release tarballs.
-------------- next part --------------
diff --git a/check/exam_parser.cpp b/check/exam_parser.cpp
index 497d9160..5149553e 100644
--- a/check/exam_parser.cpp
+++ b/check/exam_parser.cpp
@@ -98,6 +98,104 @@ static int check4(ostream& err_str)
 	}
 }
 
+// Check that two strings parse to equal expressions.
+static int check_eq(ostream &err_str, parser &reader, const char *expr1, const char *expr2)
+{
+	const string srep1(expr1);
+	const string srep2(expr2);
+	ex e1, e2;
+	try{ e1 = reader(srep1); } catch (const exception &e) {
+		err_str << "\"" << srep1 << "\" failed to parse: "
+			<< e.what() << endl;
+		return 1;
+	}
+	try{ e2 = reader(srep2); } catch (const exception &e) {
+		err_str << "\"" << srep2 << "\" failed to parse: "
+			<< e.what() << endl;
+		return 1;
+	}
+	if (!(e1-e2).expand().is_zero()) {
+		err_str << "\"" << srep1 << "\" was misparsed as \""
+			<< e1 << "\"" << endl;
+		return 1;
+	}
+	return 0;
+}
+
+// Tests for the interaction of the '^' operator with
+// the unary '+' and the unary '-' operators
+static int check5(ostream& err_str)
+{
+	parser reader;
+	return
+		+check_eq(err_str, reader, "3^2+1", "10")
+		+check_eq(err_str, reader, "3^+2-1", "8")
+		+check_eq(err_str, reader, "3^-2+1", "10/9")
+		+check_eq(err_str, reader, "3^-2/5", "1/45")
+		+check_eq(err_str, reader, "3^-2*5", "5/9")
+		+check_eq(err_str, reader, "3^-2-5", "-44/9")
+		+check_eq(err_str, reader, "3^(-2)+1", "10/9")
+		+check_eq(err_str, reader, "(3)^(-2)+1", "10/9")
+		+check_eq(err_str, reader, "+3^2+1", "10")
+		+check_eq(err_str, reader, "+3^+2+1", "10")
+		+check_eq(err_str, reader, "+3^-2+1", "10/9")
+		+check_eq(err_str, reader, "+3^-2/5", "1/45")
+		+check_eq(err_str, reader, "+3^-2*5", "5/9")
+		+check_eq(err_str, reader, "+3^-2-5", "-44/9")
+		+check_eq(err_str, reader, "-3^2+1", "-8")
+		+check_eq(err_str, reader, "-3^+2+1", "-8")
+		+check_eq(err_str, reader, "-3^-2+1", "8/9")
+		+check_eq(err_str, reader, "-3^-2/3", "-1/27")
+		+check_eq(err_str, reader, "1+2^3^4", "1+(2^81)")
+		+check_eq(err_str, reader, "2^3^4+1", "1+(2^81)")
+		+check_eq(err_str, reader, "2^+3^4+1", "1+(2^81)")
+		+check_eq(err_str, reader, "2^3^+4+1", "1+(2^81)");
+}
+
+// Tests for the interaction of the '*' operator with
+// the unary '+' and the unary '-' operators
+static int check6(ostream& err_str)
+{
+	parser reader;
+	return
+		+check_eq(err_str, reader, "3*+2-1", "5")
+		+check_eq(err_str, reader, "3*2+1", "7")
+		+check_eq(err_str, reader, "3*+2+1", "7")
+		+check_eq(err_str, reader, "3*-2+1", "-5")
+		+check_eq(err_str, reader, "3*-2/5", "-6/5")
+		+check_eq(err_str, reader, "3*-2*5", "-30")
+		+check_eq(err_str, reader, "3*-2-5", "-11")
+		+check_eq(err_str, reader, "3*(-2)+1", "-5")
+		+check_eq(err_str, reader, "(3)*(-2)+1", "-5")
+		+check_eq(err_str, reader, "+3*2+1", "7")
+		+check_eq(err_str, reader, "+3*+2+1", "7")
+		+check_eq(err_str, reader, "+3*-2+1", "-5")
+		+check_eq(err_str, reader, "+3*-2/5", "-6/5")
+		+check_eq(err_str, reader, "+3*-2*5", "-30")
+		+check_eq(err_str, reader, "+3*-2-5", "-11")
+		+check_eq(err_str, reader, "-3*2+1", "-5")
+		+check_eq(err_str, reader, "-3*+2+1", "-5")
+		+check_eq(err_str, reader, "-3*-2+1", "7")
+		+check_eq(err_str, reader, "-3*-2/3", "2")
+		+check_eq(err_str, reader, "1+2*3*4", "25")
+		+check_eq(err_str, reader, "2*3*4+1", "25")
+		+check_eq(err_str, reader, "2*+3*4+1", "25")
+		+check_eq(err_str, reader, "2*3*+4+1", "25");
+}
+
+// Tests for nested unary + and unary -
+static int check7(ostream& err_str)
+{
+	parser reader;
+	return
+		+check_eq(err_str, reader, "+1", "1")
+		+check_eq(err_str, reader, "++1", "1")
+		+check_eq(err_str, reader, "+-+1", "-1")
+		+check_eq(err_str, reader, "+-+-1", "1")
+		+check_eq(err_str, reader, "+-+-+1", "1")
+		+check_eq(err_str, reader, "100++--+1+10", "111");
+}
+
 int main(int argc, char** argv)
 {
 	cout << "examining old parser bugs" << flush;
@@ -107,6 +205,9 @@ int main(int argc, char** argv)
 	errors += check2(err_str);  cout << '.' << flush;
 	errors += check3(err_str);  cout << '.' << flush;
 	errors += check4(err_str);  cout << '.' << flush;
+	errors += check5(err_str);  cout << '.' << flush;
+	errors += check6(err_str);  cout << '.' << flush;
+	errors += check7(err_str);  cout << '.' << flush;
 	if (errors) {
 		cout << "Yes, unfortunately:" << endl;
 		cout << err_str.str();
diff --git a/ginac/parser/parse_binop_rhs.cpp b/ginac/parser/parse_binop_rhs.cpp
index 1b3d3254..a56a485c 100644
--- a/ginac/parser/parse_binop_rhs.cpp
+++ b/ginac/parser/parse_binop_rhs.cpp
@@ -114,6 +114,18 @@ ex parser::parse_binop_rhs(int expr_prec, ex& lhs)
 	}
 }
 
+/// unary_expr: [+-] expression
+ex parser::parse_unary_expr()
+{
+	// Parse a binary expression with the priority of exponentiation
+	// or higher. Ignore the overall sign, because parse_primary()
+	// handles it for us.
+	get_next_tok(); // Skip [+-]
+	ex lhs = parse_primary();
+	ex e = parse_binop_rhs(get_tok_prec('^'), lhs);
+	return e;
+}
+
 extern const numeric* _num_1_p;
 
 static ex make_minus_expr(const exvector& args)
@@ -137,6 +149,16 @@ static ex make_divide_expr(const exvector& args)
 	return dynallocate<mul>(args[0], rest);
 }
 
+static ex make_power_expr(const exvector& args)
+{
+	size_t n = args.size();
+	ex p = pow(args[n - 2], args[n - 1]);
+	for (size_t i = n - 2; i > 0; i--) {
+		p = pow(args[i - 1], p);
+	}
+	return p;
+}
+
 static ex make_binop_expr(const int binop, const exvector& args)
 {
 	switch (binop) {
@@ -149,11 +171,7 @@ static ex make_binop_expr(const int binop, const exvector& args)
 		case '/':
 			return make_divide_expr(args);
 		case '^':
-			if (args.size() != 2)
-				throw std::invalid_argument(
-						std::string(__func__) 
-						+ ": power should have exactly 2 operands");
-			return pow(args[0], args[1]);
+			return make_power_expr(args);
 		default:
 			throw std::invalid_argument(
 					std::string(__func__) 
diff --git a/ginac/parser/parser.cpp b/ginac/parser/parser.cpp
index 24f5136a..c584706f 100644
--- a/ginac/parser/parser.cpp
+++ b/ginac/parser/parser.cpp
@@ -27,6 +27,7 @@
 #include "mul.h"
 #include "constant.h"
 #include "function.h"
+#include "operators.h"
 
 #include <cstdint> // for uintptr_t
 #include <sstream>
@@ -144,27 +145,6 @@ ex parser::parse_lst_expr()
 	return list;
 }
 
-extern const ex _ex0;
-
-/// unary_expr: [+-] expression
-ex parser::parse_unary_expr()
-{
-	// Unlike most other parse_* method this one does NOT consume
-	// current token so parse_binop_rhs() knows what kind of operator
-	// is being parsed.
-	
-	// There are different kinds of expressions which need to be handled:
-	// -a+b 
-	// -(a) 
-	// +a
-	// +(a)
-	// Delegate the work to parse_binop_rhs(), otherwise we end up
-	// duplicating it here. 
-	ex lhs = _ex0; // silly trick
-	ex e = parse_binop_rhs(0, lhs);
-	return e;
-}
-
 /// primary: identifier_expr | number_expr | paren_expr | unary_expr 
 ex parser::parse_primary() 
 {
@@ -178,6 +158,7 @@ ex parser::parse_primary()
 		case '{': 
 			 return parse_lst_expr();
 		case '-':
+			 return -parse_unary_expr();
 		case '+':
 			 return parse_unary_expr();
 		case lexer::token_type::literal:


More information about the GiNaC-list mailing list