src/float/cl_F.h

   1 // cl_F internals
   2
   3 #ifndef _CL_F_H
   4 #define _CL_F_H
   5
   6 #include "cln/number.h"
   7 #include "cl_macros.h"
   8 #include "cln/float.h"
   9
  10 namespace cln {
  11
   12 nonreturning_function(extern, cl_error_floating_point_overflow, (void));  // declared via nonreturning_function; presumably reports a floating-point overflow and does not return (defined elsewhere)
   13 nonreturning_function(extern, cl_error_floating_point_underflow, (void)); // declared via nonreturning_function; presumably reports a floating-point underflow and does not return (defined elsewhere)
  14
   15 #define underflow_allowed()  (! cl_inhibit_floating_point_underflow)  // true unless the flag cl_inhibit_floating_point_underflow is set
  16
  17
  18 // For all floating-point formats:
  19 // Sign s, Exponent e, Mantissa mk-1,...,m0
  20 // represents the number (-1)^s * 2^(e-_EXP_MID) * [0 . 1 mk-1 ... m0]
  21 // e=0 represents the number 0, always with sign s=0 (and mantissa =0).
  22 // _exp_low and _exp_high are (inclusive) bounds for e.
  23 // Bits for   Sign s    Exponent e    Mantissa m (= k)
  24 // SF           1           8             16
  25 // FF           1           8             23
  26 // DF           1          11             52
  27 // LF           1          32         intDsize*n >= 53
  28
  29
   30 // Conversions without rounding:
  31
   32 // cl_SF_to_FF(x) converts a short-float x into a single-float (without rounding).
   33 extern const cl_FF cl_SF_to_FF (const cl_SF& x);
  34
   35 // cl_SF_to_DF(x) converts a short-float x into a double-float (without rounding).
   36 extern const cl_DF cl_SF_to_DF (const cl_SF& x);
  37
   38 // cl_SF_to_LF(x,len) converts a short-float x into a long-float with len digits (without rounding).
   39 // > uintC len: desired number of digits, >=LF_minlen
   40 extern const cl_LF cl_SF_to_LF (const cl_SF& x, uintC len);
  41
   42 // cl_FF_to_DF(x) converts a single-float x into a double-float (without rounding).
   43 extern const cl_DF cl_FF_to_DF (const cl_FF& x);
  44
   45 // cl_FF_to_LF(x,len) converts a single-float x into a long-float with len digits (without rounding).
   46 // > uintC len: desired number of digits, >=LF_minlen
   47 extern const cl_LF cl_FF_to_LF (const cl_FF& x, uintC len);
  48
   49 // cl_DF_to_LF(x,len) converts a double-float x into a long-float with len digits (without rounding).
   50 // > uintC len: desired number of digits, >=LF_minlen
   51 extern const cl_LF cl_DF_to_LF (const cl_DF& x, uintC len);
  52
  53
   54 // Conversions with rounding:
  55
   56 // cl_FF_to_SF(x) converts a single-float x into a short-float (with rounding).
   57 extern const cl_SF cl_FF_to_SF (const cl_FF& x);
  58
   59 // cl_DF_to_SF(x) converts a double-float x into a short-float (with rounding).
   60 extern const cl_SF cl_DF_to_SF (const cl_DF& x);
  61
   62 // cl_LF_to_SF(x) converts a long-float x into a short-float (with rounding).
   63 extern const cl_SF cl_LF_to_SF (const cl_LF& x);
  64
   65 // cl_DF_to_FF(x) converts a double-float x into a single-float (with rounding).
   66 extern const cl_FF cl_DF_to_FF (const cl_DF& x);
  67
   68 // cl_LF_to_FF(x) converts a long-float x into a single-float (with rounding).
   69 extern const cl_FF cl_LF_to_FF (const cl_LF& x);
  70
   71 // cl_LF_to_DF(x) converts a long-float x into a double-float (with rounding).
   72 extern const cl_DF cl_LF_to_DF (const cl_LF& x);
  73
  74
   75 // Error report for a NaN.
   76 nonreturning_function(extern, cl_error_floating_point_nan, (void));
  77
  78
   79 // Runtime typing support: class objects that floattypecase compares against x.pointer_type().
   80 extern cl_class cl_class_ffloat;  // selects FF_statement (only tested when CL_WIDE_POINTERS is not defined)
   81 extern cl_class cl_class_dfloat;  // selects DF_statement
   82 extern cl_class cl_class_lfloat;  // selects LF_statement; also tested by longfloatp
  83
  84 // Type test.
  85 inline cl_boolean longfloatp (const cl_F& x)
  86 {
  87         if (x.pointer_p())
  88                 if (x.pointer_type() == &cl_class_lfloat)
  89                         return cl_true;
  90         return cl_false;
  91 }
  92
   93 // Macro: dispatches to one of 4 statements, depending on the float type of a float x.
   94 // floattypecase(x, SF_statement,FF_statement,DF_statement,LF_statement);
   95 // x should be a variable. Non-pointer values are dispatched on nonpointer_tag(), pointer objects on pointer_type(); anything else hits NOTREACHED.
   96 #ifdef CL_WIDE_POINTERS  // variant in which FF is a non-pointer value with tag cl_FF_tag
   97   #define floattypecase(x, SF_statement,FF_statement,DF_statement,LF_statement) \
   98     if (!(x).pointer_p())                                               \
   99       switch ((x).nonpointer_tag())                                     \
  100         { case cl_SF_tag: { SF_statement } break;                       \
  101           case cl_FF_tag: { FF_statement } break;                       \
  102           default: NOTREACHED                                           \
  103         }                                                               \
  104       else {                                                            \
  105         if ((x).pointer_type() == &cl_class_dfloat) { DF_statement }    \
  106         else if ((x).pointer_type() == &cl_class_lfloat) { LF_statement } \
  107         else NOTREACHED                                                 \
  108       }
  109 #else  // variant in which FF is a pointer object of class cl_class_ffloat
  110   #define floattypecase(x, SF_statement,FF_statement,DF_statement,LF_statement) \
  111     if (!(x).pointer_p())                                               \
  112       switch ((x).nonpointer_tag())                                     \
  113         { case cl_SF_tag: { SF_statement } break;                       \
  114           default: NOTREACHED                                           \
  115         }                                                               \
  116       else {                                                            \
  117         if ((x).pointer_type() == &cl_class_ffloat) { FF_statement }    \
  118         else if ((x).pointer_type() == &cl_class_dfloat) { DF_statement } \
  119         else if ((x).pointer_type() == &cl_class_lfloat) { LF_statement } \
  120         else NOTREACHED                                                 \
  121       }
  122 #endif
 123
  124 // Macro: dispatches to one of 4 statements, depending on the float type of a float x;
  125 // each statement may use x as a value of the respective float type.
  126 // floatcase(x, SF_statement,FF_statement,DF_statement,LF_statement);
  127 // x should be a variable: in each branch it is re-declared (shadowed) as a reference of the specific type, via the temporary __tmp.
  128   #define floatcase(x, SF_statement,FF_statement,DF_statement,LF_statement) \
  129     floattypecase(x                                                        \
  130       , var cl_SF& __tmp = *(cl_SF*)&x; var cl_SF& x = __tmp; SF_statement \
  131       , var cl_FF& __tmp = *(cl_FF*)&x; var cl_FF& x = __tmp; FF_statement \
  132       , var cl_DF& __tmp = *(cl_DF*)&x; var cl_DF& x = __tmp; DF_statement \
  133       , var cl_LF& __tmp = *(cl_LF*)&x; var cl_LF& x = __tmp; LF_statement \
  134       )
 135
 136
  137 // GEN_F_OP1(arg1,F_OP,ergebnis_zuweisung)
  138 // generates the body of a float operation with one argument.
  139 // F_OP is executed once the argument has been converted to its exact
  140 // float type. ergebnis_zuweisung ("result assignment") is emitted directly before each F_OP(arg1) call.
  141 #define GEN_F_OP1(arg1,F_OP,ergebnis_zuweisung)  \
  142 {                                                                       \
  143         floatcase(arg1                                                  \
  144         , /* SF */      ergebnis_zuweisung F_OP(arg1);                  \
  145         , /* FF */      ergebnis_zuweisung F_OP(arg1);                  \
  146         , /* DF */      ergebnis_zuweisung F_OP(arg1);                  \
  147         , /* LF */      ergebnis_zuweisung F_OP(arg1);                  \
  148         );                                                              \
  149 }
 150
 151
  152 // GEN_F_OP2(arg1,arg2,F_OP,r,s,ergebnis_zuweisung)
  153 // generates the body of a float operation with two arguments.
  154 // F_OP is executed once both arguments have been converted to the same
  155 // float format (the longer one of arg1 and arg2). The r results are then
  156 // converted to the shorter of the two float formats. (r = 0,1,2. NOTE(review): this header only defines the NOMAP/MAP helpers for r = 0 and r = 1.)
  157 // s = 0,1. s=0 means the LF operation needs two long-floats of the same size.
  158 // s=1 means they may be of different sizes. ergebnis_zuweisung ("result assignment") is emitted directly before each result expression.
  159 #define GEN_F_OP2(arg1,arg2,F_OP,r,s,ergebnis_zuweisung)  \
  160 {                                                                       \
  161         floatcase(arg1                                                  \
  162         , /* arg1 SF */                                                 \
  163                 floatcase(arg2                                          \
  164                 , /* arg2 SF */                                         \
  165                         ergebnis_zuweisung CONCAT(NOMAP,r)(SF,          \
  166                         F_OP(arg1,arg2) );                              \
  167                 , /* arg2 FF */                                         \
  168                         ergebnis_zuweisung CONCAT(MAP,r)(FF,cl_FF_to_SF,\
  169                         F_OP(cl_SF_to_FF(arg1),arg2) );                 \
  170                 , /* arg2 DF */                                         \
  171                         ergebnis_zuweisung CONCAT(MAP,r)(DF,cl_DF_to_SF,\
  172                         F_OP(cl_SF_to_DF(arg1),arg2) );                 \
  173                 , /* arg2 LF */                                         \
  174                         ergebnis_zuweisung CONCAT(MAP,r)(LF,cl_LF_to_SF,\
  175                         F_OP(cl_SF_to_LF(arg1,CONCAT(LFlen,s)(arg2)),arg2) ); \
  176                 );                                                      \
  177         , /* arg1 FF */                                                 \
  178                 floatcase(arg2                                          \
  179                 , /* arg2 SF */                                         \
  180                         ergebnis_zuweisung CONCAT(MAP,r)(FF,cl_FF_to_SF,\
  181                         F_OP(arg1,cl_SF_to_FF(arg2)) );                 \
  182                 , /* arg2 FF */                                         \
  183                         ergebnis_zuweisung CONCAT(NOMAP,r)(FF,          \
  184                         F_OP(arg1,arg2) );                              \
  185                 , /* arg2 DF */                                         \
  186                         ergebnis_zuweisung CONCAT(MAP,r)(DF,cl_DF_to_FF,\
  187                         F_OP(cl_FF_to_DF(arg1),arg2) );                 \
  188                 , /* arg2 LF */                                         \
  189                         ergebnis_zuweisung CONCAT(MAP,r)(LF,cl_LF_to_FF,\
  190                         F_OP(cl_FF_to_LF(arg1,CONCAT(LFlen,s)(arg2)),arg2) ); \
  191                 );                                                      \
  192         , /* arg1 DF */                                                 \
  193                 floatcase(arg2                                          \
  194                 , /* arg2 SF */                                         \
  195                         ergebnis_zuweisung CONCAT(MAP,r)(DF,cl_DF_to_SF,\
  196                         F_OP(arg1,cl_SF_to_DF(arg2)) );                 \
  197                 , /* arg2 FF */                                         \
  198                         ergebnis_zuweisung CONCAT(MAP,r)(DF,cl_DF_to_FF,\
  199                         F_OP(arg1,cl_FF_to_DF(arg2)) );                 \
  200                 , /* arg2 DF */                                         \
  201                         ergebnis_zuweisung CONCAT(NOMAP,r)(DF,          \
  202                         F_OP(arg1,arg2) );                              \
  203                 , /* arg2 LF */                                         \
  204                         ergebnis_zuweisung CONCAT(MAP,r)(LF,cl_LF_to_DF,\
  205                         F_OP(cl_DF_to_LF(arg1,CONCAT(LFlen,s)(arg2)),arg2) ); \
  206                 );                                                      \
  207         , /* arg1 LF */                                                 \
  208                 floatcase(arg2                                          \
  209                 , /* arg2 SF */                                         \
  210                         ergebnis_zuweisung CONCAT(MAP,r)(LF,cl_LF_to_SF,\
  211                         F_OP(arg1,cl_SF_to_LF(arg2,CONCAT(LFlen,s)(arg1))) ); \
  212                 , /* arg2 FF */                                         \
  213                         ergebnis_zuweisung CONCAT(MAP,r)(LF,cl_LF_to_FF,\
  214                         F_OP(arg1,cl_FF_to_LF(arg2,CONCAT(LFlen,s)(arg1))) ); \
  215                 , /* arg2 DF */                                         \
  216                         ergebnis_zuweisung CONCAT(MAP,r)(LF,cl_LF_to_DF,\
  217                         F_OP(arg1,cl_DF_to_LF(arg2,CONCAT(LFlen,s)(arg1))) ); \
  218                 , /* arg2 LF: both long-floats, handled by GEN_LF_OP2_AUX */ \
  219                         GEN_LF_OP2_AUX(arg1,arg2,F_OP,r,s,ergebnis_zuweisung) \
  220                 );                                                      \
  221         );                                                              \
  222 }
  223 #define GEN_LF_OP2_AUX(arg1,arg2,F_OP,r,s,ergebnis_zuweisung)  /* LF/LF case of GEN_F_OP2: s (0 or 1) is appended to the name via CONCAT to select GEN_LF_OP2_AUX0 or GEN_LF_OP2_AUX1 */ \
  224   CONCAT(GEN_LF_OP2_AUX,s)(arg1,arg2,F_OP,r,ergebnis_zuweisung)
  225 #define GEN_LF_OP2_AUX0(arg1,arg2,F_OP,r,ergebnis_zuweisung)  /* s=0 variant: F_OP only sees two long-floats of equal length */ \
  226   var uintC len1 = TheLfloat(arg1)->len;                                \
  227   var uintC len2 = TheLfloat(arg2)->len;                                \
  228   if (len1 == len2) /* equal -> execute directly */                     \
  229     { ergebnis_zuweisung CONCAT(NOMAP,r) (LF, F_OP(arg1,arg2)); }       \
  230   elif (len1 > len2) /* -> bring arg2 to the length of arg1 */          \
  231     { ergebnis_zuweisung CONCAT(MAP,r) (LF, LF_shorten_len2,            \
  232       F_OP(arg1,extend(arg2,len1)) );                                   \
  233     }                                                                   \
  234   else /* (len1 < len2) -> bring arg1 to the length of arg2 */          \
  235     { ergebnis_zuweisung CONCAT(MAP,r) (LF, LF_shorten_len1,            \
  236       F_OP(extend(arg1,len2),arg2) );                                   \
  237     }
  238 #define LF_shorten_len1(arg)  shorten(arg,len1)  // relies on the local len1 declared by GEN_LF_OP2_AUX0
  239 #define LF_shorten_len2(arg)  shorten(arg,len2)  // relies on the local len2 declared by GEN_LF_OP2_AUX0
  240 #define GEN_LF_OP2_AUX1(arg1,arg2,F_OP,r,ergebnis_zuweisung)  /* s=1 variant: F_OP is applied to the two long-floats as they are, without length adjustment */ \
  241   ergebnis_zuweisung CONCAT(NOMAP,r) (LF, F_OP(arg1,arg2));
  242 // Result wrappers selected by r in GEN_F_OP2 / GEN_LF_OP2_AUX*: NOMAP0, NOMAP1 and MAP0 yield EXPR unchanged; MAP1 applies the conversion FN to EXPR. F is not used.
  243 #define NOMAP0(F,EXPR)  EXPR
  244 #define NOMAP1(F,EXPR)  EXPR
  245 #define MAP0(F,FN,EXPR)  EXPR
  246 #define MAP1(F,FN,EXPR)  FN(EXPR)
  247 // LFlen<s>(arg): length passed to the cl_XX_to_LF conversions in GEN_F_OP2 -- the length of the long-float arg for s=0, LF_minlen for s=1.
  248 #define LFlen0(arg)  TheLfloat(arg)->len
  249 #define LFlen1(arg)  LF_minlen
 250
 251
  252 // cl_F_extendsqrt(x) extends the precision of a float x by one step
  253 // SF -> FF -> DF -> LF(4) -> LF(5) -> LF(6) -> ...
  254 // A float with d mantissa bits thus becomes a float with
  255 // at least d+sqrt(d)+2 mantissa bits.
  256 extern const cl_F cl_F_extendsqrt (const cl_F& x);
 257
  258 // cl_F_extendsqrtx(x) extends the precision of a float x by one step
  259 // SF -> FF -> DF -> LF(4) -> LF(5) -> LF(6) -> ...
  260 // A float with d mantissa bits and l exponent bits thus becomes a float
  261 // with at least d+sqrt(d)+2+(l-1) mantissa bits.
  262 extern const cl_F cl_F_extendsqrtx (const cl_F& x);
 263
  264 // cl_F_shortenrelative(x,y) tries to reduce the size of x, such that one
  265 // wouldn't notice it when adding x to y. y must be nonzero (/= 0). More precisely,
  266 // this returns a float approximation of x, such that 1 ulp(x) < 1 ulp(y).
  267 extern const cl_F cl_F_shortenrelative (const cl_F& x, const cl_F& y);
 268
 269
  270 // Macro: dispatches according to a float_format_t value; the first of SF, FF, DF whose float_format_* bound is >= value is chosen, otherwise LF.
  271 // floatformatcase(value, SF_statement,FF_statement,DF_statement,LF_statement)
  272 // LF_statement may access `len', the mantissa length (measured in digits)
  273 // corresponding to `value'; it is computed as ceiling((uintL)(value),intDsize).
  274   #define floatformatcase(value, SF_statement,FF_statement,DF_statement,LF_statement)  \
  275     { if ((value) <= float_format_sfloat) { SF_statement }              \
  276       elif ((value) <= float_format_ffloat) { FF_statement }            \
  277       elif ((value) <= float_format_dfloat) { DF_statement }            \
  278       else { var uintL len = ceiling((uintL)(value),intDsize); LF_statement } \
  279     }
 280
 281 }  // namespace cln
 282
 283 #endif /* _CL_F_H */