src/float/dfloat/cl_DF.h

   1 // cl_DF internals
   2
   3 #ifndef _CL_DF_H
   4 #define _CL_DF_H
   5
   6 #include "cln/number.h"
   7 #include "cln/malloc.h"
   8 #include "base/cl_low.h"
   9 #include "float/cl_F.h"
  10
  11 #ifdef FAST_DOUBLE
  12 #include "base/cl_N.h"
  13 #include "float/cl_F.h"
  14 #endif
  15
  16 namespace cln {
  17
  18 typedef // 64-bit float in IEEE format
  19         #if (cl_word_size==64)
  20           // Sign/Exponent/Mantissa
  21           uint64
  22         #else
  23           // Sign/Exponent/MantissaHigh and MantissaLow
  24           #if defined(double_wordorder_bigendian_p)
  25             #if double_wordorder_bigendian_p
  26               struct { uint32 semhi, mlo; }
  27             #else
  28               struct { uint32 mlo, semhi; }
  29             #endif
  30           #else
  31             #if CL_CPU_BIG_ENDIAN_P
  32               struct { uint32 semhi, mlo; }
  33             #else
  34               struct { uint32 mlo, semhi; }
  35             #endif
  36           #endif
  37         #endif
  38         dfloat;
  39
  40 union dfloatjanus {
  41         dfloat eksplicit;       // explicit value
  42         #ifdef FAST_DOUBLE
  43         double machine_double;  // value as a C `double'
  44         #endif
  45 };
  46 struct cl_heap_dfloat : cl_heap {
  47         dfloatjanus representation;
  48 };
  49 inline cl_heap_dfloat* TheDfloat (const cl_number& obj)
  50         { return (cl_heap_dfloat*)(obj.pointer); }
  51 inline dfloat& cl_dfloat_value (/* const ?? */ cl_DF& x)
  52 {
  53         return TheDfloat(x)->representation.eksplicit;
  54 }
  55
  56 // The double-word contains:
  57 //   |..|.......|........................................|
  58 //  sign exponent                  mantissa
  59
  60   #define DF_exp_len   11       // number of bits in the exponent
  61   #define DF_mant_len  52       // number of bits in the mantissa
  62                                 // (excluding the hidden bit)
  63   #define DF_exp_low   1                // minimum exponent
  64   #define DF_exp_mid   1022             // exponent bias
  65   #define DF_exp_high  2046             // maximum exponent, 2047 is NaN/Inf
  66   #define DF_exp_shift  (DF_mant_len+DF_mant_shift) // lowest exponent bit
  67   #define DF_mant_shift  0                          // lowest mantissa bit
  68   #define DF_sign_shift  (64 - 1)       // = (DF_exp_len+DF_mant_len)
  69
  70 // Private constructor.
  71 inline cl_DF::cl_DF (cl_heap_dfloat* ptr) : cl_F ((cl_private_thing) ptr) {}
  72
  73 extern cl_class cl_class_dfloat;
  74
  75 // Builds a float from the explicit words.
  76 #if (cl_word_size==64)
  77 inline cl_heap_dfloat* allocate_dfloat (dfloat eksplicit)
  78 {
  79         cl_heap_dfloat* p = (cl_heap_dfloat*) malloc_hook(sizeof(cl_heap_dfloat));
  80         p->refcount = 1;
  81         p->type = &cl_class_dfloat;
  82         p->representation.eksplicit = eksplicit;
  83         return p;
  84 }
  85 #else
  86 inline cl_heap_dfloat* allocate_dfloat (uint32 semhi, uint32 mlo)
  87 {
  88         cl_heap_dfloat* p = (cl_heap_dfloat*) malloc_hook(sizeof(cl_heap_dfloat));
  89         p->refcount = 1;
  90         p->type = &cl_class_dfloat;
  91         p->representation.eksplicit.semhi = semhi;
  92         p->representation.eksplicit.mlo   = mlo;
  93         return p;
  94 }
  95 #endif
  96
  97 // Double Float 0.0
  98   extern const cl_DF cl_DF_0;
  99 // Double Float 1.0
 100   extern const cl_DF cl_DF_1;
 101 // Double Float -1.0
 102   extern const cl_DF cl_DF_minus1;
 103
 104
 105 #define dfloat_value  representation.eksplicit
 106
 107 // Entpacken eines Double-Float:
 108 #if (cl_word_size==64)
 109 // DF_decode(obj, zero_statement, sign=,exp=,mant=);
 110 // zerlegt ein Double-Float obj.
 111 // Ist obj=0.0, wird zero_statement ausgeführt.
 112 // Sonst: cl_signean sign = Vorzeichen (0 = +, -1 = -),
 113 //        sintL exp = Exponent (vorzeichenbehaftet),
 114 //        uintQ mant = Mantisse (>= 2^DF_mant_len, < 2^(DF_mant_len+1))
 115   #define dfloat_value_semhi  dfloat_value
 116   #define DF_uexp(x)  (((x) >> DF_mant_len) & (bit(DF_exp_len)-1))
 117   #define DF_decode(obj, zero_statement, sign_zuweisung,exp_zuweisung,mant_zuweisung)  \
 118     { var dfloat _x = TheDfloat(obj)->dfloat_value;                     \
 119       var uintL uexp = DF_uexp(_x);                                     \
 120       if (uexp==0)                                                      \
 121         { zero_statement } /* e=0 -> Zahl 0.0 */                        \
 122         else                                                            \
 123         { exp_zuweisung (sintL)(uexp - DF_exp_mid); /* Exponent */      \
 124           unused (sign_zuweisung ((sint64)_x >> 63)); /* Vorzeichen */  \
 125           mant_zuweisung (bit(DF_mant_len) | (_x & (bit(DF_mant_len)-1))); \
 126     }   }
 127 #else
 128 // DF_decode2(obj, zero_statement, sign=,exp=,manthi=,mantlo=);
 129 // zerlegt ein Double-Float obj.
 130 // Ist obj=0.0, wird zero_statement ausgeführt.
 131 // Sonst: cl_signean sign = Vorzeichen (0 = +, -1 = -),
 132 //        sintL exp = Exponent (vorzeichenbehaftet),
 133 //        uintL manthi,mantlo = Mantisse 2^32*manthi+mantlo
 134 //                              (>= 2^DF_mant_len, < 2^(DF_mant_len+1))
 135   #define dfloat_value_semhi  dfloat_value.semhi
 136   #define DF_uexp(semhi)  (((semhi) >> (DF_mant_len-32)) & (bit(DF_exp_len)-1))
 137   #define DF_decode2(obj, zero_statement, sign_zuweisung,exp_zuweisung,manthi_zuweisung,mantlo_zuweisung)  \
 138     { var uint32 semhi = TheDfloat(obj)->dfloat_value.semhi;            \
 139       var uint32 mlo = TheDfloat(obj)->dfloat_value.mlo;                \
 140       var uintL uexp = DF_uexp(semhi);                                  \
 141       if (uexp==0)                                                      \
 142         { zero_statement } /* e=0 -> Zahl 0.0 */                        \
 143         else                                                            \
 144         { exp_zuweisung (sintL)(uexp - DF_exp_mid); /* Exponent */      \
 145           unused (sign_zuweisung sign_of((sint32)(semhi))); /* Vorzeichen */\
 146           manthi_zuweisung (bit(DF_mant_len-32) | (semhi & (bit(DF_mant_len-32)-1))); \
 147           mantlo_zuweisung mlo;                                         \
 148     }   }
 149 #endif
 150
 151 // Einpacken eines Double-Float:
 152 #if (cl_word_size==64)
 153 // encode_DF(sign,exp,mant)
 154 // liefert ein Double-Float.
 155 // > cl_signean sign: Vorzeichen, 0 für +, -1 für negativ.
 156 // > sintL exp: Exponent
 157 // > uintQ mant: Mantisse, sollte >= 2^DF_mant_len und < 2^(DF_mant_len+1) sein.
 158 // < cl_DF ergebnis: ein Double-Float
 159 // Der Exponent wird auf Überlauf/Unterlauf getestet.
 160 inline const cl_DF encode_DF (cl_signean sign, sintL exp, uintQ mant)
 161 {
 162       if (exp < (sintL)(DF_exp_low-DF_exp_mid))
 163         { if (underflow_allowed())
 164             { throw floating_point_underflow_exception(); }
 165             else
 166             { return cl_DF_0; }
 167         }
 168       else
 169       if (exp > (sintL)(DF_exp_high-DF_exp_mid))
 170         { throw floating_point_overflow_exception(); }
 171       else
 172       return allocate_dfloat
 173         (  ((sint64)sign & bit(63))                  /* Vorzeichen */
 174          | ((uint64)(exp+DF_exp_mid) << DF_mant_len) /* Exponent   */
 175          | ((uint64)mant & (bit(DF_mant_len)-1))     /* Mantisse   */
 176         );
 177 }
 178 #else
 179 // encode_DF(sign,exp,manthi,mantlo)
 180 // liefert ein Double-Float.
 181 // > cl_signean sign: Vorzeichen, 0 für +, -1 für negativ.
 182 // > sintL exp: Exponent
 183 // > uintL manthi,mantlo: Mantisse 2^32*manthi+mantlo,
 184 //                        sollte >= 2^DF_mant_len und < 2^(DF_mant_len+1) sein.
 185 // < cl_DF ergebnis: ein Double-Float
 186 // Der Exponent wird auf Überlauf/Unterlauf getestet.
 187 inline const cl_DF encode_DF (cl_signean sign, sintL exp, uintL manthi, uintL mantlo)
 188 {
 189       if (exp < (sintL)(DF_exp_low-DF_exp_mid))
 190         { if (underflow_allowed())
 191             { throw floating_point_underflow_exception(); }
 192             else
 193             { return cl_DF_0; }
 194         }
 195       else
 196       if (exp > (sintL)(DF_exp_high-DF_exp_mid))
 197         { throw floating_point_overflow_exception(); }
 198       else
 199       return allocate_dfloat
 200         (  ((sint32)sign & bit(31))                       /* Vorzeichen */
 201          | ((uint32)(exp+DF_exp_mid) << (DF_mant_len-32)) /* Exponent   */
 202          | ((uint32)manthi & (bit(DF_mant_len-32)-1))     /* Mantisse   */
 203          , mantlo
 204         );
 205 }
 206 #endif
 207
 208 #ifdef FAST_DOUBLE
 209 // Auspacken eines Double:
 210 inline double DF_to_double (const cl_DF& obj)
 211 {
 212         return TheDfloat(obj)->representation.machine_double;
 213 }
 214 // Überprüfen und Einpacken eines von den 'double'-Routinen gelieferten
 215 // IEEE-Floats.
 216 // Klassifikation:
 217 //   1 <= e <= 2046 : normalisierte Zahl
 218 //   e=0, m/=0: subnormale Zahl
 219 //   e=0, m=0: vorzeichenbehaftete 0.0
 220 //   e=2047, m=0: vorzeichenbehaftete Infinity
 221 //   e=2047, m/=0: NaN
 222 // Angabe der möglicherweise auftretenden Sonderfälle:
 223 //   maybe_overflow: Operation läuft über, liefert IEEE-Infinity
 224 //   maybe_subnormal: Ergebnis sehr klein, liefert IEEE-subnormale Zahl
 225 //   maybe_underflow: Ergebnis sehr klein und /=0, liefert IEEE-Null
 226 //   maybe_divide_0: Ergebnis unbestimmt, liefert IEEE-Infinity
 227 //   maybe_nan: Ergebnis unbestimmt, liefert IEEE-NaN
 228 #if (cl_word_size==64)
 229   #define double_to_DF(expr,ergebnis_zuweisung,maybe_overflow,maybe_subnormal,maybe_underflow,maybe_divide_0,maybe_nan)  \
 230     { var dfloatjanus _erg; _erg.machine_double = (expr);               \
 231       if ((_erg.eksplicit & ((uint64)bit(DF_exp_len+DF_mant_len)-bit(DF_mant_len))) == 0) /* e=0 ? */\
 232         { if ((maybe_underflow                                          \
 233                || (maybe_subnormal && !((_erg.eksplicit << 1) == 0))    \
 234               )                                                         \
 235               && underflow_allowed()                                    \
 236              )                                                          \
 237             { throw floating_point_underflow_exception(); } /* subnormal oder noch kleiner -> Underflow */\
 238             else                                                        \
 239             { ergebnis_zuweisung cl_DF_0; } /* +/- 0.0 -> 0.0 */        \
 240         }                                                               \
 241       elif ((maybe_overflow || maybe_divide_0)                          \
 242             && (((~_erg.eksplicit) & ((uint64)bit(DF_exp_len+DF_mant_len)-bit(DF_mant_len))) == 0) /* e=2047 ? */\
 243            )                                                            \
 244         { if (maybe_nan && !((_erg.eksplicit<<(64-DF_mant_len)) == 0))  \
 245             { throw division_by_0_exception(); } /* NaN, also Singularität -> "Division durch 0" */\
 246           else /* Infinity */                                           \
 247           if (!maybe_overflow || maybe_divide_0)                        \
 248             { throw division_by_0_exception(); } /* Infinity, Division durch 0 */\
 249             else                                                        \
 250             { throw floating_point_overflow_exception(); } /* Infinity, Overflow */\
 251         }                                                               \
 252       else                                                              \
 253         { ergebnis_zuweisung allocate_dfloat(_erg.eksplicit); }         \
 254     }
 255 #else
 256   #define double_to_DF(expr,ergebnis_zuweisung,maybe_overflow,maybe_subnormal,maybe_underflow,maybe_divide_0,maybe_nan)  \
 257     { var dfloatjanus _erg; _erg.machine_double = (expr);                 \
 258       if ((_erg.eksplicit.semhi & ((uint32)bit(DF_exp_len+DF_mant_len-32)-bit(DF_mant_len-32))) == 0) /* e=0 ? */\
 259         { if ((maybe_underflow                                            \
 260                || (maybe_subnormal                                        \
 261                    && !(((_erg.eksplicit.semhi << 1) == 0) && (_erg.eksplicit.mlo == 0)) \
 262               )   )                                                       \
 263               && underflow_allowed()                                      \
 264              )                                                            \
 265             { throw floating_point_underflow_exception(); } /* subnormal oder noch kleiner -> Underflow */\
 266             else                                                          \
 267             { ergebnis_zuweisung cl_DF_0; } /* +/- 0.0 -> 0.0           */\
 268         }                                                                 \
 269       elif ((maybe_overflow || maybe_divide_0)                            \
 270             && (((~_erg.eksplicit.semhi) & ((uint32)bit(DF_exp_len+DF_mant_len-32)-bit(DF_mant_len-32))) == 0) /* e=2047 ?  */\
 271            )                                                              \
 272         { if (maybe_nan && !(((_erg.eksplicit.semhi<<(64-DF_mant_len)) == 0) && (_erg.eksplicit.mlo==0))) \
 273             { throw division_by_0_exception(); } /* NaN, also Singularität -> "Division durch 0"  */\
 274           else /* Infinity                                              */\
 275           if (!maybe_overflow || maybe_divide_0)                          \
 276             { throw division_by_0_exception(); } /* Infinity, Division durch 0 */\
 277             else                                                          \
 278             { throw floating_point_overflow_exception(); } /* Infinity, Overflow */\
 279         }                                                                 \
 280       else                                                                \
 281         { ergebnis_zuweisung allocate_dfloat(_erg.eksplicit.semhi,_erg.eksplicit.mlo); }  \
 282     }
 283 #endif
 284 #endif
 285
 286 // Liefert zu einem Double-Float x : (futruncate x), ein DF.
 287 // x wird von der 0 weg zur nächsten ganzen Zahl gerundet.
 288 extern const cl_DF futruncate (const cl_DF& x);
 289
 290 // DF_to_I(x) wandelt ein Double-Float x, das eine ganze Zahl darstellt,
 291 // in ein Integer um.
 292 extern const cl_I cl_DF_to_I (const cl_DF& x);
 293
 294 // cl_I_to_DF(x) wandelt ein Integer x in ein Double-Float um und rundet dabei.
 295 extern const cl_DF cl_I_to_DF (const cl_I& x);
 296
 297 // cl_RA_to_DF(x) wandelt eine rationale Zahl x in ein Double-Float um
 298 // und rundet dabei.
 299 extern const cl_DF cl_RA_to_DF (const cl_RA& x);
 300
 301
 302 // IEEE-Double-Float:
 303 // Bit 63 = s, Bits 62..52 = e, Bits 51..0 = m.
 304 //   e=0, m=0: vorzeichenbehaftete 0.0
 305 //   e=0, m/=0: subnormale Zahl,
 306 //     Wert = (-1)^s * 2^(1-1022) * [ 0 . 0 m51 ... m0 ]
 307 //   1 <= e <= 2046 : normalisierte Zahl,
 308 //     Wert = (-1)^s * 2^(e-1022) * [ 0 . 1 m51 ... m0 ]
 309 //   e=2047, m=0: vorzeichenbehaftete Infinity
 310 //   e=2047, m/=0: NaN
 311
 312 // cl_double_to_DF_pointer(val) wandelt ein IEEE-Double-Float val in ein Double-Float um.
 313 extern cl_heap_dfloat* cl_double_to_DF_pointer (const double val);
 314
 315 // cl_DF_to_double(obj,&val);
 316 // wandelt ein Double-Float obj in ein IEEE-Double-Float val um.
 317 extern void cl_DF_to_double (const cl_DF& obj, dfloatjanus* val_);
 318
 319 }  // namespace cln
 320
 321 #endif /* _CL_DF_H */