src/float/dfloat/cl_DF.h

   1 // cl_DF internals
   2
   3 #ifndef _CL_DF_H
   4 #define _CL_DF_H
   5
   6 #include "cl_number.h"
   7 #include "cl_malloc.h"
   8 #include "cl_low.h"
   9 #include "cl_F.h"
  10
  11 typedef // 64-bit float in IEEE format
  12         #if (cl_word_size==64)
  13           // Sign/Exponent/Mantissa
  14           uint64
  15         #else
  16           // Sign/Exponent/MantissaHigh and MantissaLow
  17           #if CL_CPU_BIG_ENDIAN_P
  18             struct { uint32 semhi, mlo; }
  19           #else
  20             struct { uint32 mlo, semhi; }
  21           #endif
  22         #endif
  23         dfloat;
  24
  25 union dfloatjanus {
  26         dfloat eksplicit;       // explicit value
  27         #ifdef FAST_DOUBLE
  28         double machine_double;  // value as a C `double'
  29         #endif
  30 };
  31 struct cl_heap_dfloat : cl_heap {
  32         dfloatjanus representation;
  33 };
  34 inline cl_heap_dfloat* TheDfloat (const cl_number& obj)
  35         { return (cl_heap_dfloat*)(obj.pointer); }
  36 inline dfloat& cl_dfloat_value (/* const ?? */ cl_DF& x)
  37 {
  38         return TheDfloat(x)->representation.eksplicit;
  39 }
  40
  41 // The double-word contains:
  42 //   |..|.......|........................................|
  43 //  sign exponent                  mantissa
  44
  45   #define DF_exp_len   11       // number of bits in the exponent
  46   #define DF_mant_len  52       // number of bits in the mantissa
  47                                 // (excluding the hidden bit)
  48   #define DF_exp_low   1                // minimum exponent
  49   #define DF_exp_mid   1022             // exponent bias
  50   #define DF_exp_high  2046             // maximum exponent, 2047 is NaN/Inf
  51   #define DF_exp_shift  (DF_mant_len+DF_mant_shift) // lowest exponent bit
  52   #define DF_mant_shift  0                          // lowest mantissa bit
  53   #define DF_sign_shift  (64 - 1)       // = (DF_exp_len+DF_mant_len)
  54
  55 // Private constructor.
  56 inline cl_DF::cl_DF (cl_heap_dfloat* ptr) : cl_F ((cl_private_thing) ptr) {}
  57
  58 extern cl_class cl_class_dfloat;
  59
  60 // Builds a float from the explicit words.
  61 #if (cl_word_size==64)
  62 inline cl_heap_dfloat* allocate_dfloat (dfloat eksplicit)
  63 {
  64         cl_heap_dfloat* p = (cl_heap_dfloat*) cl_malloc_hook(sizeof(cl_heap_dfloat));
  65         p->refcount = 1;
  66         p->type = &cl_class_dfloat;
  67         p->representation.eksplicit = eksplicit;
  68         return p;
  69 }
  70 #else
  71 inline cl_heap_dfloat* allocate_dfloat (uint32 semhi, uint32 mlo)
  72 {
  73         cl_heap_dfloat* p = (cl_heap_dfloat*) cl_malloc_hook(sizeof(cl_heap_dfloat));
  74         p->refcount = 1;
  75         p->type = &cl_class_dfloat;
  76         p->representation.eksplicit.semhi = semhi;
  77         p->representation.eksplicit.mlo   = mlo;
  78         return p;
  79 }
  80 #endif
  81
  82 // Double Float 0.0
  83   extern const cl_DF cl_DF_0;
  84 // Double Float 1.0
  85   extern const cl_DF cl_DF_1;
  86 // Double Float -1.0
  87   extern const cl_DF cl_DF_minus1;
  88
  89
  90 #define dfloat_value  representation.eksplicit
  91
  92 // Entpacken eines Double-Float:
  93 #if (cl_word_size==64)
  94 // DF_decode(obj, zero_statement, sign=,exp=,mant=);
  95 // zerlegt ein Double-Float obj.
  96 // Ist obj=0.0, wird zero_statement ausgeführt.
  97 // Sonst: cl_signean sign = Vorzeichen (0 = +, -1 = -),
  98 //        sintL exp = Exponent (vorzeichenbehaftet),
  99 //        uintQ mant = Mantisse (>= 2^DF_mant_len, < 2^(DF_mant_len+1))
 100   #define dfloat_value_semhi  dfloat_value
 101   #define DF_uexp(x)  (((x) >> DF_mant_len) & (bit(DF_exp_len)-1))
 102   #define DF_decode(obj, zero_statement, sign_zuweisung,exp_zuweisung,mant_zuweisung)  \
 103     { var dfloat _x = TheDfloat(obj)->dfloat_value;                     \
 104       var uintL uexp = DF_uexp(_x);                                     \
 105       if (uexp==0)                                                      \
 106         { zero_statement } /* e=0 -> Zahl 0.0 */                        \
 107         else                                                            \
 108         { exp_zuweisung (sintL)(uexp - DF_exp_mid); /* Exponent */      \
 109           unused (sign_zuweisung ((sint64)_x >> 63)); /* Vorzeichen */  \
 110           mant_zuweisung (bit(DF_mant_len) | (_x & (bit(DF_mant_len)-1))); \
 111     }   }
 112 #else
 113 // DF_decode2(obj, zero_statement, sign=,exp=,manthi=,mantlo=);
 114 // zerlegt ein Double-Float obj.
 115 // Ist obj=0.0, wird zero_statement ausgeführt.
 116 // Sonst: cl_signean sign = Vorzeichen (0 = +, -1 = -),
 117 //        sintL exp = Exponent (vorzeichenbehaftet),
 118 //        uintL manthi,mantlo = Mantisse 2^32*manthi+mantlo
 119 //                              (>= 2^DF_mant_len, < 2^(DF_mant_len+1))
 120   #define dfloat_value_semhi  dfloat_value.semhi
 121   #define DF_uexp(semhi)  (((semhi) >> (DF_mant_len-32)) & (bit(DF_exp_len)-1))
 122   #define DF_decode2(obj, zero_statement, sign_zuweisung,exp_zuweisung,manthi_zuweisung,mantlo_zuweisung)  \
 123     { var uint32 semhi = TheDfloat(obj)->dfloat_value.semhi;            \
 124       var uint32 mlo = TheDfloat(obj)->dfloat_value.mlo;                \
 125       var uintL uexp = DF_uexp(semhi);                                  \
 126       if (uexp==0)                                                      \
 127         { zero_statement } /* e=0 -> Zahl 0.0 */                        \
 128         else                                                            \
 129         { exp_zuweisung (sintL)(uexp - DF_exp_mid); /* Exponent */      \
 130           unused (sign_zuweisung sign_of((sint32)(semhi))); /* Vorzeichen */\
 131           manthi_zuweisung (bit(DF_mant_len-32) | (semhi & (bit(DF_mant_len-32)-1))); \
 132           mantlo_zuweisung mlo;                                         \
 133     }   }
 134 #endif
 135
 136 // Einpacken eines Double-Float:
 137 #if (cl_word_size==64)
 138 // encode_DF(sign,exp,mant)
 139 // liefert ein Double-Float.
 140 // > cl_signean sign: Vorzeichen, 0 für +, -1 für negativ.
 141 // > sintL exp: Exponent
 142 // > uintQ mant: Mantisse, sollte >= 2^DF_mant_len und < 2^(DF_mant_len+1) sein.
 143 // < cl_DF ergebnis: ein Double-Float
 144 // Der Exponent wird auf Überlauf/Unterlauf getestet.
 145 inline const cl_DF encode_DF (cl_signean sign, sintL exp, uintQ mant)
 146 {
 147       if (exp < (sintL)(DF_exp_low-DF_exp_mid))
 148         { if (underflow_allowed())
 149             { cl_error_floating_point_underflow(); }
 150             else
 151             { return cl_DF_0; }
 152         }
 153       else
 154       if (exp > (sintL)(DF_exp_high-DF_exp_mid))
 155         { cl_error_floating_point_overflow(); }
 156       else
 157       return allocate_dfloat
 158         (  ((sint64)sign & bit(63))                  /* Vorzeichen */
 159          | ((uint64)(exp+DF_exp_mid) << DF_mant_len) /* Exponent   */
 160          | ((uint64)mant & (bit(DF_mant_len)-1))     /* Mantisse   */
 161         );
 162 }
 163 #else
 164 // encode_DF(sign,exp,manthi,mantlo)
 165 // liefert ein Double-Float.
 166 // > cl_signean sign: Vorzeichen, 0 für +, -1 für negativ.
 167 // > sintL exp: Exponent
 168 // > uintL manthi,mantlo: Mantisse 2^32*manthi+mantlo,
 169 //                        sollte >= 2^DF_mant_len und < 2^(DF_mant_len+1) sein.
 170 // < cl_DF ergebnis: ein Double-Float
 171 // Der Exponent wird auf Überlauf/Unterlauf getestet.
 172 inline const cl_DF encode_DF (cl_signean sign, sintL exp, uintL manthi, uintL mantlo)
 173 {
 174       if (exp < (sintL)(DF_exp_low-DF_exp_mid))
 175         { if (underflow_allowed())
 176             { cl_error_floating_point_underflow(); }
 177             else
 178             { return cl_DF_0; }
 179         }
 180       else
 181       if (exp > (sintL)(DF_exp_high-DF_exp_mid))
 182         { cl_error_floating_point_overflow(); }
 183       else
 184       return allocate_dfloat
 185         (  ((sint32)sign & bit(31))                       /* Vorzeichen */
 186          | ((uint32)(exp+DF_exp_mid) << (DF_mant_len-32)) /* Exponent   */
 187          | ((uint32)manthi & (bit(DF_mant_len-32)-1))     /* Mantisse   */
 188          , mantlo
 189         );
 190 }
 191 #endif
 192
 193 #ifdef FAST_DOUBLE
 194 // Auspacken eines Double:
 195 inline double DF_to_double (const cl_DF& obj)
 196 {
 197         return TheDfloat(obj)->representation.machine_double;
 198 }
 199 // Überprüfen und Einpacken eines von den 'double'-Routinen gelieferten
 200 // IEEE-Floats.
 201 // Klassifikation:
 202 //   1 <= e <= 2046 : normalisierte Zahl
 203 //   e=0, m/=0: subnormale Zahl
 204 //   e=0, m=0: vorzeichenbehaftete 0.0
 205 //   e=2047, m=0: vorzeichenbehaftete Infinity
 206 //   e=2047, m/=0: NaN
 207 // Angabe der möglicherweise auftretenden Sonderfälle:
 208 //   maybe_overflow: Operation läuft über, liefert IEEE-Infinity
 209 //   maybe_subnormal: Ergebnis sehr klein, liefert IEEE-subnormale Zahl
 210 //   maybe_underflow: Ergebnis sehr klein und /=0, liefert IEEE-Null
 211 //   maybe_divide_0: Ergebnis unbestimmt, liefert IEEE-Infinity
 212 //   maybe_nan: Ergebnis unbestimmt, liefert IEEE-NaN
 213   #include "cl_N.h"
 214   #include "cl_F.h"
 215 #if (cl_word_size==64)
 216   #define double_to_DF(expr,ergebnis_zuweisung,maybe_overflow,maybe_subnormal,maybe_underflow,maybe_divide_0,maybe_nan)  \
 217     { var dfloatjanus _erg; _erg.machine_double = (expr);               \
 218       if ((_erg.eksplicit & ((uint64)bit(DF_exp_len+DF_mant_len)-bit(DF_mant_len))) == 0) /* e=0 ? */\
 219         { if ((maybe_underflow                                          \
 220                || (maybe_subnormal && !((_erg.eksplicit << 1) == 0))    \
 221               )                                                         \
 222               && underflow_allowed()                                    \
 223              )                                                          \
 224             { cl_error_floating_point_underflow(); } /* subnormal oder noch kleiner -> Underflow */\
 225             else                                                        \
 226             { ergebnis_zuweisung cl_DF_0; } /* +/- 0.0 -> 0.0 */        \
 227         }                                                               \
 228       elif ((maybe_overflow || maybe_divide_0)                          \
 229             && (((~_erg.eksplicit) & ((uint64)bit(DF_exp_len+DF_mant_len)-bit(DF_mant_len))) == 0) /* e=2047 ? */\
 230            )                                                            \
 231         { if (maybe_nan && !((_erg.eksplicit<<(64-DF_mant_len)) == 0))  \
 232             { cl_error_division_by_0(); } /* NaN, also Singularität -> "Division durch 0" */\
 233           else /* Infinity */                                           \
 234           if (!maybe_overflow || maybe_divide_0)                        \
 235             { cl_error_division_by_0(); } /* Infinity, Division durch 0 */\
 236             else                                                        \
 237             { cl_error_floating_point_overflow(); } /* Infinity, Overflow */\
 238         }                                                               \
 239       else                                                              \
 240         { ergebnis_zuweisung allocate_dfloat(_erg.eksplicit); }         \
 241     }
 242 #else
 243   #define double_to_DF(expr,ergebnis_zuweisung,maybe_overflow,maybe_subnormal,maybe_underflow,maybe_divide_0,maybe_nan)  \
 244     { var dfloatjanus _erg; _erg.machine_double = (expr);                 \
 245       if ((_erg.eksplicit.semhi & ((uint32)bit(DF_exp_len+DF_mant_len-32)-bit(DF_mant_len-32))) == 0) /* e=0 ? */\
 246         { if ((maybe_underflow                                            \
 247                || (maybe_subnormal                                        \
 248                    && !(((_erg.eksplicit.semhi << 1) == 0) && (_erg.eksplicit.mlo == 0)) \
 249               )   )                                                       \
 250               && underflow_allowed()                                      \
 251              )                                                            \
 252             { cl_error_floating_point_underflow(); } /* subnormal oder noch kleiner -> Underflow */\
 253             else                                                          \
 254             { ergebnis_zuweisung cl_DF_0; } /* +/- 0.0 -> 0.0           */\
 255         }                                                                 \
 256       elif ((maybe_overflow || maybe_divide_0)                            \
 257             && (((~_erg.eksplicit.semhi) & ((uint32)bit(DF_exp_len+DF_mant_len-32)-bit(DF_mant_len-32))) == 0) /* e=2047 ?  */\
 258            )                                                              \
 259         { if (maybe_nan && !(((_erg.eksplicit.semhi<<(64-DF_mant_len)) == 0) && (_erg.eksplicit.mlo==0))) \
 260             { cl_error_division_by_0(); } /* NaN, also Singularität -> "Division durch 0"  */\
 261           else /* Infinity                                              */\
 262           if (!maybe_overflow || maybe_divide_0)                          \
 263             { cl_error_division_by_0(); } /* Infinity, Division durch 0 */\
 264             else                                                          \
 265             { cl_error_floating_point_overflow(); } /* Infinity, Overflow */\
 266         }                                                                 \
 267       else                                                                \
 268         { ergebnis_zuweisung allocate_dfloat(_erg.eksplicit.semhi,_erg.eksplicit.mlo); }  \
 269     }
 270 #endif
 271 #endif
 272
 273 // Liefert zu einem Double-Float x : (futruncate x), ein DF.
 274 // x wird von der 0 weg zur nächsten ganzen Zahl gerundet.
 275 extern const cl_DF futruncate (const cl_DF& x);
 276
 277 // DF_to_I(x) wandelt ein Double-Float x, das eine ganze Zahl darstellt,
 278 // in ein Integer um.
 279 extern const cl_I cl_DF_to_I (const cl_DF& x);
 280
 281 // cl_I_to_DF(x) wandelt ein Integer x in ein Double-Float um und rundet dabei.
 282 extern const cl_DF cl_I_to_DF (const cl_I& x);
 283
 284 // cl_RA_to_DF(x) wandelt eine rationale Zahl x in ein Double-Float um
 285 // und rundet dabei.
 286 extern const cl_DF cl_RA_to_DF (const cl_RA& x);
 287
 288
 289 // IEEE-Double-Float:
 290 // Bit 63 = s, Bits 62..52 = e, Bits 51..0 = m.
 291 //   e=0, m=0: vorzeichenbehaftete 0.0
 292 //   e=0, m/=0: subnormale Zahl,
 293 //     Wert = (-1)^s * 2^(1-1022) * [ 0 . 0 m51 ... m0 ]
 294 //   1 <= e <= 2046 : normalisierte Zahl,
 295 //     Wert = (-1)^s * 2^(e-1022) * [ 0 . 1 m51 ... m0 ]
 296 //   e=2047, m=0: vorzeichenbehaftete Infinity
 297 //   e=2047, m/=0: NaN
 298
 299 // cl_double_to_DF(val) wandelt ein IEEE-Double-Float val in ein Double-Float um.
 300 extern cl_heap_dfloat* cl_double_to_DF_pointer (const dfloatjanus& val);
 301 inline const cl_DF cl_double_to_DF (const dfloatjanus& val)
 302         { return cl_double_to_DF_pointer(val); }
 303
 304 // cl_DF_to_double(obj,&val);
 305 // wandelt ein Double-Float obj in ein IEEE-Double-Float val um.
 306 extern void cl_DF_to_double (const cl_DF& obj, dfloatjanus* val_);
 307
 308 #endif /* _CL_DF_H */