// cl_make_heap_GV_I().

// General includes.
#include "cl_sysdep.h"

CL_PROVIDE(cl_GV_I)

// Specification.
#include "cln/GV_integer.h"


// Implementation.

#include "cl_I.h"
#include "cl_DS.h"
#include "cln/abort.h"
#include "cl_offsetof.h"

namespace cln {

// Memory-efficient integer vectors: If all entries are known in advance to
// be >= 0 and < 2^m, we reserve only m bits for each entry. (m=1,2,4,8,16,32).
// Thus we end up with 6 kinds of bit/byte vectors, and the general integer
// vectors.
// For enquiring purposes, we store m in the vectorops table. Because of this,
// treating a cl_GV_RA as cl_GV_I is wrong. In particular, we cannot use the
// cl_null_GV_N to initialize a cl_GV_I; need a special cl_null_GV_I.

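// Example (informal sketch, not part of the library): the two allocators
// defined in this file are used like
//     cl_heap_GV_I* v1 = cl_make_heap_GV_I(1000);     // general cl_I entries
//     cl_heap_GV_I* v2 = cl_make_heap_GV_I(1000, 4);  // entries assumed < 2^4
// Afterwards v1->maxbits() returns -1 and v2->maxbits() returns 4 (the packed
// slot width actually chosen), via the m field stored in the vectorops table
// below.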

static void cl_gvector_integer_destructor (cl_heap* pointer)
{
#if (defined(__mips__) || defined(__mips64__)) && !defined(__GNUC__) // workaround SGI CC bug
        (*(cl_heap_GV_I*)pointer).~cl_heap_GV();
#else
        (*(cl_heap_GV_I*)pointer).~cl_heap_GV_I();
#endif
}

cl_class cl_class_gvector_integer = {
        cl_gvector_integer_destructor,
        0
};


static inline cl_heap_GV_I * outcast (cl_GV_inner<cl_I>* vec)
{
        return (cl_heap_GV_I *)((char *) vec - offsetof(cl_heap_GV_I,v));
}
static inline const cl_heap_GV_I * outcast (const cl_GV_inner<cl_I>* vec)
{
        return (const cl_heap_GV_I *)((const char *) vec - offsetof(cl_heap_GV_I,v));
}


// Add more info to the vectorops tables.

struct cl_GV_I_vectorops {
        cl_GV_vectorops<cl_I> ops;
        sintL m; // for maxbits
};

static inline cl_GV_I_vectorops* outcast (cl_GV_vectorops<cl_I>* vectorops)
{
        return (cl_GV_I_vectorops*)((char *) vectorops - offsetof(cl_GV_I_vectorops,ops));
}


// Vectors of general integers.

struct cl_heap_GV_I_general : public cl_heap_GV_I {
        cl_I data[1];
        // Standard allocation disabled.
        void* operator new (size_t size) { unused size; cl_abort(); return (void*)1; }
        // Standard deallocation disabled.
        void operator delete (void* ptr) { unused ptr; cl_abort(); }
        // No default constructor.
        cl_heap_GV_I_general ();
};

static const cl_I general_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        return ((const cl_heap_GV_I_general *) outcast(vec))->data[index];
}

static void general_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        ((cl_heap_GV_I_general *) outcast(vec))->data[index] = x;
}

static void general_do_delete (cl_GV_inner<cl_I>* vec)
{
        var cl_heap_GV_I_general* hv = (cl_heap_GV_I_general *) outcast(vec);
        var uintL len = hv->v.length();
        for (var uintL i = 0; i < len; i++)
                hv->data[i].~cl_I();
}

static void general_copy_elements (const cl_GV_inner<cl_I>* srcvec, uintL srcindex, cl_GV_inner<cl_I>* destvec, uintL destindex, uintL count)
{
        if (count > 0) {
                var const cl_heap_GV_I_general* srcv =
                  (const cl_heap_GV_I_general *) outcast(srcvec);
                var cl_heap_GV_I_general* destv =
                  (cl_heap_GV_I_general *) outcast(destvec);
                var uintL srclen = srcv->v.length();
                var uintL destlen = destv->v.length();
                if (!(srcindex <= srcindex+count && srcindex+count <= srclen))
                        cl_abort();
                if (!(destindex <= destindex+count && destindex+count <= destlen))
                        cl_abort();
                do {
                        destv->data[destindex++] = srcv->data[srcindex++];
                } while (--count > 0);
        }
}

static cl_GV_I_vectorops general_vectorops = {{
        general_element,
        general_set_element,
        general_do_delete,
        general_copy_elements },
        -1
};

cl_heap_GV_I* cl_make_heap_GV_I (uintL len)
{
        var cl_heap_GV_I_general* hv = (cl_heap_GV_I_general*) malloc_hook(offsetofa(cl_heap_GV_I_general,data)+sizeof(cl_I)*len);
        hv->refcount = 1;
        hv->type = &cl_class_gvector_integer;
        new (&hv->v) cl_GV_inner<cl_I> (len,&general_vectorops.ops);
        for (var uintL i = 0; i < len; i++)
                init1(cl_I, hv->data[i]) ();
        return hv;
}
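
// Example (informal): a general vector created by cl_make_heap_GV_I(10) is a
// single heap block consisting of the cl_heap_GV_I header followed directly by
// 10 inline cl_I slots, i.e. offsetofa(cl_heap_GV_I_general,data) +
// 10*sizeof(cl_I) bytes in total, each slot default-constructed by the loop
// above.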


// Vectors of integers requiring only few bits.

#define DEFINE_cl_heap_GV_I_bits(m,uint_t)  \
struct cl_heap_GV_I_bits##m : public cl_heap_GV_I {                     \
        uint_t data[1];                                                 \
        /* Standard allocation disabled. */                             \
        void* operator new (size_t size) { unused size; cl_abort(); return (void*)1; } \
        /* Standard deallocation disabled. */                           \
        void operator delete (void* ptr) { unused ptr; cl_abort(); }    \
        /* No default constructor. */                                   \
        cl_heap_GV_I_bits##m ();                                        \
};                                                                      \
static const cl_I bits##m##_element (const cl_GV_inner<cl_I>* vec, uintL index); \
static void bits##m##_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x); \
static void bits##m##_copy_elements (const cl_GV_inner<cl_I>* srcvec, uintL srcindex, cl_GV_inner<cl_I>* destvec, uintL destindex, uintL count) \
{                                                                               \
        if (count > 0) {                                                        \
                var const cl_heap_GV_I_bits##m * srcv =                         \
                  (const cl_heap_GV_I_bits##m *) outcast(srcvec);               \
                var cl_heap_GV_I_bits##m * destv =                              \
                  (cl_heap_GV_I_bits##m *) outcast(destvec);                    \
                var uintL srclen = srcv->v.length();                            \
                var uintL destlen = destv->v.length();                          \
                if (!(srcindex <= srcindex+count && srcindex+count <= srclen))  \
                        cl_abort();                                             \
                if (!(destindex <= destindex+count && destindex+count <= destlen)) \
                        cl_abort();                                             \
                if (m == intDsize) {                                            \
                        var const uintD* srcptr = &srcv->data[srcindex];        \
                        var uintD* destptr = &destv->data[destindex];           \
                        do {                                                    \
                                *destptr++ = *srcptr++;                         \
                        } while (--count > 0);                                  \
                } else                                                          \
                        bits_copy(srcv->data,m*srcindex,destv->data,m*destindex,m*count); \
        }                                                                       \
}                                                                       \
static cl_GV_I_vectorops bits##m##_vectorops = {{                       \
        bits##m##_element,                                              \
        bits##m##_set_element,                                          \
        bits_do_delete,                                                 \
        bits##m##_copy_elements },                                      \
        m                                                               \
};

static void bits_do_delete (cl_GV_inner<cl_I>* vec)
{
        unused vec;
}

// Copy bits srcptr.bits[srcindex..srcindex+count-1] into destptr.bits[destindex..destindex+count-1].
// Assumes that all range checks have already been performed.
static void bits_copy (const uintD* srcptr, uintL srcindex, uintD* destptr, uintL destindex, uintL count)
{
        srcptr += floor(srcindex,intDsize);
        destptr += floor(destindex,intDsize);
        srcindex = srcindex%intDsize;
        destindex = destindex%intDsize;
        // Now 0 <= srcindex < intDsize and 0 <= destindex < intDsize.
        if (srcindex == destindex) {
                // src and dest are aligned with respect to each other.
                if (srcindex > 0) {
                        if (count <= intDsize-srcindex) {
                                *destptr ^= (*destptr ^ *srcptr) & ((uintD)(bit(count)-1) << srcindex);
                                return;
                        }
                        *destptr ^= (*destptr ^ *srcptr) & (uintD)minus_bit(srcindex);
                        srcptr++;
                        destptr++;
                        count -= intDsize-srcindex;
                }
                // Now srcindex and destindex can be assumed to be 0.
                var uintL count1 = count%intDsize;
                count = floor(count,intDsize);
                if (count > 0) {
                        do {
                                *destptr++ = *srcptr++;
                        } while (--count > 0);
                }
                if (count1 > 0) {
                        *destptr ^= (*destptr ^ *srcptr) & (uintD)(bit(count1)-1);
                }
        } else {
                var uintL i = destindex - srcindex;
                var uintD tmp;
                if (destindex >= srcindex) { // i > 0
                        if (count <= intDsize-destindex) {
                                *destptr ^= (*destptr ^ (*srcptr << i)) & ((uintD)(bit(count)-1) << destindex);
                                return;
                        }
                        *destptr ^= (*destptr ^ (*srcptr << i)) & (uintD)minus_bit(destindex);
                        destptr++;
                        tmp = *srcptr >> (intDsize-i);
                        count -= intDsize-destindex;
                } else { // i < 0
                        if (count <= intDsize-srcindex) {
                                *destptr ^= (*destptr ^ (*srcptr >> -i)) & ((uintD)(bit(count)-1) << destindex);
                                return;
                        }
                        tmp = (*destptr & (uintD)(bit(destindex)-1)) | ((*srcptr >> srcindex) << destindex);
                        count += destindex;
                        i += intDsize;
                }
                srcptr++;
                // tmp now contains the low i bits to be put into *destptr.
                var uintL count1 = count%intDsize;
                count = floor(count,intDsize);
                var uintD lastdest;
                if (count == 0)
                        lastdest = tmp;
                else {
                        lastdest = shiftleftcopy_loop_up(srcptr,destptr,count,i);
                        *destptr |= tmp;
                }
                // lastdest now contains the i bits shifted out of the top of the source.
                if (count1 > 0) {
                        destptr += count;
                        if (count1 > i)
                                lastdest |= *(srcptr += count) << i;
                        *destptr ^= (*destptr ^ lastdest) & (uintD)(bit(count1)-1);
                }
        }
}
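
// Worked example (informal): with intDsize = 32, copying count = 5 bits from
// bit position srcindex = 3 to destindex = 3 hits the aligned single-word case
// above: the mask is (bit(5)-1) << 3 = 0xF8, and the masked-merge idiom
//     *destptr ^= (*destptr ^ *srcptr) & mask;
// replaces exactly bits 3..7 of *destptr by bits 3..7 of *srcptr, leaving all
// other destination bits untouched.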


// It would be most natural to use the following type for uint_t:
// m = 1: uint_t = uint8
// m = 2: uint_t = uint8
// m = 4: uint_t = uint8
// m = 8: uint_t = uint8
// m = 16: uint_t = uint16
// m = 32: uint_t = uint32
// But we want to have a fast copy_elements routine, and for m=1 we also want
// to use the fast shiftxor_loop_up() function for addition. Hence we use
// uint_t = uintD in all cases. (NB: intDsize>=32.)

// The last ceiling(len*m/intDsize)*intDsize-len*m unused bits in the last word
// are always 0. This provides some simplification for routines which work on
// entire words: they don't need to special-case the last word.

DEFINE_cl_heap_GV_I_bits(1,uintD)

static const cl_I bits1_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        return (unsigned int)((((const cl_heap_GV_I_bits1 *) outcast(vec))->data[index/intDsize] >> (index%intDsize)) & 0x1);
}
static void bits1_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0x1) {
                        var uintD* ptr = &((cl_heap_GV_I_bits1 *) outcast(vec))->data[index/intDsize];
                        index = index%intDsize;
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0x1 << index));
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(2,uintD)

static const cl_I bits2_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        return (unsigned int)((((const cl_heap_GV_I_bits2 *) outcast(vec))->data[index/(intDsize/2)] >> (2*(index%(intDsize/2)))) & 0x3);
}
static void bits2_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0x3) {
                        var uintD* ptr = &((cl_heap_GV_I_bits2 *) outcast(vec))->data[index/(intDsize/2)];
                        index = 2*(index%(intDsize/2));
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0x3 << index));
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(4,uintD)

static const cl_I bits4_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        return (unsigned int)((((const cl_heap_GV_I_bits4 *) outcast(vec))->data[index/(intDsize/4)] >> (4*(index%(intDsize/4)))) & 0xF);
}
static void bits4_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0xF) {
                        var uintD* ptr = &((cl_heap_GV_I_bits4 *) outcast(vec))->data[index/(intDsize/4)];
                        index = 4*(index%(intDsize/4));
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0xF << index));
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(8,uintD)

static const cl_I bits8_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        #if CL_CPU_BIG_ENDIAN_P
        return (unsigned int)((((const cl_heap_GV_I_bits8 *) outcast(vec))->data[index/(intDsize/8)] >> (8*(index%(intDsize/8)))) & 0xFF);
        #else
        // Optimization which assumes little-endian storage of uint8 in an uintD
        return (unsigned int)(((uint8*)(((const cl_heap_GV_I_bits8 *) outcast(vec))->data))[index]);
        #endif
}
static void bits8_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0xFF) {
                        #if CL_CPU_BIG_ENDIAN_P
                        var uintD* ptr = &((cl_heap_GV_I_bits8 *) outcast(vec))->data[index/(intDsize/8)];
                        index = 8*(index%(intDsize/8));
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0xFF << index));
                        #else
                        // Optimization which assumes little-endian storage of uint8 in an uintD
                        ((uint8*)(((cl_heap_GV_I_bits8 *) outcast(vec))->data))[index] = xval;
                        #endif
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(16,uintD)

static const cl_I bits16_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        #if CL_CPU_BIG_ENDIAN_P
        return (unsigned int)((((const cl_heap_GV_I_bits16 *) outcast(vec))->data[index/(intDsize/16)] >> (16*(index%(intDsize/16)))) & 0xFFFF);
        #else
        // Optimization which assumes little-endian storage of uint16 in an uintD
        return (unsigned int)(((uint16*)(((const cl_heap_GV_I_bits16 *) outcast(vec))->data))[index]);
        #endif
}
static void bits16_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0xFFFF) {
                        #if CL_CPU_BIG_ENDIAN_P
                        var uintD* ptr = &((cl_heap_GV_I_bits16 *) outcast(vec))->data[index/(intDsize/16)];
                        index = 16*(index%(intDsize/16));
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0xFFFF << index));
                        #else
                        // Optimization which assumes little-endian storage of uint16 in an uintD
                        ((uint16*)(((cl_heap_GV_I_bits16 *) outcast(vec))->data))[index] = xval;
                        #endif
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(32,uintD)

static const cl_I bits32_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        #if (intDsize==32)
        return (unsigned long)(((const cl_heap_GV_I_bits32 *) outcast(vec))->data[index]);
        #elif CL_CPU_BIG_ENDIAN_P
        return (unsigned long)((((const cl_heap_GV_I_bits32 *) outcast(vec))->data[index/(intDsize/32)] >> (32*(index%(intDsize/32)))) & 0xFFFFFFFF);
        #else
        // Optimization which assumes little-endian storage of uint32 in an uintD
        return (unsigned long)(((uint32*)(((const cl_heap_GV_I_bits32 *) outcast(vec))->data))[index]);
        #endif
}
static void bits32_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval = cl_I_to_UL(x);
        #if (intDsize==32)
        ((cl_heap_GV_I_bits32 *) outcast(vec))->data[index] = xval;
        #elif CL_CPU_BIG_ENDIAN_P
        var uintD* ptr = &((cl_heap_GV_I_bits32 *) outcast(vec))->data[index/(intDsize/32)];
        index = 32*(index%(intDsize/32));
        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0xFFFFFFFF << index));
        #else
        // Optimization which assumes little-endian storage of uint32 in an uintD
        ((uint32*)(((cl_heap_GV_I_bits32 *) outcast(vec))->data))[index] = xval;
        #endif
}


static cl_GV_I_vectorops* bits_vectorops[6] = {
        &bits1_vectorops,
        &bits2_vectorops,
        &bits4_vectorops,
        &bits8_vectorops,
        &bits16_vectorops,
        &bits32_vectorops
};

cl_heap_GV_I* cl_make_heap_GV_I (uintL len, sintL m)
{
        // Determine log2(bits).
        var uintL log2_bits;
        switch (m) {
                case 0: case 1:
                        log2_bits = 0; break;
                case 2:
                        log2_bits = 1; break;
                case 3: case 4:
                        log2_bits = 2; break;
                case 5: case 6: case 7: case 8:
                        log2_bits = 3; break;
                case 9: case 10: case 11: case 12:
                case 13: case 14: case 15: case 16:
                        log2_bits = 4; break;
                case 17: case 18: case 19: case 20:
                case 21: case 22: case 23: case 24:
                case 25: case 26: case 27: case 28:
                case 29: case 30: case 31: case 32:
                        log2_bits = 5; break;
                default:
                        return cl_make_heap_GV_I(len);
        }
        // For room allocation purposes, be pessimistic: assume the uintD case (since intDsize>=32).
        var uintL words = // ceiling(len*2^log2_bits,intDsize)
          (((sintL)len-1)>>(log2_intDsize-log2_bits))+1;
        var cl_heap_GV_I_bits32* hv = (cl_heap_GV_I_bits32*) malloc_hook(offsetofa(cl_heap_GV_I_bits32,data)+sizeof(uintD)*words);
        hv->refcount = 1;
        hv->type = &cl_class_gvector_integer;
        new (&hv->v) cl_GV_inner<cl_I> (len,&bits_vectorops[log2_bits]->ops);
        var uintD* ptr = (uintD*)(hv->data);
        for (var uintL i = 0; i < words; i++)
                ptr[i] = 0;
        return (cl_heap_GV_I*) hv;
}
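
// Worked example (informal): cl_make_heap_GV_I(100, 3) rounds the requested
// 3 bits up to 4-bit slots (log2_bits = 2), allocates
// words = ((100-1) >> (log2_intDsize-2)) + 1 = 13 zero-filled uintD words for
// intDsize = 32, and the resulting vector reports maxbits() = 4 (see below),
// not the originally requested 3.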


sintL cl_heap_GV_I::maxbits () const
{
        return outcast(v.vectorops)->m;
}


// An empty vector.
const cl_GV_I cl_null_GV_I = cl_GV_I((uintL)0);

}  // namespace cln

CL_PROVIDE_END(cl_GV_I)