ewah / ewah_bitmap.con commit ewah_bitmap: delete unused 'ewah_and()' (01b4a63)
   1/**
   2 * Copyright 2013, GitHub, Inc
   3 * Copyright 2009-2013, Daniel Lemire, Cliff Moon,
   4 *      David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version 2
   9 * of the License, or (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "git-compat-util.h"
  20#include "ewok.h"
  21#include "ewok_rlw.h"
  22
  23static inline size_t min_size(size_t a, size_t b)
  24{
  25        return a < b ? a : b;
  26}
  27
  28static inline size_t max_size(size_t a, size_t b)
  29{
  30        return a > b ? a : b;
  31}
  32
  33static inline void buffer_grow(struct ewah_bitmap *self, size_t new_size)
  34{
  35        size_t rlw_offset = (uint8_t *)self->rlw - (uint8_t *)self->buffer;
  36
  37        if (self->alloc_size >= new_size)
  38                return;
  39
  40        self->alloc_size = new_size;
  41        REALLOC_ARRAY(self->buffer, self->alloc_size);
  42        self->rlw = self->buffer + (rlw_offset / sizeof(eword_t));
  43}
  44
  45static inline void buffer_push(struct ewah_bitmap *self, eword_t value)
  46{
  47        if (self->buffer_size + 1 >= self->alloc_size)
  48                buffer_grow(self, self->buffer_size * 3 / 2);
  49
  50        self->buffer[self->buffer_size++] = value;
  51}
  52
  53static void buffer_push_rlw(struct ewah_bitmap *self, eword_t value)
  54{
  55        buffer_push(self, value);
  56        self->rlw = self->buffer + self->buffer_size - 1;
  57}
  58
  59static size_t add_empty_words(struct ewah_bitmap *self, int v, size_t number)
  60{
  61        size_t added = 0;
  62        eword_t runlen, can_add;
  63
  64        if (rlw_get_run_bit(self->rlw) != v && rlw_size(self->rlw) == 0) {
  65                rlw_set_run_bit(self->rlw, v);
  66        } else if (rlw_get_literal_words(self->rlw) != 0 ||
  67                        rlw_get_run_bit(self->rlw) != v) {
  68                buffer_push_rlw(self, 0);
  69                if (v) rlw_set_run_bit(self->rlw, v);
  70                added++;
  71        }
  72
  73        runlen = rlw_get_running_len(self->rlw);
  74        can_add = min_size(number, RLW_LARGEST_RUNNING_COUNT - runlen);
  75
  76        rlw_set_running_len(self->rlw, runlen + can_add);
  77        number -= can_add;
  78
  79        while (number >= RLW_LARGEST_RUNNING_COUNT) {
  80                buffer_push_rlw(self, 0);
  81                added++;
  82                if (v) rlw_set_run_bit(self->rlw, v);
  83                rlw_set_running_len(self->rlw, RLW_LARGEST_RUNNING_COUNT);
  84                number -= RLW_LARGEST_RUNNING_COUNT;
  85        }
  86
  87        if (number > 0) {
  88                buffer_push_rlw(self, 0);
  89                added++;
  90
  91                if (v) rlw_set_run_bit(self->rlw, v);
  92                rlw_set_running_len(self->rlw, number);
  93        }
  94
  95        return added;
  96}
  97
  98size_t ewah_add_empty_words(struct ewah_bitmap *self, int v, size_t number)
  99{
 100        if (number == 0)
 101                return 0;
 102
 103        self->bit_size += number * BITS_IN_EWORD;
 104        return add_empty_words(self, v, number);
 105}
 106
 107static size_t add_literal(struct ewah_bitmap *self, eword_t new_data)
 108{
 109        eword_t current_num = rlw_get_literal_words(self->rlw);
 110
 111        if (current_num >= RLW_LARGEST_LITERAL_COUNT) {
 112                buffer_push_rlw(self, 0);
 113
 114                rlw_set_literal_words(self->rlw, 1);
 115                buffer_push(self, new_data);
 116                return 2;
 117        }
 118
 119        rlw_set_literal_words(self->rlw, current_num + 1);
 120
 121        /* sanity check */
 122        assert(rlw_get_literal_words(self->rlw) == current_num + 1);
 123
 124        buffer_push(self, new_data);
 125        return 1;
 126}
 127
 128void ewah_add_dirty_words(
 129        struct ewah_bitmap *self, const eword_t *buffer,
 130        size_t number, int negate)
 131{
 132        size_t literals, can_add;
 133
 134        while (1) {
 135                literals = rlw_get_literal_words(self->rlw);
 136                can_add = min_size(number, RLW_LARGEST_LITERAL_COUNT - literals);
 137
 138                rlw_set_literal_words(self->rlw, literals + can_add);
 139
 140                if (self->buffer_size + can_add >= self->alloc_size)
 141                        buffer_grow(self, (self->buffer_size + can_add) * 3 / 2);
 142
 143                if (negate) {
 144                        size_t i;
 145                        for (i = 0; i < can_add; ++i)
 146                                self->buffer[self->buffer_size++] = ~buffer[i];
 147                } else {
 148                        memcpy(self->buffer + self->buffer_size,
 149                                buffer, can_add * sizeof(eword_t));
 150                        self->buffer_size += can_add;
 151                }
 152
 153                self->bit_size += can_add * BITS_IN_EWORD;
 154
 155                if (number - can_add == 0)
 156                        break;
 157
 158                buffer_push_rlw(self, 0);
 159                buffer += can_add;
 160                number -= can_add;
 161        }
 162}
 163
 164static size_t add_empty_word(struct ewah_bitmap *self, int v)
 165{
 166        int no_literal = (rlw_get_literal_words(self->rlw) == 0);
 167        eword_t run_len = rlw_get_running_len(self->rlw);
 168
 169        if (no_literal && run_len == 0) {
 170                rlw_set_run_bit(self->rlw, v);
 171                assert(rlw_get_run_bit(self->rlw) == v);
 172        }
 173
 174        if (no_literal && rlw_get_run_bit(self->rlw) == v &&
 175                run_len < RLW_LARGEST_RUNNING_COUNT) {
 176                rlw_set_running_len(self->rlw, run_len + 1);
 177                assert(rlw_get_running_len(self->rlw) == run_len + 1);
 178                return 0;
 179        } else {
 180                buffer_push_rlw(self, 0);
 181
 182                assert(rlw_get_running_len(self->rlw) == 0);
 183                assert(rlw_get_run_bit(self->rlw) == 0);
 184                assert(rlw_get_literal_words(self->rlw) == 0);
 185
 186                rlw_set_run_bit(self->rlw, v);
 187                assert(rlw_get_run_bit(self->rlw) == v);
 188
 189                rlw_set_running_len(self->rlw, 1);
 190                assert(rlw_get_running_len(self->rlw) == 1);
 191                assert(rlw_get_literal_words(self->rlw) == 0);
 192                return 1;
 193        }
 194}
 195
 196size_t ewah_add(struct ewah_bitmap *self, eword_t word)
 197{
 198        self->bit_size += BITS_IN_EWORD;
 199
 200        if (word == 0)
 201                return add_empty_word(self, 0);
 202
 203        if (word == (eword_t)(~0))
 204                return add_empty_word(self, 1);
 205
 206        return add_literal(self, word);
 207}
 208
 209void ewah_set(struct ewah_bitmap *self, size_t i)
 210{
 211        const size_t dist =
 212                DIV_ROUND_UP(i + 1, BITS_IN_EWORD) -
 213                DIV_ROUND_UP(self->bit_size, BITS_IN_EWORD);
 214
 215        assert(i >= self->bit_size);
 216
 217        self->bit_size = i + 1;
 218
 219        if (dist > 0) {
 220                if (dist > 1)
 221                        add_empty_words(self, 0, dist - 1);
 222
 223                add_literal(self, (eword_t)1 << (i % BITS_IN_EWORD));
 224                return;
 225        }
 226
 227        if (rlw_get_literal_words(self->rlw) == 0) {
 228                rlw_set_running_len(self->rlw,
 229                        rlw_get_running_len(self->rlw) - 1);
 230                add_literal(self, (eword_t)1 << (i % BITS_IN_EWORD));
 231                return;
 232        }
 233
 234        self->buffer[self->buffer_size - 1] |=
 235                ((eword_t)1 << (i % BITS_IN_EWORD));
 236
 237        /* check if we just completed a stream of 1s */
 238        if (self->buffer[self->buffer_size - 1] == (eword_t)(~0)) {
 239                self->buffer[--self->buffer_size] = 0;
 240                rlw_set_literal_words(self->rlw,
 241                        rlw_get_literal_words(self->rlw) - 1);
 242                add_empty_word(self, 1);
 243        }
 244}
 245
 246void ewah_each_bit(struct ewah_bitmap *self, void (*callback)(size_t, void*), void *payload)
 247{
 248        size_t pos = 0;
 249        size_t pointer = 0;
 250        size_t k;
 251
 252        while (pointer < self->buffer_size) {
 253                eword_t *word = &self->buffer[pointer];
 254
 255                if (rlw_get_run_bit(word)) {
 256                        size_t len = rlw_get_running_len(word) * BITS_IN_EWORD;
 257                        for (k = 0; k < len; ++k, ++pos)
 258                                callback(pos, payload);
 259                } else {
 260                        pos += rlw_get_running_len(word) * BITS_IN_EWORD;
 261                }
 262
 263                ++pointer;
 264
 265                for (k = 0; k < rlw_get_literal_words(word); ++k) {
 266                        int c;
 267
 268                        /* todo: zero count optimization */
 269                        for (c = 0; c < BITS_IN_EWORD; ++c, ++pos) {
 270                                if ((self->buffer[pointer] & ((eword_t)1 << c)) != 0)
 271                                        callback(pos, payload);
 272                        }
 273
 274                        ++pointer;
 275                }
 276        }
 277}
 278
 279struct ewah_bitmap *ewah_new(void)
 280{
 281        struct ewah_bitmap *self;
 282
 283        self = xmalloc(sizeof(struct ewah_bitmap));
 284        self->alloc_size = 32;
 285        ALLOC_ARRAY(self->buffer, self->alloc_size);
 286
 287        ewah_clear(self);
 288        return self;
 289}
 290
 291void ewah_clear(struct ewah_bitmap *self)
 292{
 293        self->buffer_size = 1;
 294        self->buffer[0] = 0;
 295        self->bit_size = 0;
 296        self->rlw = self->buffer;
 297}
 298
 299void ewah_free(struct ewah_bitmap *self)
 300{
 301        if (!self)
 302                return;
 303
 304        if (self->alloc_size)
 305                free(self->buffer);
 306
 307        free(self);
 308}
 309
 310static void read_new_rlw(struct ewah_iterator *it)
 311{
 312        const eword_t *word = NULL;
 313
 314        it->literals = 0;
 315        it->compressed = 0;
 316
 317        while (1) {
 318                word = &it->buffer[it->pointer];
 319
 320                it->rl = rlw_get_running_len(word);
 321                it->lw = rlw_get_literal_words(word);
 322                it->b = rlw_get_run_bit(word);
 323
 324                if (it->rl || it->lw)
 325                        return;
 326
 327                if (it->pointer < it->buffer_size - 1) {
 328                        it->pointer++;
 329                } else {
 330                        it->pointer = it->buffer_size;
 331                        return;
 332                }
 333        }
 334}
 335
 336int ewah_iterator_next(eword_t *next, struct ewah_iterator *it)
 337{
 338        if (it->pointer >= it->buffer_size)
 339                return 0;
 340
 341        if (it->compressed < it->rl) {
 342                it->compressed++;
 343                *next = it->b ? (eword_t)(~0) : 0;
 344        } else {
 345                assert(it->literals < it->lw);
 346
 347                it->literals++;
 348                it->pointer++;
 349
 350                assert(it->pointer < it->buffer_size);
 351
 352                *next = it->buffer[it->pointer];
 353        }
 354
 355        if (it->compressed == it->rl && it->literals == it->lw) {
 356                if (++it->pointer < it->buffer_size)
 357                        read_new_rlw(it);
 358        }
 359
 360        return 1;
 361}
 362
 363void ewah_iterator_init(struct ewah_iterator *it, struct ewah_bitmap *parent)
 364{
 365        it->buffer = parent->buffer;
 366        it->buffer_size = parent->buffer_size;
 367        it->pointer = 0;
 368
 369        it->lw = 0;
 370        it->rl = 0;
 371        it->compressed = 0;
 372        it->literals = 0;
 373        it->b = 0;
 374
 375        if (it->pointer < it->buffer_size)
 376                read_new_rlw(it);
 377}
 378
 379void ewah_not(struct ewah_bitmap *self)
 380{
 381        size_t pointer = 0;
 382
 383        while (pointer < self->buffer_size) {
 384                eword_t *word = &self->buffer[pointer];
 385                size_t literals, k;
 386
 387                rlw_xor_run_bit(word);
 388                ++pointer;
 389
 390                literals = rlw_get_literal_words(word);
 391                for (k = 0; k < literals; ++k) {
 392                        self->buffer[pointer] = ~self->buffer[pointer];
 393                        ++pointer;
 394                }
 395        }
 396}
 397
 398void ewah_xor(
 399        struct ewah_bitmap *ewah_i,
 400        struct ewah_bitmap *ewah_j,
 401        struct ewah_bitmap *out)
 402{
 403        struct rlw_iterator rlw_i;
 404        struct rlw_iterator rlw_j;
 405        size_t literals;
 406
 407        rlwit_init(&rlw_i, ewah_i);
 408        rlwit_init(&rlw_j, ewah_j);
 409
 410        while (rlwit_word_size(&rlw_i) > 0 && rlwit_word_size(&rlw_j) > 0) {
 411                while (rlw_i.rlw.running_len > 0 || rlw_j.rlw.running_len > 0) {
 412                        struct rlw_iterator *prey, *predator;
 413                        size_t index;
 414                        int negate_words;
 415
 416                        if (rlw_i.rlw.running_len < rlw_j.rlw.running_len) {
 417                                prey = &rlw_i;
 418                                predator = &rlw_j;
 419                        } else {
 420                                prey = &rlw_j;
 421                                predator = &rlw_i;
 422                        }
 423
 424                        negate_words = !!predator->rlw.running_bit;
 425                        index = rlwit_discharge(prey, out,
 426                                predator->rlw.running_len, negate_words);
 427
 428                        ewah_add_empty_words(out, negate_words,
 429                                predator->rlw.running_len - index);
 430
 431                        rlwit_discard_first_words(predator,
 432                                predator->rlw.running_len);
 433                }
 434
 435                literals = min_size(
 436                        rlw_i.rlw.literal_words,
 437                        rlw_j.rlw.literal_words);
 438
 439                if (literals) {
 440                        size_t k;
 441
 442                        for (k = 0; k < literals; ++k) {
 443                                ewah_add(out,
 444                                        rlw_i.buffer[rlw_i.literal_word_start + k] ^
 445                                        rlw_j.buffer[rlw_j.literal_word_start + k]
 446                                );
 447                        }
 448
 449                        rlwit_discard_first_words(&rlw_i, literals);
 450                        rlwit_discard_first_words(&rlw_j, literals);
 451                }
 452        }
 453
 454        if (rlwit_word_size(&rlw_i) > 0)
 455                rlwit_discharge(&rlw_i, out, ~0, 0);
 456        else
 457                rlwit_discharge(&rlw_j, out, ~0, 0);
 458
 459        out->bit_size = max_size(ewah_i->bit_size, ewah_j->bit_size);
 460}
 461
 462void ewah_and_not(
 463        struct ewah_bitmap *ewah_i,
 464        struct ewah_bitmap *ewah_j,
 465        struct ewah_bitmap *out)
 466{
 467        struct rlw_iterator rlw_i;
 468        struct rlw_iterator rlw_j;
 469        size_t literals;
 470
 471        rlwit_init(&rlw_i, ewah_i);
 472        rlwit_init(&rlw_j, ewah_j);
 473
 474        while (rlwit_word_size(&rlw_i) > 0 && rlwit_word_size(&rlw_j) > 0) {
 475                while (rlw_i.rlw.running_len > 0 || rlw_j.rlw.running_len > 0) {
 476                        struct rlw_iterator *prey, *predator;
 477
 478                        if (rlw_i.rlw.running_len < rlw_j.rlw.running_len) {
 479                                prey = &rlw_i;
 480                                predator = &rlw_j;
 481                        } else {
 482                                prey = &rlw_j;
 483                                predator = &rlw_i;
 484                        }
 485
 486                        if ((predator->rlw.running_bit && prey == &rlw_i) ||
 487                                (!predator->rlw.running_bit && prey != &rlw_i)) {
 488                                ewah_add_empty_words(out, 0,
 489                                        predator->rlw.running_len);
 490                                rlwit_discard_first_words(prey,
 491                                        predator->rlw.running_len);
 492                                rlwit_discard_first_words(predator,
 493                                        predator->rlw.running_len);
 494                        } else {
 495                                size_t index;
 496                                int negate_words;
 497
 498                                negate_words = (&rlw_i != prey);
 499                                index = rlwit_discharge(prey, out,
 500                                        predator->rlw.running_len, negate_words);
 501                                ewah_add_empty_words(out, negate_words,
 502                                        predator->rlw.running_len - index);
 503                                rlwit_discard_first_words(predator,
 504                                        predator->rlw.running_len);
 505                        }
 506                }
 507
 508                literals = min_size(
 509                        rlw_i.rlw.literal_words,
 510                        rlw_j.rlw.literal_words);
 511
 512                if (literals) {
 513                        size_t k;
 514
 515                        for (k = 0; k < literals; ++k) {
 516                                ewah_add(out,
 517                                        rlw_i.buffer[rlw_i.literal_word_start + k] &
 518                                        ~(rlw_j.buffer[rlw_j.literal_word_start + k])
 519                                );
 520                        }
 521
 522                        rlwit_discard_first_words(&rlw_i, literals);
 523                        rlwit_discard_first_words(&rlw_j, literals);
 524                }
 525        }
 526
 527        if (rlwit_word_size(&rlw_i) > 0)
 528                rlwit_discharge(&rlw_i, out, ~0, 0);
 529        else
 530                rlwit_discharge_empty(&rlw_j, out);
 531
 532        out->bit_size = max_size(ewah_i->bit_size, ewah_j->bit_size);
 533}
 534
 535void ewah_or(
 536        struct ewah_bitmap *ewah_i,
 537        struct ewah_bitmap *ewah_j,
 538        struct ewah_bitmap *out)
 539{
 540        struct rlw_iterator rlw_i;
 541        struct rlw_iterator rlw_j;
 542        size_t literals;
 543
 544        rlwit_init(&rlw_i, ewah_i);
 545        rlwit_init(&rlw_j, ewah_j);
 546
 547        while (rlwit_word_size(&rlw_i) > 0 && rlwit_word_size(&rlw_j) > 0) {
 548                while (rlw_i.rlw.running_len > 0 || rlw_j.rlw.running_len > 0) {
 549                        struct rlw_iterator *prey, *predator;
 550
 551                        if (rlw_i.rlw.running_len < rlw_j.rlw.running_len) {
 552                                prey = &rlw_i;
 553                                predator = &rlw_j;
 554                        } else {
 555                                prey = &rlw_j;
 556                                predator = &rlw_i;
 557                        }
 558
 559                        if (predator->rlw.running_bit) {
 560                                ewah_add_empty_words(out, 0,
 561                                        predator->rlw.running_len);
 562                                rlwit_discard_first_words(prey,
 563                                        predator->rlw.running_len);
 564                                rlwit_discard_first_words(predator,
 565                                        predator->rlw.running_len);
 566                        } else {
 567                                size_t index = rlwit_discharge(prey, out,
 568                                        predator->rlw.running_len, 0);
 569                                ewah_add_empty_words(out, 0,
 570                                        predator->rlw.running_len - index);
 571                                rlwit_discard_first_words(predator,
 572                                        predator->rlw.running_len);
 573                        }
 574                }
 575
 576                literals = min_size(
 577                        rlw_i.rlw.literal_words,
 578                        rlw_j.rlw.literal_words);
 579
 580                if (literals) {
 581                        size_t k;
 582
 583                        for (k = 0; k < literals; ++k) {
 584                                ewah_add(out,
 585                                        rlw_i.buffer[rlw_i.literal_word_start + k] |
 586                                        rlw_j.buffer[rlw_j.literal_word_start + k]
 587                                );
 588                        }
 589
 590                        rlwit_discard_first_words(&rlw_i, literals);
 591                        rlwit_discard_first_words(&rlw_j, literals);
 592                }
 593        }
 594
 595        if (rlwit_word_size(&rlw_i) > 0)
 596                rlwit_discharge(&rlw_i, out, ~0, 0);
 597        else
 598                rlwit_discharge(&rlw_j, out, ~0, 0);
 599
 600        out->bit_size = max_size(ewah_i->bit_size, ewah_j->bit_size);
 601}
 602
 603
 604#define BITMAP_POOL_MAX 16
 605static struct ewah_bitmap *bitmap_pool[BITMAP_POOL_MAX];
 606static size_t bitmap_pool_size;
 607
 608struct ewah_bitmap *ewah_pool_new(void)
 609{
 610        if (bitmap_pool_size)
 611                return bitmap_pool[--bitmap_pool_size];
 612
 613        return ewah_new();
 614}
 615
 616void ewah_pool_free(struct ewah_bitmap *self)
 617{
 618        if (self == NULL)
 619                return;
 620
 621        if (bitmap_pool_size == BITMAP_POOL_MAX ||
 622                self->alloc_size == 0) {
 623                ewah_free(self);
 624                return;
 625        }
 626
 627        ewah_clear(self);
 628        bitmap_pool[bitmap_pool_size++] = self;
 629}
 630
 631uint32_t ewah_checksum(struct ewah_bitmap *self)
 632{
 633        const uint8_t *p = (uint8_t *)self->buffer;
 634        uint32_t crc = (uint32_t)self->bit_size;
 635        size_t size = self->buffer_size * sizeof(eword_t);
 636
 637        while (size--)
 638                crc = (crc << 5) - crc + (uint32_t)*p++;
 639
 640        return crc;
 641}