ewah / ewah_bitmap.con commit Merge branch 'dz/credential-doc-url-matching-rules' into maint (871955c)
   1/**
   2 * Copyright 2013, GitHub, Inc
   3 * Copyright 2009-2013, Daniel Lemire, Cliff Moon,
   4 *      David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version 2
   9 * of the License, or (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "git-compat-util.h"
  20#include "ewok.h"
  21#include "ewok_rlw.h"
  22
  23static inline size_t min_size(size_t a, size_t b)
  24{
  25        return a < b ? a : b;
  26}
  27
  28static inline size_t max_size(size_t a, size_t b)
  29{
  30        return a > b ? a : b;
  31}
  32
  33static inline void buffer_grow(struct ewah_bitmap *self, size_t new_size)
  34{
  35        size_t rlw_offset = (uint8_t *)self->rlw - (uint8_t *)self->buffer;
  36
  37        if (self->alloc_size >= new_size)
  38                return;
  39
  40        self->alloc_size = new_size;
  41        REALLOC_ARRAY(self->buffer, self->alloc_size);
  42        self->rlw = self->buffer + (rlw_offset / sizeof(eword_t));
  43}
  44
  45static inline void buffer_push(struct ewah_bitmap *self, eword_t value)
  46{
  47        if (self->buffer_size + 1 >= self->alloc_size)
  48                buffer_grow(self, self->buffer_size * 3 / 2);
  49
  50        self->buffer[self->buffer_size++] = value;
  51}
  52
  53static void buffer_push_rlw(struct ewah_bitmap *self, eword_t value)
  54{
  55        buffer_push(self, value);
  56        self->rlw = self->buffer + self->buffer_size - 1;
  57}
  58
  59static size_t add_empty_words(struct ewah_bitmap *self, int v, size_t number)
  60{
  61        size_t added = 0;
  62        eword_t runlen, can_add;
  63
  64        if (rlw_get_run_bit(self->rlw) != v && rlw_size(self->rlw) == 0) {
  65                rlw_set_run_bit(self->rlw, v);
  66        } else if (rlw_get_literal_words(self->rlw) != 0 ||
  67                        rlw_get_run_bit(self->rlw) != v) {
  68                buffer_push_rlw(self, 0);
  69                if (v) rlw_set_run_bit(self->rlw, v);
  70                added++;
  71        }
  72
  73        runlen = rlw_get_running_len(self->rlw);
  74        can_add = min_size(number, RLW_LARGEST_RUNNING_COUNT - runlen);
  75
  76        rlw_set_running_len(self->rlw, runlen + can_add);
  77        number -= can_add;
  78
  79        while (number >= RLW_LARGEST_RUNNING_COUNT) {
  80                buffer_push_rlw(self, 0);
  81                added++;
  82                if (v) rlw_set_run_bit(self->rlw, v);
  83                rlw_set_running_len(self->rlw, RLW_LARGEST_RUNNING_COUNT);
  84                number -= RLW_LARGEST_RUNNING_COUNT;
  85        }
  86
  87        if (number > 0) {
  88                buffer_push_rlw(self, 0);
  89                added++;
  90
  91                if (v) rlw_set_run_bit(self->rlw, v);
  92                rlw_set_running_len(self->rlw, number);
  93        }
  94
  95        return added;
  96}
  97
  98size_t ewah_add_empty_words(struct ewah_bitmap *self, int v, size_t number)
  99{
 100        if (number == 0)
 101                return 0;
 102
 103        self->bit_size += number * BITS_IN_EWORD;
 104        return add_empty_words(self, v, number);
 105}
 106
 107static size_t add_literal(struct ewah_bitmap *self, eword_t new_data)
 108{
 109        eword_t current_num = rlw_get_literal_words(self->rlw);
 110
 111        if (current_num >= RLW_LARGEST_LITERAL_COUNT) {
 112                buffer_push_rlw(self, 0);
 113
 114                rlw_set_literal_words(self->rlw, 1);
 115                buffer_push(self, new_data);
 116                return 2;
 117        }
 118
 119        rlw_set_literal_words(self->rlw, current_num + 1);
 120
 121        /* sanity check */
 122        assert(rlw_get_literal_words(self->rlw) == current_num + 1);
 123
 124        buffer_push(self, new_data);
 125        return 1;
 126}
 127
 128void ewah_add_dirty_words(
 129        struct ewah_bitmap *self, const eword_t *buffer,
 130        size_t number, int negate)
 131{
 132        size_t literals, can_add;
 133
 134        while (1) {
 135                literals = rlw_get_literal_words(self->rlw);
 136                can_add = min_size(number, RLW_LARGEST_LITERAL_COUNT - literals);
 137
 138                rlw_set_literal_words(self->rlw, literals + can_add);
 139
 140                if (self->buffer_size + can_add >= self->alloc_size)
 141                        buffer_grow(self, (self->buffer_size + can_add) * 3 / 2);
 142
 143                if (negate) {
 144                        size_t i;
 145                        for (i = 0; i < can_add; ++i)
 146                                self->buffer[self->buffer_size++] = ~buffer[i];
 147                } else {
 148                        memcpy(self->buffer + self->buffer_size,
 149                                buffer, can_add * sizeof(eword_t));
 150                        self->buffer_size += can_add;
 151                }
 152
 153                self->bit_size += can_add * BITS_IN_EWORD;
 154
 155                if (number - can_add == 0)
 156                        break;
 157
 158                buffer_push_rlw(self, 0);
 159                buffer += can_add;
 160                number -= can_add;
 161        }
 162}
 163
 164static size_t add_empty_word(struct ewah_bitmap *self, int v)
 165{
 166        int no_literal = (rlw_get_literal_words(self->rlw) == 0);
 167        eword_t run_len = rlw_get_running_len(self->rlw);
 168
 169        if (no_literal && run_len == 0) {
 170                rlw_set_run_bit(self->rlw, v);
 171                assert(rlw_get_run_bit(self->rlw) == v);
 172        }
 173
 174        if (no_literal && rlw_get_run_bit(self->rlw) == v &&
 175                run_len < RLW_LARGEST_RUNNING_COUNT) {
 176                rlw_set_running_len(self->rlw, run_len + 1);
 177                assert(rlw_get_running_len(self->rlw) == run_len + 1);
 178                return 0;
 179        } else {
 180                buffer_push_rlw(self, 0);
 181
 182                assert(rlw_get_running_len(self->rlw) == 0);
 183                assert(rlw_get_run_bit(self->rlw) == 0);
 184                assert(rlw_get_literal_words(self->rlw) == 0);
 185
 186                rlw_set_run_bit(self->rlw, v);
 187                assert(rlw_get_run_bit(self->rlw) == v);
 188
 189                rlw_set_running_len(self->rlw, 1);
 190                assert(rlw_get_running_len(self->rlw) == 1);
 191                assert(rlw_get_literal_words(self->rlw) == 0);
 192                return 1;
 193        }
 194}
 195
 196size_t ewah_add(struct ewah_bitmap *self, eword_t word)
 197{
 198        self->bit_size += BITS_IN_EWORD;
 199
 200        if (word == 0)
 201                return add_empty_word(self, 0);
 202
 203        if (word == (eword_t)(~0))
 204                return add_empty_word(self, 1);
 205
 206        return add_literal(self, word);
 207}
 208
 209void ewah_set(struct ewah_bitmap *self, size_t i)
 210{
 211        const size_t dist =
 212                DIV_ROUND_UP(i + 1, BITS_IN_EWORD) -
 213                DIV_ROUND_UP(self->bit_size, BITS_IN_EWORD);
 214
 215        assert(i >= self->bit_size);
 216
 217        self->bit_size = i + 1;
 218
 219        if (dist > 0) {
 220                if (dist > 1)
 221                        add_empty_words(self, 0, dist - 1);
 222
 223                add_literal(self, (eword_t)1 << (i % BITS_IN_EWORD));
 224                return;
 225        }
 226
 227        if (rlw_get_literal_words(self->rlw) == 0) {
 228                rlw_set_running_len(self->rlw,
 229                        rlw_get_running_len(self->rlw) - 1);
 230                add_literal(self, (eword_t)1 << (i % BITS_IN_EWORD));
 231                return;
 232        }
 233
 234        self->buffer[self->buffer_size - 1] |=
 235                ((eword_t)1 << (i % BITS_IN_EWORD));
 236
 237        /* check if we just completed a stream of 1s */
 238        if (self->buffer[self->buffer_size - 1] == (eword_t)(~0)) {
 239                self->buffer[--self->buffer_size] = 0;
 240                rlw_set_literal_words(self->rlw,
 241                        rlw_get_literal_words(self->rlw) - 1);
 242                add_empty_word(self, 1);
 243        }
 244}
 245
 246void ewah_each_bit(struct ewah_bitmap *self, void (*callback)(size_t, void*), void *payload)
 247{
 248        size_t pos = 0;
 249        size_t pointer = 0;
 250        size_t k;
 251
 252        while (pointer < self->buffer_size) {
 253                eword_t *word = &self->buffer[pointer];
 254
 255                if (rlw_get_run_bit(word)) {
 256                        size_t len = rlw_get_running_len(word) * BITS_IN_EWORD;
 257                        for (k = 0; k < len; ++k, ++pos)
 258                                callback(pos, payload);
 259                } else {
 260                        pos += rlw_get_running_len(word) * BITS_IN_EWORD;
 261                }
 262
 263                ++pointer;
 264
 265                for (k = 0; k < rlw_get_literal_words(word); ++k) {
 266                        int c;
 267
 268                        /* todo: zero count optimization */
 269                        for (c = 0; c < BITS_IN_EWORD; ++c, ++pos) {
 270                                if ((self->buffer[pointer] & ((eword_t)1 << c)) != 0)
 271                                        callback(pos, payload);
 272                        }
 273
 274                        ++pointer;
 275                }
 276        }
 277}
 278
 279/**
 280 * Clear all the bits in the bitmap. Does not free or resize
 281 * memory.
 282 */
 283static void ewah_clear(struct ewah_bitmap *self)
 284{
 285        self->buffer_size = 1;
 286        self->buffer[0] = 0;
 287        self->bit_size = 0;
 288        self->rlw = self->buffer;
 289}
 290
 291struct ewah_bitmap *ewah_new(void)
 292{
 293        struct ewah_bitmap *self;
 294
 295        self = xmalloc(sizeof(struct ewah_bitmap));
 296        self->alloc_size = 32;
 297        ALLOC_ARRAY(self->buffer, self->alloc_size);
 298
 299        ewah_clear(self);
 300        return self;
 301}
 302
 303void ewah_free(struct ewah_bitmap *self)
 304{
 305        if (!self)
 306                return;
 307
 308        if (self->alloc_size)
 309                free(self->buffer);
 310
 311        free(self);
 312}
 313
 314static void read_new_rlw(struct ewah_iterator *it)
 315{
 316        const eword_t *word = NULL;
 317
 318        it->literals = 0;
 319        it->compressed = 0;
 320
 321        while (1) {
 322                word = &it->buffer[it->pointer];
 323
 324                it->rl = rlw_get_running_len(word);
 325                it->lw = rlw_get_literal_words(word);
 326                it->b = rlw_get_run_bit(word);
 327
 328                if (it->rl || it->lw)
 329                        return;
 330
 331                if (it->pointer < it->buffer_size - 1) {
 332                        it->pointer++;
 333                } else {
 334                        it->pointer = it->buffer_size;
 335                        return;
 336                }
 337        }
 338}
 339
 340int ewah_iterator_next(eword_t *next, struct ewah_iterator *it)
 341{
 342        if (it->pointer >= it->buffer_size)
 343                return 0;
 344
 345        if (it->compressed < it->rl) {
 346                it->compressed++;
 347                *next = it->b ? (eword_t)(~0) : 0;
 348        } else {
 349                assert(it->literals < it->lw);
 350
 351                it->literals++;
 352                it->pointer++;
 353
 354                assert(it->pointer < it->buffer_size);
 355
 356                *next = it->buffer[it->pointer];
 357        }
 358
 359        if (it->compressed == it->rl && it->literals == it->lw) {
 360                if (++it->pointer < it->buffer_size)
 361                        read_new_rlw(it);
 362        }
 363
 364        return 1;
 365}
 366
 367void ewah_iterator_init(struct ewah_iterator *it, struct ewah_bitmap *parent)
 368{
 369        it->buffer = parent->buffer;
 370        it->buffer_size = parent->buffer_size;
 371        it->pointer = 0;
 372
 373        it->lw = 0;
 374        it->rl = 0;
 375        it->compressed = 0;
 376        it->literals = 0;
 377        it->b = 0;
 378
 379        if (it->pointer < it->buffer_size)
 380                read_new_rlw(it);
 381}
 382
 383void ewah_xor(
 384        struct ewah_bitmap *ewah_i,
 385        struct ewah_bitmap *ewah_j,
 386        struct ewah_bitmap *out)
 387{
 388        struct rlw_iterator rlw_i;
 389        struct rlw_iterator rlw_j;
 390        size_t literals;
 391
 392        rlwit_init(&rlw_i, ewah_i);
 393        rlwit_init(&rlw_j, ewah_j);
 394
 395        while (rlwit_word_size(&rlw_i) > 0 && rlwit_word_size(&rlw_j) > 0) {
 396                while (rlw_i.rlw.running_len > 0 || rlw_j.rlw.running_len > 0) {
 397                        struct rlw_iterator *prey, *predator;
 398                        size_t index;
 399                        int negate_words;
 400
 401                        if (rlw_i.rlw.running_len < rlw_j.rlw.running_len) {
 402                                prey = &rlw_i;
 403                                predator = &rlw_j;
 404                        } else {
 405                                prey = &rlw_j;
 406                                predator = &rlw_i;
 407                        }
 408
 409                        negate_words = !!predator->rlw.running_bit;
 410                        index = rlwit_discharge(prey, out,
 411                                predator->rlw.running_len, negate_words);
 412
 413                        ewah_add_empty_words(out, negate_words,
 414                                predator->rlw.running_len - index);
 415
 416                        rlwit_discard_first_words(predator,
 417                                predator->rlw.running_len);
 418                }
 419
 420                literals = min_size(
 421                        rlw_i.rlw.literal_words,
 422                        rlw_j.rlw.literal_words);
 423
 424                if (literals) {
 425                        size_t k;
 426
 427                        for (k = 0; k < literals; ++k) {
 428                                ewah_add(out,
 429                                        rlw_i.buffer[rlw_i.literal_word_start + k] ^
 430                                        rlw_j.buffer[rlw_j.literal_word_start + k]
 431                                );
 432                        }
 433
 434                        rlwit_discard_first_words(&rlw_i, literals);
 435                        rlwit_discard_first_words(&rlw_j, literals);
 436                }
 437        }
 438
 439        if (rlwit_word_size(&rlw_i) > 0)
 440                rlwit_discharge(&rlw_i, out, ~0, 0);
 441        else
 442                rlwit_discharge(&rlw_j, out, ~0, 0);
 443
 444        out->bit_size = max_size(ewah_i->bit_size, ewah_j->bit_size);
 445}
 446
 447#define BITMAP_POOL_MAX 16
 448static struct ewah_bitmap *bitmap_pool[BITMAP_POOL_MAX];
 449static size_t bitmap_pool_size;
 450
 451struct ewah_bitmap *ewah_pool_new(void)
 452{
 453        if (bitmap_pool_size)
 454                return bitmap_pool[--bitmap_pool_size];
 455
 456        return ewah_new();
 457}
 458
 459void ewah_pool_free(struct ewah_bitmap *self)
 460{
 461        if (self == NULL)
 462                return;
 463
 464        if (bitmap_pool_size == BITMAP_POOL_MAX ||
 465                self->alloc_size == 0) {
 466                ewah_free(self);
 467                return;
 468        }
 469
 470        ewah_clear(self);
 471        bitmap_pool[bitmap_pool_size++] = self;
 472}
 473
 474uint32_t ewah_checksum(struct ewah_bitmap *self)
 475{
 476        const uint8_t *p = (uint8_t *)self->buffer;
 477        uint32_t crc = (uint32_t)self->bit_size;
 478        size_t size = self->buffer_size * sizeof(eword_t);
 479
 480        while (size--)
 481                crc = (crc << 5) - crc + (uint32_t)*p++;
 482
 483        return crc;
 484}