librsync  2.3.4
rabinkarp.c
1 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2  *
3  * rabinkarp -- The RabinKarp rolling checksum.
4  *
5  * Copyright (C) 2019 by Donovan Baarda <abo@minkirri.apana.org.au>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU Lesser General Public License as published by
9  * the Free Software Foundation; either version 2.1 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21 #include "config.h" /* IWYU pragma: keep */
22 #include "rabinkarp.h"
23 
24 /* Constant for RABINKARP_MULT^2. */
25 #define RABINKARP_MULT2 0xa5b71959U
26 
27 /* Macros for doing 16 bytes with 2 mults that can be done in parallel. Testing
28  showed this as a performance sweet spot vs 16x1, 8x2, 4x4 1x16 alternative
29  arrangements. */
30 #define PAR2X1(hash,buf,i) (RABINKARP_MULT2*(hash) + \
31  RABINKARP_MULT*buf[i] + \
32  buf[i+1])
33 #define PAR2X2(hash,buf,i) PAR2X1(PAR2X1(hash,buf,i),buf,i+2)
34 #define PAR2X4(hash,buf,i) PAR2X2(PAR2X2(hash,buf,i),buf,i+4)
35 #define PAR2X8(hash,buf) PAR2X4(PAR2X4(hash,buf,0),buf,8)
36 
37 /* Table of RABINKARP_MULT^(2^(i+1)) for power lookups. */
38 const static uint32_t RABINKARP_MULT_POW2[32] = {
39  0x08104225U,
40  0xa5b71959U,
41  0xf9c080f1U,
42  0x7c71e2e1U,
43  0x0bb409c1U,
44  0x4dc72381U,
45  0xd17a8701U,
46  0x96260e01U,
47  0x55101c01U,
48  0x2d303801U,
49  0x66a07001U,
50  0xfe40e001U,
51  0xc081c001U,
52  0x91038001U,
53  0x62070001U,
54  0xc40e0001U,
55  0x881c0001U,
56  0x10380001U,
57  0x20700001U,
58  0x40e00001U,
59  0x81c00001U,
60  0x03800001U,
61  0x07000001U,
62  0x0e000001U,
63  0x1c000001U,
64  0x38000001U,
65  0x70000001U,
66  0xe0000001U,
67  0xc0000001U,
68  0x80000001U,
69  0x00000001U,
70  0x00000001U
71 };
72 
73 /* Get the value of RABINKARP_MULT^n. */
74 static inline uint32_t rabinkarp_pow(uint32_t n)
75 {
76  const uint32_t *m = RABINKARP_MULT_POW2;
77  uint32_t ans = 1;
78  while (n) {
79  if (n & 1) {
80  ans *= *m;
81  }
82  m++;
83  n >>= 1;
84  }
85  return ans;
86 }
87 
88 void rabinkarp_update(rabinkarp_t *sum, const unsigned char *buf, size_t len)
89 {
90  size_t n = len;
91  uint32_t hash = sum->hash;
92 
93  while (n >= 16) {
94  hash = PAR2X8(hash, buf);
95  buf += 16;
96  n -= 16;
97  }
98  while (n) {
99  hash = RABINKARP_MULT * hash + *buf++;
100  n--;
101  }
102  sum->hash = hash;
103  sum->count += len;
104  sum->mult *= rabinkarp_pow((uint32_t)len);
105 }
The rabinkarp class implementation of the RabinKarp rollsum.
#define RABINKARP_MULT
The RabinKarp multiplier.
Definition: rabinkarp.h:41
The rabinkarp_t state type.
Definition: rabinkarp.h:56
uint32_t mult
The value of RABINKARP_MULT^count.
Definition: rabinkarp.h:59
size_t count
Count of bytes included in sum.
Definition: rabinkarp.h:57
uint32_t hash
The accumulated hash value.
Definition: rabinkarp.h:58