/*
 * ppc/sha1ppc.S, as of commit "Merge branch 'jc/simpack' into next" (fb8e008)
 *
 * SHA-1 implementation for PowerPC.
 *
 * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
 */
   6#define FS      80
   7
   8/*
   9 * We roll the registers for T, A, B, C, D, E around on each
  10 * iteration; T on iteration t is A on iteration t+1, and so on.
  11 * We use registers 7 - 12 for this.
  12 */
  13#define RT(t)   ((((t)+5)%6)+7)
  14#define RA(t)   ((((t)+4)%6)+7)
  15#define RB(t)   ((((t)+3)%6)+7)
  16#define RC(t)   ((((t)+2)%6)+7)
  17#define RD(t)   ((((t)+1)%6)+7)
  18#define RE(t)   ((((t)+0)%6)+7)
  19
  20/* We use registers 16 - 31 for the W values */
  21#define W(t)    (((t)%16)+16)
  22
  23#define STEPD0(t)                               \
  24        and     %r6,RB(t),RC(t);                \
  25        andc    %r0,RD(t),RB(t);                \
  26        rotlwi  RT(t),RA(t),5;                  \
  27        rotlwi  RB(t),RB(t),30;                 \
  28        or      %r6,%r6,%r0;                    \
  29        add     %r0,RE(t),%r15;                 \
  30        add     RT(t),RT(t),%r6;                \
  31        add     %r0,%r0,W(t);                   \
  32        add     RT(t),RT(t),%r0
  33
  34#define STEPD1(t)                               \
  35        xor     %r6,RB(t),RC(t);                \
  36        rotlwi  RT(t),RA(t),5;                  \
  37        rotlwi  RB(t),RB(t),30;                 \
  38        xor     %r6,%r6,RD(t);                  \
  39        add     %r0,RE(t),%r15;                 \
  40        add     RT(t),RT(t),%r6;                \
  41        add     %r0,%r0,W(t);                   \
  42        add     RT(t),RT(t),%r0
  43
  44#define STEPD2(t)                               \
  45        and     %r6,RB(t),RC(t);                \
  46        and     %r0,RB(t),RD(t);                \
  47        rotlwi  RT(t),RA(t),5;                  \
  48        rotlwi  RB(t),RB(t),30;                 \
  49        or      %r6,%r6,%r0;                    \
  50        and     %r0,RC(t),RD(t);                \
  51        or      %r6,%r6,%r0;                    \
  52        add     %r0,RE(t),%r15;                 \
  53        add     RT(t),RT(t),%r6;                \
  54        add     %r0,%r0,W(t);                   \
  55        add     RT(t),RT(t),%r0
  56
  57#define LOADW(t)                                \
  58        lwz     W(t),(t)*4(%r4)
  59
  60#define UPDATEW(t)                              \
  61        xor     %r0,W((t)-3),W((t)-8);          \
  62        xor     W(t),W((t)-16),W((t)-14);       \
  63        xor     W(t),W(t),%r0;                  \
  64        rotlwi  W(t),W(t),1
  65
  66#define STEP0LD4(t)                             \
  67        STEPD0(t);   LOADW((t)+4);              \
  68        STEPD0((t)+1); LOADW((t)+5);            \
  69        STEPD0((t)+2); LOADW((t)+6);            \
  70        STEPD0((t)+3); LOADW((t)+7)
  71
  72#define STEPUP4(t, fn)                          \
  73        STEP##fn(t);   UPDATEW((t)+4);          \
  74        STEP##fn((t)+1); UPDATEW((t)+5);        \
  75        STEP##fn((t)+2); UPDATEW((t)+6);        \
  76        STEP##fn((t)+3); UPDATEW((t)+7)
  77
  78#define STEPUP20(t, fn)                         \
  79        STEPUP4(t, fn);                         \
  80        STEPUP4((t)+4, fn);                     \
  81        STEPUP4((t)+8, fn);                     \
  82        STEPUP4((t)+12, fn);                    \
  83        STEPUP4((t)+16, fn)
  84
  85        .globl  sha1_core
  86sha1_core:
  87        stwu    %r1,-FS(%r1)
  88        stw     %r15,FS-68(%r1)
  89        stw     %r16,FS-64(%r1)
  90        stw     %r17,FS-60(%r1)
  91        stw     %r18,FS-56(%r1)
  92        stw     %r19,FS-52(%r1)
  93        stw     %r20,FS-48(%r1)
  94        stw     %r21,FS-44(%r1)
  95        stw     %r22,FS-40(%r1)
  96        stw     %r23,FS-36(%r1)
  97        stw     %r24,FS-32(%r1)
  98        stw     %r25,FS-28(%r1)
  99        stw     %r26,FS-24(%r1)
 100        stw     %r27,FS-20(%r1)
 101        stw     %r28,FS-16(%r1)
 102        stw     %r29,FS-12(%r1)
 103        stw     %r30,FS-8(%r1)
 104        stw     %r31,FS-4(%r1)
 105
 106        /* Load up A - E */
 107        lwz     RA(0),0(%r3)    /* A */
 108        lwz     RB(0),4(%r3)    /* B */
 109        lwz     RC(0),8(%r3)    /* C */
 110        lwz     RD(0),12(%r3)   /* D */
 111        lwz     RE(0),16(%r3)   /* E */
 112
 113        mtctr   %r5
 114
 1151:      LOADW(0)
 116        LOADW(1)
 117        LOADW(2)
 118        LOADW(3)
 119
 120        lis     %r15,0x5a82     /* K0-19 */
 121        ori     %r15,%r15,0x7999
 122        STEP0LD4(0)
 123        STEP0LD4(4)
 124        STEP0LD4(8)
 125        STEPUP4(12, D0)
 126        STEPUP4(16, D0)
 127
 128        lis     %r15,0x6ed9     /* K20-39 */
 129        ori     %r15,%r15,0xeba1
 130        STEPUP20(20, D1)
 131
 132        lis     %r15,0x8f1b     /* K40-59 */
 133        ori     %r15,%r15,0xbcdc
 134        STEPUP20(40, D2)
 135
 136        lis     %r15,0xca62     /* K60-79 */
 137        ori     %r15,%r15,0xc1d6
 138        STEPUP4(60, D1)
 139        STEPUP4(64, D1)
 140        STEPUP4(68, D1)
 141        STEPUP4(72, D1)
 142        STEPD1(76)
 143        STEPD1(77)
 144        STEPD1(78)
 145        STEPD1(79)
 146
 147        lwz     %r20,16(%r3)
 148        lwz     %r19,12(%r3)
 149        lwz     %r18,8(%r3)
 150        lwz     %r17,4(%r3)
 151        lwz     %r16,0(%r3)
 152        add     %r20,RE(80),%r20
 153        add     RD(0),RD(80),%r19
 154        add     RC(0),RC(80),%r18
 155        add     RB(0),RB(80),%r17
 156        add     RA(0),RA(80),%r16
 157        mr      RE(0),%r20
 158        stw     RA(0),0(%r3)
 159        stw     RB(0),4(%r3)
 160        stw     RC(0),8(%r3)
 161        stw     RD(0),12(%r3)
 162        stw     RE(0),16(%r3)
 163
 164        addi    %r4,%r4,64
 165        bdnz    1b
 166
 167        lwz     %r15,FS-68(%r1)
 168        lwz     %r16,FS-64(%r1)
 169        lwz     %r17,FS-60(%r1)
 170        lwz     %r18,FS-56(%r1)
 171        lwz     %r19,FS-52(%r1)
 172        lwz     %r20,FS-48(%r1)
 173        lwz     %r21,FS-44(%r1)
 174        lwz     %r22,FS-40(%r1)
 175        lwz     %r23,FS-36(%r1)
 176        lwz     %r24,FS-32(%r1)
 177        lwz     %r25,FS-28(%r1)
 178        lwz     %r26,FS-24(%r1)
 179        lwz     %r27,FS-20(%r1)
 180        lwz     %r28,FS-16(%r1)
 181        lwz     %r29,FS-12(%r1)
 182        lwz     %r30,FS-8(%r1)
 183        lwz     %r31,FS-4(%r1)
 184        addi    %r1,%r1,FS
 185        blr