1/*
2* SHA-1 implementation for PowerPC.
3*
4* Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
5*/
/*
 * FS: size in bytes of the stack frame that sha1_core creates with
 * "stwu %r1,-FS(%r1)".  The seventeen words at FS-68 .. FS-4 of that
 * frame hold the saved copies of r15 - r31.
 */
#define FS 80
78
/*
 * The six round values T, A, B, C, D, E are held in r7 - r12.  Instead
 * of moving values between registers after each round, the assignment
 * of names to registers rotates: the register called T in round t is
 * the one called A in round t+1, A turns into B, and so forth.
 *
 * ROLLREG(t, k) yields the register number for slot k in round t,
 * where slot 0 is E and slot 5 is T.
 */
#define ROLL_FIRST	7	/* lowest register of the rotating set */
#define ROLL_COUNT	6	/* how many registers take part */
#define ROLLREG(t, k)	((((t)+(k))%ROLL_COUNT)+ROLL_FIRST)

#define RT(t)	ROLLREG(t, 5)
#define RA(t)	ROLLREG(t, 4)
#define RB(t)	ROLLREG(t, 3)
#define RC(t)	ROLLREG(t, 2)
#define RD(t)	ROLLREG(t, 1)
#define RE(t)	ROLLREG(t, 0)
1920
/*
 * The W values are kept as a 16-entry circular window in r16 - r31:
 * word t is found in register W_FIRST + (t mod W_COUNT).
 */
#define W_FIRST	16	/* lowest register of the window */
#define W_COUNT	16	/* number of W values kept in registers */
#define W(t)	(((t)%W_COUNT)+W_FIRST)
2223
/*
 * One round with f(B, C, D) = (B & C) | (D & ~B).
 *
 * Computes T = rotl(A, 5) + f + E + r15 + W[t] into RT(t) and rotates
 * B left by 30 bits in place; because RB(t) is the same register as
 * RC(t+1), the rotated value is what the next round sees as C.
 * r15 must hold the round constant.  r0 and r6 are scratch.
 */
#define STEPD0(t) \
	and %r6,RB(t),RC(t); /* r6 = B & C */ \
	andc %r0,RD(t),RB(t); /* r0 = D & ~B (B read before it is rotated) */ \
	rotlwi RT(t),RA(t),5; /* T = rotl(A, 5) */ \
	rotlwi RB(t),RB(t),30; /* B = rotl(B, 30), in place */ \
	or %r6,%r6,%r0; /* r6 = f(B, C, D) */ \
	add %r0,RE(t),%r15; /* r0 = E + constant */ \
	add RT(t),RT(t),%r6; /* T += f */ \
	add %r0,%r0,W(t); /* r0 += W[t] */ \
	add RT(t),RT(t),%r0 /* T += E + constant + W[t] */
3334
/*
 * One round with f(B, C, D) = B ^ C ^ D.
 *
 * Computes T = rotl(A, 5) + f + E + r15 + W[t] into RT(t) and rotates
 * B left by 30 bits in place; because RB(t) is the same register as
 * RC(t+1), the rotated value is what the next round sees as C.
 * r15 must hold the round constant.  r0 and r6 are scratch.
 */
#define STEPD1(t) \
	xor %r6,RB(t),RC(t); /* r6 = B ^ C (B read before it is rotated) */ \
	rotlwi RT(t),RA(t),5; /* T = rotl(A, 5) */ \
	rotlwi RB(t),RB(t),30; /* B = rotl(B, 30), in place */ \
	xor %r6,%r6,RD(t); /* r6 = f(B, C, D) */ \
	add %r0,RE(t),%r15; /* r0 = E + constant */ \
	add RT(t),RT(t),%r6; /* T += f */ \
	add %r0,%r0,W(t); /* r0 += W[t] */ \
	add RT(t),RT(t),%r0 /* T += E + constant + W[t] */
4344
/*
 * One round with f(B, C, D) = (B & C) | (B & D) | (C & D).
 *
 * Computes T = rotl(A, 5) + f + E + r15 + W[t] into RT(t) and rotates
 * B left by 30 bits in place; because RB(t) is the same register as
 * RC(t+1), the rotated value is what the next round sees as C.
 * r15 must hold the round constant.  r0 and r6 are scratch.
 */
#define STEPD2(t) \
	and %r6,RB(t),RC(t); /* r6 = B & C */ \
	and %r0,RB(t),RD(t); /* r0 = B & D (last read of unrotated B) */ \
	rotlwi RT(t),RA(t),5; /* T = rotl(A, 5) */ \
	rotlwi RB(t),RB(t),30; /* B = rotl(B, 30), in place */ \
	or %r6,%r6,%r0; /* r6 = (B & C) | (B & D) */ \
	and %r0,RC(t),RD(t); /* r0 = C & D */ \
	or %r6,%r6,%r0; /* r6 = f(B, C, D) */ \
	add %r0,RE(t),%r15; /* r0 = E + constant */ \
	add RT(t),RT(t),%r6; /* T += f */ \
	add %r0,%r0,W(t); /* r0 += W[t] */ \
	add RT(t),RT(t),%r0 /* T += E + constant + W[t] */
5657
/*
 * Load 32-bit word t of the current input block, found at byte offset
 * 4*t from the pointer in r4, into its register W(t).
 * NOTE(review): lwz reads the word in the CPU's native byte order; this
 * presumably assumes a big-endian target -- confirm.
 */
#define LOADW(t) \
	lwz W(t),(t)*4(%r4)
5960
/*
 * Compute W value t in place:
 *   W[t] = rotl(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1)
 * W(t) and W((t)-16) name the same register, so the result replaces
 * the oldest entry of the 16-register window.  r0 is scratch.
 */
#define UPDATEW(t) \
	xor %r0,W((t)-3),W((t)-8); /* r0 = W[t-3] ^ W[t-8] */ \
	xor W(t),W((t)-16),W((t)-14); /* W[t] = W[t-16] ^ W[t-14] */ \
	xor W(t),W(t),%r0; /* fold in the other two terms */ \
	rotlwi W(t),W(t),1 /* rotate left by one bit */
6566
/*
 * STEP0LD1(t): perform round t with STEPD0, then load input word t+4
 * so that it is already in its register when round t+4 needs it.
 */
#define STEP0LD1(t) \
	STEPD0(t); LOADW((t)+4)

/* STEP0LD4(t): four consecutive STEP0LD1 rounds, t through t+3. */
#define STEP0LD4(t) \
	STEP0LD1(t); \
	STEP0LD1((t)+1); \
	STEP0LD1((t)+2); \
	STEP0LD1((t)+3)
7172
/*
 * STEPUP1(t, fn): perform round t with the macro STEP<fn> (fn is one of
 * D0, D1, D2), then compute W value t+4 ahead of the round that
 * consumes it.
 */
#define STEPUP1(t, fn) \
	STEP##fn(t); UPDATEW((t)+4)

/* STEPUP4(t, fn): four consecutive STEPUP1 rounds, t through t+3. */
#define STEPUP4(t, fn) \
	STEPUP1(t, fn); \
	STEPUP1((t)+1, fn); \
	STEPUP1((t)+2, fn); \
	STEPUP1((t)+3, fn)
7778
/* STEPUP8(t, fn): two back-to-back STEPUP4 groups, rounds t .. t+7. */
#define STEPUP8(t, fn) \
	STEPUP4(t, fn); \
	STEPUP4((t)+4, fn)

/*
 * STEPUP20(t, fn): twenty rounds t .. t+19, all using STEP<fn>, issued
 * as 8 + 8 + 4 rounds.
 */
#define STEPUP20(t, fn) \
	STEPUP8(t, fn); \
	STEPUP8((t)+8, fn); \
	STEPUP4((t)+16, fn)
8485
/*
 * sha1_core: run the SHA-1 block function over consecutive 64-byte
 * blocks of input.
 *
 * In:    r3 = pointer to the five 32-bit state words A..E (offsets
 *             0, 4, 8, 12, 16); rewritten after every block
 *        r4 = pointer to the input data; advanced by 64 per block
 *        r5 = number of blocks to process
 * Out:   no register result; the state words at r3 are updated.
 * Uses:  r0, r4, r6 - r12, ctr and cr0 are modified.  r15 - r31 are
 *        used as well, but are saved in an FS-byte stack frame on entry
 *        and restored before returning.
 *
 * A block count of zero returns at once without touching the state.
 * Without that check CTR would start at zero, and since the bdnz at the
 * bottom of the loop decrements CTR before testing it, the count would
 * wrap around and the loop would keep hashing far past the end of the
 * caller's buffer.
 */
	.globl	sha1_core
sha1_core:
	/* Nothing to hash: return before creating the frame. */
	cmpwi	%r5,0
	beqlr

	/* Create the frame and save r15 - r31, which are all used below. */
	stwu	%r1,-FS(%r1)
	stw	%r15,FS-68(%r1)
	stw	%r16,FS-64(%r1)
	stw	%r17,FS-60(%r1)
	stw	%r18,FS-56(%r1)
	stw	%r19,FS-52(%r1)
	stw	%r20,FS-48(%r1)
	stw	%r21,FS-44(%r1)
	stw	%r22,FS-40(%r1)
	stw	%r23,FS-36(%r1)
	stw	%r24,FS-32(%r1)
	stw	%r25,FS-28(%r1)
	stw	%r26,FS-24(%r1)
	stw	%r27,FS-20(%r1)
	stw	%r28,FS-16(%r1)
	stw	%r29,FS-12(%r1)
	stw	%r30,FS-8(%r1)
	stw	%r31,FS-4(%r1)

	/* Load up A - E into the registers that hold them in round 0. */
	lwz	RA(0),0(%r3)	/* A */
	lwz	RB(0),4(%r3)	/* B */
	lwz	RC(0),8(%r3)	/* C */
	lwz	RD(0),12(%r3)	/* D */
	lwz	RE(0),16(%r3)	/* E */

	mtctr	%r5		/* CTR = blocks left to process */

	/*
	 * Per-block loop.  The first four input words are loaded up
	 * front; rounds 0 - 11 load the remaining twelve as they go, and
	 * from round 12 on each round also computes the W value needed
	 * four rounds later.
	 */
1:	LOADW(0)
	LOADW(1)
	LOADW(2)
	LOADW(3)

	lis	%r15,0x5a82	/* K0-19 */
	ori	%r15,%r15,0x7999
	STEP0LD4(0)
	STEP0LD4(4)
	STEP0LD4(8)
	STEPUP4(12, D0)
	STEPUP4(16, D0)

	lis	%r15,0x6ed9	/* K20-39 */
	ori	%r15,%r15,0xeba1
	STEPUP20(20, D1)

	lis	%r15,0x8f1b	/* K40-59 */
	ori	%r15,%r15,0xbcdc
	STEPUP20(40, D2)

	lis	%r15,0xca62	/* K60-79 */
	ori	%r15,%r15,0xc1d6
	STEPUP4(60, D1)
	STEPUP4(64, D1)
	STEPUP4(68, D1)
	STEPUP4(72, D1)
	/* W values 76 - 79 already exist, so the last rounds only step. */
	STEPD1(76)
	STEPD1(77)
	STEPD1(78)
	STEPD1(79)

	/*
	 * Add the previous state to the round-80 values.  r16 - r20 are
	 * free to use as temporaries here because the W values are no
	 * longer needed for this block.
	 *
	 * The round-80 registers overlap the round-0 ones, so the order
	 * matters: each destination below is a register whose round-80
	 * value has already been consumed.  The new E is kept in r20
	 * until A, which lives in RE(0) after round 79, has been read.
	 */
	lwz	%r20,16(%r3)
	lwz	%r19,12(%r3)
	lwz	%r18,8(%r3)
	lwz	%r17,4(%r3)
	lwz	%r16,0(%r3)
	add	%r20,RE(80),%r20
	add	RD(0),RD(80),%r19
	add	RC(0),RC(80),%r18
	add	RB(0),RB(80),%r17
	add	RA(0),RA(80),%r16
	mr	RE(0),%r20
	/* Write the new state back; it also stays live for the next block. */
	stw	RA(0),0(%r3)
	stw	RB(0),4(%r3)
	stw	RC(0),8(%r3)
	stw	RD(0),12(%r3)
	stw	RE(0),16(%r3)

	addi	%r4,%r4,64	/* advance to the next 64-byte block */
	bdnz	1b		/* decrement CTR, loop while it is non-zero */

	/* Restore r15 - r31 and release the frame. */
	lwz	%r15,FS-68(%r1)
	lwz	%r16,FS-64(%r1)
	lwz	%r17,FS-60(%r1)
	lwz	%r18,FS-56(%r1)
	lwz	%r19,FS-52(%r1)
	lwz	%r20,FS-48(%r1)
	lwz	%r21,FS-44(%r1)
	lwz	%r22,FS-40(%r1)
	lwz	%r23,FS-36(%r1)
	lwz	%r24,FS-32(%r1)
	lwz	%r25,FS-28(%r1)
	lwz	%r26,FS-24(%r1)
	lwz	%r27,FS-20(%r1)
	lwz	%r28,FS-16(%r1)
	lwz	%r29,FS-12(%r1)
	lwz	%r30,FS-8(%r1)
	lwz	%r31,FS-4(%r1)
	addi	%r1,%r1,FS
	blr