/*
 * SHA-1 implementation for PowerPC.
 *
 * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
 */

/*
 * Size in bytes of the stack frame created by sha1_core.  The top
 * 68 bytes (FS-68 .. FS-4) hold the saved copies of %r15 - %r31.
 */
#define FS	80

/*
 * We roll the registers for T, A, B, C, D, E around on each
 * iteration; T on iteration t is A on iteration t+1, and so on.
 * We use registers 7 - 12 for this.
 *
 * Each macro evaluates to the number of the register that holds the
 * named variable at round t.
 */
#define RT(t)	((((t)+5)%6)+7)
#define RA(t)	((((t)+4)%6)+7)
#define RB(t)	((((t)+3)%6)+7)
#define RC(t)	((((t)+2)%6)+7)
#define RD(t)	((((t)+1)%6)+7)
#define RE(t)	((((t)+0)%6)+7)

/* We use registers 16 - 31 for the W values */
#define W(t)	(((t)%16)+16)

/*
 * Round macros.  Each one computes
 *	T = rotl(A, 5) + f(B, C, D) + E + K + W[t]
 *	B = rotl(B, 30)
 * with K taken from %r15.  %r0 and %r6 are scratch.
 *
 * STEPD0: f = (B & C) | (D & ~B)
 */
#define STEPD0(t)				\
	and	%r6,RB(t),RC(t);		\
	andc	%r0,RD(t),RB(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	or	%r6,%r6,%r0;			\
	add	%r0,RE(t),%r15;			\
	add	RT(t),RT(t),%r6;		\
	add	%r0,%r0,W(t);			\
	add	RT(t),RT(t),%r0

/* STEPD1: f = B ^ C ^ D */
#define STEPD1(t)				\
	xor	%r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	xor	%r6,%r6,RD(t);			\
	add	%r0,RE(t),%r15;			\
	add	RT(t),RT(t),%r6;		\
	add	%r0,%r0,W(t);			\
	add	RT(t),RT(t),%r0

/* STEPD2: f = (B & C) | (B & D) | (C & D) */
#define STEPD2(t)				\
	and	%r6,RB(t),RC(t);		\
	and	%r0,RB(t),RD(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	or	%r6,%r6,%r0;			\
	and	%r0,RC(t),RD(t);		\
	or	%r6,%r6,%r0;			\
	add	%r0,RE(t),%r15;			\
	add	RT(t),RT(t),%r6;		\
	add	%r0,%r0,W(t);			\
	add	RT(t),RT(t),%r0

/*
 * Load input word t of the current 64-byte block (pointed to by %r4)
 * into its W register.  lwz reads the word in the CPU's native byte
 * order; SHA-1 defines the message words as big-endian, so this
 * presumably relies on the CPU running big-endian.
 */
#define LOADW(t)				\
	lwz	W(t),(t)*4(%r4)

/*
 * Message schedule for t >= 16:
 *	W[t] = rotl(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1)
 * W[t] lives in the same register as W[t-16], which it overwrites.
 * %r0 is scratch.
 */
#define UPDATEW(t)				\
	xor	%r0,W((t)-3),W((t)-8);		\
	xor	W(t),W((t)-16),W((t)-14);	\
	xor	W(t),W(t),%r0;			\
	rotlwi	W(t),W(t),1

/*
 * Four STEPD0 rounds starting at t, each interleaved with the load of
 * the input word that is needed four rounds later.
 */
#define STEP0LD4(t)				\
	STEPD0(t);     LOADW((t)+4);		\
	STEPD0((t)+1); LOADW((t)+5);		\
	STEPD0((t)+2); LOADW((t)+6);		\
	STEPD0((t)+3); LOADW((t)+7)

/*
 * Four STEP<fn> rounds starting at t, each interleaved with the
 * schedule update for the word that is needed four rounds later.
 */
#define STEPUP4(t, fn)				\
	STEP##fn(t);     UPDATEW((t)+4);	\
	STEP##fn((t)+1); UPDATEW((t)+5);	\
	STEP##fn((t)+2); UPDATEW((t)+6);	\
	STEP##fn((t)+3); UPDATEW((t)+7)

/* Twenty rounds (five groups of four) starting at t. */
#define STEPUP20(t, fn)				\
	STEPUP4(t, fn);				\
	STEPUP4((t)+4, fn);			\
	STEPUP4((t)+8, fn);			\
	STEPUP4((t)+12, fn);			\
	STEPUP4((t)+16, fn)

/*
 * sha1_core - run the SHA-1 compression function over whole blocks.
 *
 * Presumably called from C as
 *	void sha1_core(uint32_t *hash, const unsigned char *p,
 *		       unsigned int nblocks);
 * (confirm against the C caller).
 *
 * In:	%r3 = pointer to the five 32-bit state words A, B, C, D, E
 *	      (read on entry, updated in place after every block)
 *	%r4 = pointer to the input data, 64 bytes per block
 *	%r5 = number of 64-byte blocks to process; 0 returns at once
 *
 * Register usage:
 *	%r0, %r6   scratch
 *	%r7 - %r12 rotating T, A, B, C, D, E
 *	%r15       round constant K
 *	%r16 - %r31 W[0..15]; %r16 - %r20 are reused for the old state
 *	           during the final add-back
 *	%ctr       remaining block count
 *	%cr0       clobbered by the block-count check
 *
 * %r15 - %r31 are saved in an FS-byte stack frame and restored before
 * returning.  %r4 is advanced past the data that was consumed.
 */
	.globl	sha1_core
sha1_core:
	/*
	 * bdnz decrements CTR before testing it, so entering the loop
	 * with a count of 0 would wrap to 2^32 iterations and run far
	 * past the input.  Bail out before any state is touched.
	 */
	cmplwi	%r5,0
	beqlr

	stwu	%r1,-FS(%r1)
	stw	%r15,FS-68(%r1)
	stw	%r16,FS-64(%r1)
	stw	%r17,FS-60(%r1)
	stw	%r18,FS-56(%r1)
	stw	%r19,FS-52(%r1)
	stw	%r20,FS-48(%r1)
	stw	%r21,FS-44(%r1)
	stw	%r22,FS-40(%r1)
	stw	%r23,FS-36(%r1)
	stw	%r24,FS-32(%r1)
	stw	%r25,FS-28(%r1)
	stw	%r26,FS-24(%r1)
	stw	%r27,FS-20(%r1)
	stw	%r28,FS-16(%r1)
	stw	%r29,FS-12(%r1)
	stw	%r30,FS-8(%r1)
	stw	%r31,FS-4(%r1)

	/* Load up A - E */
	lwz	RA(0),0(%r3)	/* A */
	lwz	RB(0),4(%r3)	/* B */
	lwz	RC(0),8(%r3)	/* C */
	lwz	RD(0),12(%r3)	/* D */
	lwz	RE(0),16(%r3)	/* E */

	mtctr	%r5

	/* One pass of this loop hashes one 64-byte block. */
1:	LOADW(0)
	LOADW(1)
	LOADW(2)
	LOADW(3)

	lis	%r15,0x5a82	/* K0-19 */
	ori	%r15,%r15,0x7999
	STEP0LD4(0)
	STEP0LD4(4)
	STEP0LD4(8)
	STEPUP4(12, D0)
	STEPUP4(16, D0)

	lis	%r15,0x6ed9	/* K20-39 */
	ori	%r15,%r15,0xeba1
	STEPUP20(20, D1)

	lis	%r15,0x8f1b	/* K40-59 */
	ori	%r15,%r15,0xbcdc
	STEPUP20(40, D2)

	lis	%r15,0xca62	/* K60-79 */
	ori	%r15,%r15,0xc1d6
	STEPUP4(60, D1)
	STEPUP4(64, D1)
	STEPUP4(68, D1)
	STEPUP4(72, D1)
	/* The last four rounds need no further schedule updates. */
	STEPD1(76)
	STEPD1(77)
	STEPD1(78)
	STEPD1(79)

	/*
	 * Add the state from before this block to the result of round
	 * 79 and move it back into the round-0 register assignment.
	 * Rx(80) is the register holding x after round 79.  The W
	 * registers %r16 - %r20 are free here and take the old state.
	 *
	 * The order matters: every Rx(0) written below has already been
	 * read as a source by an earlier add.  E is summed into %r20
	 * first and only copied to RE(0) at the end, because RE(0) is
	 * the same register as RA(80).
	 */
	lwz	%r20,16(%r3)
	lwz	%r19,12(%r3)
	lwz	%r18,8(%r3)
	lwz	%r17,4(%r3)
	lwz	%r16,0(%r3)
	add	%r20,RE(80),%r20
	add	RD(0),RD(80),%r19
	add	RC(0),RC(80),%r18
	add	RB(0),RB(80),%r17
	add	RA(0),RA(80),%r16
	mr	RE(0),%r20
	stw	RA(0),0(%r3)
	stw	RB(0),4(%r3)
	stw	RC(0),8(%r3)
	stw	RD(0),12(%r3)
	stw	RE(0),16(%r3)

	addi	%r4,%r4,64	/* advance to the next input block */
	bdnz	1b

	lwz	%r15,FS-68(%r1)
	lwz	%r16,FS-64(%r1)
	lwz	%r17,FS-60(%r1)
	lwz	%r18,FS-56(%r1)
	lwz	%r19,FS-52(%r1)
	lwz	%r20,FS-48(%r1)
	lwz	%r21,FS-44(%r1)
	lwz	%r22,FS-40(%r1)
	lwz	%r23,FS-36(%r1)
	lwz	%r24,FS-32(%r1)
	lwz	%r25,FS-28(%r1)
	lwz	%r26,FS-24(%r1)
	lwz	%r27,FS-20(%r1)
	lwz	%r28,FS-16(%r1)
	lwz	%r29,FS-12(%r1)
	lwz	%r30,FS-8(%r1)
	lwz	%r31,FS-4(%r1)
	addi	%r1,%r1,FS
	blr