$OpenBSD: patch-lib_isc_sha1_c,v 1.1 2003/01/25 14:14:37 jakob Exp $
--- lib/isc/sha1.c.orig	Tue Jan  9 22:56:28 2001
+++ lib/isc/sha1.c	Tue Jan 21 08:55:13 2003
@@ -85,6 +85,67 @@ typedef union {
 	unsigned int l[16];
 } CHAR64LONG16;
 
+#ifdef __sparc_v9__
+static void do_R01(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
+static void do_R2(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
+static void do_R3(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
+static void do_R4(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
+
+#define nR0(v,w,x,y,z,i) R0(*v,*w,*x,*y,*z,i)
+#define nR1(v,w,x,y,z,i) R1(*v,*w,*x,*y,*z,i)
+#define nR2(v,w,x,y,z,i) R2(*v,*w,*x,*y,*z,i)
+#define nR3(v,w,x,y,z,i) R3(*v,*w,*x,*y,*z,i)
+#define nR4(v,w,x,y,z,i) R4(*v,*w,*x,*y,*z,i)
+
+static void
+do_R01(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
+{
+	nR0(a,b,c,d,e, 0); nR0(e,a,b,c,d, 1); nR0(d,e,a,b,c, 2);
+	nR0(c,d,e,a,b, 3); nR0(b,c,d,e,a, 4); nR0(a,b,c,d,e, 5);
+	nR0(e,a,b,c,d, 6); nR0(d,e,a,b,c, 7); nR0(c,d,e,a,b, 8);
+	nR0(b,c,d,e,a, 9); nR0(a,b,c,d,e,10); nR0(e,a,b,c,d,11);
+	nR0(d,e,a,b,c,12); nR0(c,d,e,a,b,13); nR0(b,c,d,e,a,14);
+	nR0(a,b,c,d,e,15); nR1(e,a,b,c,d,16); nR1(d,e,a,b,c,17);
+	nR1(c,d,e,a,b,18); nR1(b,c,d,e,a,19);
+}
+
+static void
+do_R2(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
+{
+	nR2(a,b,c,d,e,20); nR2(e,a,b,c,d,21); nR2(d,e,a,b,c,22);
+	nR2(c,d,e,a,b,23); nR2(b,c,d,e,a,24); nR2(a,b,c,d,e,25);
+	nR2(e,a,b,c,d,26); nR2(d,e,a,b,c,27); nR2(c,d,e,a,b,28);
+	nR2(b,c,d,e,a,29); nR2(a,b,c,d,e,30); nR2(e,a,b,c,d,31);
+	nR2(d,e,a,b,c,32); nR2(c,d,e,a,b,33); nR2(b,c,d,e,a,34);
+	nR2(a,b,c,d,e,35); nR2(e,a,b,c,d,36); nR2(d,e,a,b,c,37);
+	nR2(c,d,e,a,b,38); nR2(b,c,d,e,a,39);
+}
+
+static void
+do_R3(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
+{
+	nR3(a,b,c,d,e,40); nR3(e,a,b,c,d,41); nR3(d,e,a,b,c,42);
+	nR3(c,d,e,a,b,43); nR3(b,c,d,e,a,44); nR3(a,b,c,d,e,45);
+	nR3(e,a,b,c,d,46); nR3(d,e,a,b,c,47); nR3(c,d,e,a,b,48);
+	nR3(b,c,d,e,a,49); nR3(a,b,c,d,e,50); nR3(e,a,b,c,d,51);
+	nR3(d,e,a,b,c,52); nR3(c,d,e,a,b,53); nR3(b,c,d,e,a,54);
+	nR3(a,b,c,d,e,55); nR3(e,a,b,c,d,56); nR3(d,e,a,b,c,57);
+	nR3(c,d,e,a,b,58); nR3(b,c,d,e,a,59);
+}
+
+static void
+do_R4(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
+{
+	nR4(a,b,c,d,e,60); nR4(e,a,b,c,d,61); nR4(d,e,a,b,c,62);
+	nR4(c,d,e,a,b,63); nR4(b,c,d,e,a,64); nR4(a,b,c,d,e,65);
+	nR4(e,a,b,c,d,66); nR4(d,e,a,b,c,67); nR4(c,d,e,a,b,68);
+	nR4(b,c,d,e,a,69); nR4(a,b,c,d,e,70); nR4(e,a,b,c,d,71);
+	nR4(d,e,a,b,c,72); nR4(c,d,e,a,b,73); nR4(b,c,d,e,a,74);
+	nR4(a,b,c,d,e,75); nR4(e,a,b,c,d,76); nR4(d,e,a,b,c,77);
+	nR4(c,d,e,a,b,78); nR4(b,c,d,e,a,79);
+}
+#endif
+
 /*
  * Hash a single 512-bit block. This is the core of the algorithm.
  */
@@ -92,12 +153,12 @@ static void
 transform(isc_uint32_t state[5], const unsigned char buffer[64]) {
 	isc_uint32_t a, b, c, d, e;
 	CHAR64LONG16 *block;
-	unsigned char workspace[64];
+	CHAR64LONG16 workspace;
 
 	INSIST(buffer != NULL);
 	INSIST(state != NULL);
 
-	block = (CHAR64LONG16 *)(void *)workspace;
+	block = &workspace;
 	(void)memcpy(block, buffer, 64);
 
 	/* Copy context->state[] to working vars */
@@ -107,6 +168,12 @@ transform(isc_uint32_t state[5], const u
 	d = state[3];
 	e = state[4];
 
+#ifdef __sparc_v9__
+	do_R01(&a, &b, &c, &d, &e, block);
+	do_R2(&a, &b, &c, &d, &e, block);
+	do_R3(&a, &b, &c, &d, &e, block);
+	do_R4(&a, &b, &c, &d, &e, block);
+#else
 	/* 4 rounds of 20 operations each. Loop unrolled. */
 	R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
 	R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
@@ -128,6 +195,7 @@ transform(isc_uint32_t state[5], const u
 	R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
 	R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
 	R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
+#endif
 
 	/* Add the working vars back into context.state[] */
 	state[0] += a;
