From d1ca6fcf8deb82aa1cf1df4219485293a4e34ba4 Mon Sep 17 00:00:00 2001 From: David du Colombier <0intro@gmail.com> Date: Tue, 20 Oct 2015 21:20:00 +0200 Subject: [PATCH 01/10] cmd, lib: import from bitbucket.org/cherry9/plan9-loongson --- spim/bin/ape/psh | 2 + spim/include/ape/float.h | 73 ++++ spim/include/ape/math.h | 78 ++++ spim/include/ape/stdarg.h | 18 + spim/include/ape/ureg.h | 52 +++ spim/include/u.h | 69 +++ spim/include/ureg.h | 44 ++ spim/mkfile | 9 + sys/src/ape/lib/9/spim/getcallerpc.s | 3 + sys/src/ape/lib/9/spim/getfcr.s | 15 + sys/src/ape/lib/ap/spim/atom.s | 52 +++ sys/src/ape/lib/ap/spim/c_fcr0.s | 3 + sys/src/ape/lib/ap/spim/cycles.c | 5 + sys/src/ape/lib/ap/spim/getfcr.s | 15 + sys/src/ape/lib/ap/spim/lock.c | 36 ++ sys/src/ape/lib/ap/spim/lock.pre-sema.c | 177 ++++++++ sys/src/ape/lib/ap/spim/main9.s | 12 + sys/src/ape/lib/ap/spim/main9p.s | 54 +++ sys/src/ape/lib/ap/spim/memchr.s | 39 ++ sys/src/ape/lib/ap/spim/memcmp.s | 116 +++++ sys/src/ape/lib/ap/spim/memmove.s | 161 +++++++ sys/src/ape/lib/ap/spim/memset.s | 88 ++++ sys/src/ape/lib/ap/spim/mkfile | 27 ++ sys/src/ape/lib/ap/spim/notetramp.c | 72 ++++ sys/src/ape/lib/ap/spim/setjmp.s | 24 ++ sys/src/ape/lib/ap/spim/strchr.s | 63 +++ sys/src/ape/lib/ap/spim/strcmp.s | 21 + sys/src/ape/lib/ap/spim/strcpy.s | 92 ++++ sys/src/ape/lib/ap/spim/tas.s | 30 ++ sys/src/ape/lib/ap/spim/vlop.s | 17 + sys/src/ape/lib/ap/spim/vlrt.c | 719 +++++++++++++++++++++++++++++++ sys/src/ape/lib/mp/spim/mkfile | 26 ++ sys/src/ape/lib/sec/spim/mkfile | 23 + sys/src/cmd/gs/arch.h | 2 + sys/src/cmd/gs/spim.h | 46 ++ sys/src/cmd/vc/swt.c | 2 + sys/src/cmd/vl/asm.c | 41 +- sys/src/libc/spim/argv0.s | 4 + sys/src/libc/spim/atom.s | 52 +++ sys/src/libc/spim/c_fcr0.s | 3 + sys/src/libc/spim/cycles.c | 10 + sys/src/libc/spim/getcallerpc.s | 3 + sys/src/libc/spim/getfcr.s | 15 + sys/src/libc/spim/main9.s | 25 ++ sys/src/libc/spim/main9p.s | 41 ++ sys/src/libc/spim/memccpy.s | 20 + sys/src/libc/spim/memchr.s | 39 ++ sys/src/libc/spim/memcmp.s | 116 +++++ sys/src/libc/spim/memmove.s | 237 +++++++++++ sys/src/libc/spim/memset.s | 88 ++++ sys/src/libc/spim/mkfile | 40 ++ sys/src/libc/spim/notejmp.c | 16 + sys/src/libc/spim/setjmp.s | 14 + sys/src/libc/spim/sqrt.c | 103 +++++ sys/src/libc/spim/strchr.s | 63 +++ sys/src/libc/spim/strcmp.s | 21 + sys/src/libc/spim/strcpy.s | 92 ++++ sys/src/libc/spim/tas.s | 30 ++ sys/src/libc/spim/vlop.s | 17 + sys/src/libc/spim/vlrt.c | 722 ++++++++++++++++++++++++++++++++ sys/src/libmach/vdb.c | 4 +- sys/src/libmp/spim/mkfile | 21 + sys/src/libmp/spim/mpvecadd.s | 67 +++ sys/src/libmp/spim/mpvecdigmuladd.s | 58 +++ sys/src/libmp/spim/mpvecdigmulsub.s | 61 +++ sys/src/libmp/spim/mpvecsub.s | 66 +++ sys/src/libsec/spim/md5block.s | 296 +++++++++++++ sys/src/libsec/spim/mkfile | 19 + sys/src/libsec/spim/sha1block.s | 220 ++++++++++ sys/src/libthread/spim.c | 26 ++ sys/src/libthread/xincspim.s | 46 ++ 71 files changed, 4970 insertions(+), 11 deletions(-) create mode 100755 spim/bin/ape/psh create mode 100644 spim/include/ape/float.h create mode 100644 spim/include/ape/math.h create mode 100644 spim/include/ape/stdarg.h create mode 100644 spim/include/ape/ureg.h create mode 100644 spim/include/u.h create mode 100644 spim/include/ureg.h create mode 100644 spim/mkfile create mode 100644 sys/src/ape/lib/9/spim/getcallerpc.s create mode 100644 sys/src/ape/lib/9/spim/getfcr.s create mode 100644 sys/src/ape/lib/ap/spim/atom.s create mode 100644 sys/src/ape/lib/ap/spim/c_fcr0.s create mode 100644 sys/src/ape/lib/ap/spim/cycles.c create mode 100644 sys/src/ape/lib/ap/spim/getfcr.s create mode 100644 sys/src/ape/lib/ap/spim/lock.c create mode 100644 sys/src/ape/lib/ap/spim/lock.pre-sema.c create mode 100644 sys/src/ape/lib/ap/spim/main9.s create mode 100644 sys/src/ape/lib/ap/spim/main9p.s create mode 100644 sys/src/ape/lib/ap/spim/memchr.s create mode 100644 sys/src/ape/lib/ap/spim/memcmp.s create mode 100644 sys/src/ape/lib/ap/spim/memmove.s create mode 100644 sys/src/ape/lib/ap/spim/memset.s create mode 100644 sys/src/ape/lib/ap/spim/mkfile create mode 100644 sys/src/ape/lib/ap/spim/notetramp.c create mode 100644 sys/src/ape/lib/ap/spim/setjmp.s create mode 100644 sys/src/ape/lib/ap/spim/strchr.s create mode 100644 sys/src/ape/lib/ap/spim/strcmp.s create mode 100644 sys/src/ape/lib/ap/spim/strcpy.s create mode 100644 sys/src/ape/lib/ap/spim/tas.s create mode 100644 sys/src/ape/lib/ap/spim/vlop.s create mode 100644 sys/src/ape/lib/ap/spim/vlrt.c create mode 100644 sys/src/ape/lib/mp/spim/mkfile create mode 100644 sys/src/ape/lib/sec/spim/mkfile create mode 100644 sys/src/cmd/gs/spim.h create mode 100644 sys/src/libc/spim/argv0.s create mode 100644 sys/src/libc/spim/atom.s create mode 100644 sys/src/libc/spim/c_fcr0.s create mode 100644 sys/src/libc/spim/cycles.c create mode 100644 sys/src/libc/spim/getcallerpc.s create mode 100644 sys/src/libc/spim/getfcr.s create mode 100644 sys/src/libc/spim/main9.s create mode 100644 sys/src/libc/spim/main9p.s create mode 100644 sys/src/libc/spim/memccpy.s create mode 100644 sys/src/libc/spim/memchr.s create mode 100644 sys/src/libc/spim/memcmp.s create mode 100644 sys/src/libc/spim/memmove.s create mode 100644 sys/src/libc/spim/memset.s create mode 100644 sys/src/libc/spim/mkfile create mode 100644 sys/src/libc/spim/notejmp.c create mode 100644 sys/src/libc/spim/setjmp.s create mode 100644 sys/src/libc/spim/sqrt.c create mode 100644 sys/src/libc/spim/strchr.s create mode 100644 sys/src/libc/spim/strcmp.s create mode 100644 sys/src/libc/spim/strcpy.s create mode 100644 sys/src/libc/spim/tas.s create mode 100644 sys/src/libc/spim/vlop.s create mode 100644 sys/src/libc/spim/vlrt.c create mode 100644 sys/src/libmp/spim/mkfile create mode 100644 sys/src/libmp/spim/mpvecadd.s create mode 100644 sys/src/libmp/spim/mpvecdigmuladd.s create mode 100644 sys/src/libmp/spim/mpvecdigmulsub.s create mode 100644 sys/src/libmp/spim/mpvecsub.s create mode 100644 sys/src/libsec/spim/md5block.s create mode 100644 sys/src/libsec/spim/mkfile create mode 100644 sys/src/libsec/spim/sha1block.s create mode 100644 sys/src/libthread/spim.c create mode 100644 sys/src/libthread/xincspim.s diff --git a/spim/bin/ape/psh b/spim/bin/ape/psh new file mode 100755 index 0000000..5338a8c --- /dev/null +++ b/spim/bin/ape/psh @@ -0,0 +1,2 @@ +#!/bin/rc +exec /rc/bin/ape/psh $* diff --git a/spim/include/ape/float.h b/spim/include/ape/float.h new file mode 100644 index 0000000..6a975f7 --- /dev/null +++ b/spim/include/ape/float.h @@ -0,0 +1,73 @@ +#ifndef __FLOAT +#define __FLOAT +/* IEEE, default rounding */ + +#define FLT_ROUNDS 1 +#define FLT_RADIX 2 + +#define FLT_DIG 6 +#define FLT_EPSILON 1.19209290e-07 +#define FLT_MANT_DIG 24 +#define FLT_MAX 3.40282347e+38 +#define FLT_MAX_10_EXP 38 +#define FLT_MAX_EXP 128 +#define FLT_MIN 1.17549435e-38 +#define FLT_MIN_10_EXP -37 +#define FLT_MIN_EXP -125 + +#define DBL_DIG 15 +#define DBL_EPSILON 2.2204460492503131e-16 +#define DBL_MANT_DIG 53 +#define DBL_MAX 1.797693134862315708145e+308 +#define DBL_MAX_10_EXP 308 +#define DBL_MAX_EXP 1024 +#define DBL_MIN 2.225073858507201383090233e-308 +#define DBL_MIN_10_EXP -307 +#define DBL_MIN_EXP -1021 +#define LDBL_MANT_DIG DBL_MANT_DIG +#define LDBL_EPSILON DBL_EPSILON +#define LDBL_DIG DBL_DIG +#define LDBL_MIN_EXP DBL_MIN_EXP +#define LDBL_MIN DBL_MIN +#define LDBL_MIN_10_EXP DBL_MIN_10_EXP +#define LDBL_MAX_EXP DBL_MAX_EXP +#define LDBL_MAX DBL_MAX +#define LDBL_MAX_10_EXP DBL_MAX_10_EXP + +typedef union FPdbleword FPdbleword; +union FPdbleword +{ + double x; + struct { /* little endian */ + long lo; + long hi; + }; +}; + +#ifdef _RESEARCH_SOURCE +/* define stuff needed for floating conversion */ +#define IEEE_8087 1 +#define Sudden_Underflow 1 +#endif +#ifdef _PLAN9_SOURCE +/* FCR */ +#define FPINEX (1<<7) +#define FPOVFL (1<<9) +#define FPUNFL (1<<8) +#define FPZDIV (1<<10) +#define FPRNR (0<<0) +#define FPRZ (1<<0) +#define FPRPINF (2<<0) +#define FPRNINF (3<<0) +#define FPRMASK (3<<0) +#define FPPEXT 0 +#define FPPSGL 0 +#define FPPDBL 0 +#define FPPMASK 0 +/* FSR */ +#define FPAINEX (1<<2) +#define FPAOVFL (1<<4) +#define FPAUNFL (1<<3) +#define FPAZDIV (1<<5) +#endif +#endif /* __FLOAT */ diff --git a/spim/include/ape/math.h b/spim/include/ape/math.h new file mode 100644 index 0000000..a880a01 --- /dev/null +++ b/spim/include/ape/math.h @@ -0,0 +1,78 @@ +#ifndef __MATH +#define __MATH +#pragma lib "/$M/lib/ape/libap.a" + +/* a HUGE_VAL appropriate for IEEE double-precision */ +/* the correct value, 1.797693134862316e+308, causes a ken overflow */ +#define HUGE_VAL 1.79769313486231e+308 + +#ifdef __cplusplus +extern "C" { +#endif + +extern double acos(double); +extern double asin(double); +extern double atan(double); +extern double atan2(double, double); +extern double cos(double); +extern double hypot(double, double); +extern double sin(double); +extern double tan(double); +extern double cosh(double); +extern double sinh(double); +extern double tanh(double); +extern double exp(double); +extern double frexp(double, int *); +extern double ldexp(double, int); +extern double log(double); +extern double log10(double); +extern double modf(double, double *); +extern double pow(double, double); +extern double sqrt(double); +extern double ceil(double); +extern double fabs(double); +extern double floor(double); +extern double fmod(double, double); +extern double NaN(void); +extern int isNaN(double); +extern double Inf(int); +extern int isInf(double, int); + +#ifdef _RESEARCH_SOURCE +/* does >> treat left operand as unsigned ? */ +#define Unsigned_Shifts 1 +#define M_E 2.7182818284590452354 /* e */ +#define M_LOG2E 1.4426950408889634074 /* log 2e */ +#define M_LOG10E 0.43429448190325182765 /* log 10e */ +#define M_LN2 0.69314718055994530942 /* log e2 */ +#define M_LN10 2.30258509299404568402 /* log e10 */ +#define M_PI 3.14159265358979323846 /* pi */ +#define M_PI_2 1.57079632679489661923 /* pi/2 */ +#define M_PI_4 0.78539816339744830962 /* pi/4 */ +#define M_1_PI 0.31830988618379067154 /* 1/pi */ +#define M_2_PI 0.63661977236758134308 /* 2/pi */ +#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */ +#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ +#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ + +extern double hypot(double, double); +extern double erf(double); +extern double erfc(double); +extern double j0(double); +extern double y0(double); +extern double j1(double); +extern double y1(double); +extern double jn(int, double); +extern double yn(int, double); + +#endif + + +#ifdef __cplusplus +} +#endif + +#define isnan(x) isNaN(x) +#define isinf(x) isInf(x, 0) + +#endif /* __MATH */ diff --git a/spim/include/ape/stdarg.h b/spim/include/ape/stdarg.h new file mode 100644 index 0000000..a19210b --- /dev/null +++ b/spim/include/ape/stdarg.h @@ -0,0 +1,18 @@ +#ifndef __STDARG +#define __STDARG + +typedef char *va_list; + +#define va_start(list, start) list =\ + (sizeof(start) < 4?\ + (char*)((int*)&(start)+1):\ + (char*)(&(start)+1)) +#define va_end(list) +#define va_arg(list, mode)\ + ((sizeof(mode) == 1)?\ + ((list += 4), (mode*)list)[-4]:\ + (sizeof(mode) == 2)?\ + ((list += 4), (mode*)list)[-2]:\ + ((list += sizeof(mode)), (mode*)list)[-1]) + +#endif /* __STDARG */ diff --git a/spim/include/ape/ureg.h b/spim/include/ape/ureg.h new file mode 100644 index 0000000..702bd1f --- /dev/null +++ b/spim/include/ape/ureg.h @@ -0,0 +1,52 @@ +#ifndef __UREG_H +#define __UREG_H +#if !defined(_PLAN9_SOURCE) + This header file is an extension to ANSI/POSIX +#endif + +struct Ureg +{ + unsigned long status; + unsigned long pc; + union{ + unsigned long sp; /* r29 */ + unsigned long usp; /* r29 */ + }; + unsigned long cause; + unsigned long badvaddr; + unsigned long tlbvirt; + unsigned long hi; + unsigned long lo; + unsigned long r31; + unsigned long r30; + unsigned long r28; + unsigned long r27; /* unused */ + unsigned long r26; /* unused */ + unsigned long r25; + unsigned long r24; + unsigned long r23; + unsigned long r22; + unsigned long r21; + unsigned long r20; + unsigned long r19; + unsigned long r18; + unsigned long r17; + unsigned long r16; + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long r7; + unsigned long r6; + unsigned long r5; + unsigned long r4; + unsigned long r3; + unsigned long r2; + unsigned long r1; +}; + +#endif diff --git a/spim/include/u.h b/spim/include/u.h new file mode 100644 index 0000000..43901d3 --- /dev/null +++ b/spim/include/u.h @@ -0,0 +1,69 @@ +#define nil ((void*)0) +typedef unsigned short ushort; +typedef unsigned char uchar; +typedef unsigned long ulong; +typedef unsigned int uint; +typedef signed char schar; +typedef long long vlong; +typedef unsigned long long uvlong; +typedef unsigned long uintptr; +typedef unsigned long usize; +typedef uint Rune; +typedef union FPdbleword FPdbleword; +typedef long jmp_buf[2]; +#define JMPBUFSP 0 +#define JMPBUFPC 1 +#define JMPBUFDPC 0 +typedef unsigned int mpdigit; /* for /sys/include/mp.h */ +typedef unsigned char u8int; +typedef unsigned short u16int; +typedef unsigned int u32int; +typedef unsigned long long u64int; + +/* FCR (FCR31) */ +#define FPINEX (1<<7) /* enables */ +#define FPUNFL (1<<8) +#define FPOVFL (1<<9) +#define FPZDIV (1<<10) +#define FPINVAL (1<<11) +#define FPRNR (0<<0) /* rounding modes */ +#define FPRZ (1<<0) +#define FPRPINF (2<<0) +#define FPRNINF (3<<0) +#define FPRMASK (3<<0) +#define FPPEXT 0 +#define FPPSGL 0 +#define FPPDBL 0 +#define FPPMASK 0 +#define FPCOND (1<<23) + +/* FSR (also FCR31) */ +#define FPAINEX (1<<2) /* flags */ +#define FPAOVFL (1<<4) +#define FPAUNFL (1<<3) +#define FPAZDIV (1<<5) +#define FPAINVAL (1<<6) + +union FPdbleword +{ + double x; + struct { /* little endian */ + ulong lo; + ulong hi; + }; +}; + +/* stdarg */ +typedef char* va_list; +#define va_start(list, start) list =\ + (sizeof(start) < 4?\ + (char*)((int*)&(start)+1):\ + (char*)(&(start)+1)) +#define va_end(list)\ + USED(list) +#define va_arg(list, mode)\ + ((sizeof(mode) == 1)?\ + ((list += 4), (mode*)list)[-4]:\ + (sizeof(mode) == 2)?\ + ((list += 4), (mode*)list)[-2]:\ + ((list += sizeof(mode)), (mode*)list)[-1]) diff --git a/spim/include/ureg.h b/spim/include/ureg.h new file mode 100644 index 0000000..32cbfb3 --- /dev/null +++ b/spim/include/ureg.h @@ -0,0 +1,44 @@ +struct Ureg +{ + ulong status; + ulong pc; + union{ + ulong sp; /* r29 */ + ulong usp; /* r29 */ + }; + ulong cause; + ulong badvaddr; + ulong tlbvirt; + ulong hi; + ulong lo; + ulong r31; + ulong r30; + ulong r28; + ulong r27; /* unused */ + ulong r26; /* unused */ + ulong r25; + ulong r24; + ulong r23; + ulong r22; + ulong r21; + ulong r20; + ulong r19; + ulong r18; + ulong r17; + ulong r16; + ulong r15; + ulong r14; + ulong r13; + ulong r12; + ulong r11; + ulong r10; + ulong r9; + ulong r8; + ulong r7; + ulong r6; + ulong r5; + ulong r4; + ulong r3; + ulong r2; + ulong r1; +}; diff --git a/spim/mkfile b/spim/mkfile new file mode 100644 index 0000000..d20e5cb --- /dev/null +++ b/spim/mkfile @@ -0,0 +1,9 @@ + +//#include + +void +lock(Lock *l) +{ + if(ainc(&l->key) == 1) + return; /* changed from 0 -> 1: we hold lock */ + /* otherwise wait in kernel */ + while(_SEMACQUIRE(&l->sem, 1) < 0){ + /* interrupted; try again */ + } +} + +void +unlock(Lock *l) +{ + if(adec(&l->key) == 0) + return; /* changed from 1 -> 0: no contention */ + _SEMRELEASE(&l->sem, 1); +} + +int +canlock(Lock *l) +{ + if(ainc(&l->key) == 1) + return 1; /* changed from 0 -> 1: success */ + /* Undo increment (but don't miss wakeup) */ + if(adec(&l->key) == 0) + return 0; /* changed from 1 -> 0: no contention */ + _SEMRELEASE(&l->sem, 1); + return 0; +} diff --git a/sys/src/ape/lib/ap/spim/lock.pre-sema.c b/sys/src/ape/lib/ap/spim/lock.pre-sema.c new file mode 100644 index 0000000..8fc088b --- /dev/null +++ b/sys/src/ape/lib/ap/spim/lock.pre-sema.c @@ -0,0 +1,177 @@ +#define _LOCK_EXTENSION +#include +#include +#include "../plan9/sys9.h" +#include + +enum +{ + Pagesize = 4096, + Semperpg = Pagesize/(16*sizeof(unsigned int)), + Lockaddr = 0x60000000, + + POWER = 0x320, + MAGNUM = 0x330, + MAGNUMII = 0x340, + R4K = 0x500, +}; + +static int arch; +extern int C_3ktas(int*); +extern int C_4ktas(int*); +extern int C_fcr0(void); + +static void +lockinit(void) +{ + int n; + + if(arch != 0) + return; /* allow multiple calls */ + arch = C_fcr0(); + switch(arch) { + case POWER: + n = _SEGATTACH(0, "lock", (void*)Lockaddr, Pagesize); + if(n < 0) { + arch = MAGNUM; + break; + } + memset((void*)Lockaddr, 0, Pagesize); + break; + case MAGNUM: + case MAGNUMII: + case R4K: + break; + default: + arch = R4K; + break; + } + +} + +void +lock(Lock *lk) +{ + int *hwsem; + int hash; + +retry: + switch(arch) { + case 0: + lockinit(); + goto retry; + case MAGNUM: + case MAGNUMII: + while(C_3ktas(&lk->val)) + _SLEEP(0); + return; + case R4K: + for(;;){ + while(lk->val) + ; + if(C_4ktas(&lk->val) == 0) + return; + } + break; + case POWER: + /* Use low order lock bits to generate hash */ + hash = ((int)lk/sizeof(int)) & (Semperpg-1); + hwsem = (int*)Lockaddr+hash; + + for(;;) { + if((*hwsem & 1) == 0) { + if(lk->val) + *hwsem = 0; + else { + lk->val = 1; + *hwsem = 0; + return; + } + } + while(lk->val) + ; + } + } +} + +int +canlock(Lock *lk) +{ + int *hwsem; + int hash; + +retry: + switch(arch) { + case 0: + lockinit(); + goto retry; + case MAGNUM: + case MAGNUMII: + if(C_3ktas(&lk->val)) + return 0; + return 1; + case R4K: + if(C_4ktas(&lk->val)) + return 0; + return 1; + case POWER: + /* Use low order lock bits to generate hash */ + hash = ((int)lk/sizeof(int)) & (Semperpg-1); + hwsem = (int*)Lockaddr+hash; + + if((*hwsem & 1) == 0) { + if(lk->val) + *hwsem = 0; + else { + lk->val = 1; + *hwsem = 0; + return 1; + } + } + return 0; + default: + return 0; + } +} + +void +unlock(Lock *lk) +{ + lk->val = 0; +} + +int +tas(int *p) +{ + int *hwsem; + int hash; + +retry: + switch(arch) { + case 0: + lockinit(); + goto retry; + case MAGNUM: + case MAGNUMII: + return C_3ktas(p); + case R4K: + return C_4ktas(p); + case POWER: + /* Use low order lock bits to generate hash */ + hash = ((int)p/sizeof(int)) & (Semperpg-1); + hwsem = (int*)Lockaddr+hash; + + if((*hwsem & 1) == 0) { + if(*p) + *hwsem = 0; + else { + *p = 1; + *hwsem = 0; + return 0; + } + } + return 1; + default: + return 0; + } +} diff --git a/sys/src/ape/lib/ap/spim/main9.s b/sys/src/ape/lib/ap/spim/main9.s new file mode 100644 index 0000000..a3c224a --- /dev/null +++ b/sys/src/ape/lib/ap/spim/main9.s @@ -0,0 +1,12 @@ + TEXT _main(SB), $16 + MOVW $setR30(SB), R30 + JAL _envsetup(SB) + MOVW inargc-4(FP), R1 + MOVW $inargv+0(FP), R2 + MOVW R1, 4(R29) + MOVW R2, 8(R29) + JAL main(SB) +loop: + MOVW R1, 4(R29) + JAL exit(SB) + JMP loop diff --git a/sys/src/ape/lib/ap/spim/main9p.s b/sys/src/ape/lib/ap/spim/main9p.s new file mode 100644 index 0000000..adc6e00 --- /dev/null +++ b/sys/src/ape/lib/ap/spim/main9p.s @@ -0,0 +1,54 @@ +#define NPRIVATES 16 + +GLOBL _tos(SB), $4 +GLOBL _privates(SB), $4 +GLOBL _nprivates(SB), $4 + +TEXT _mainp(SB), 1, $(3*4+NPRIVATES*4) + MOVW $setR30(SB), R30 + + /* _tos = arg */ + MOVW R1, _tos(SB) +/* + MOVW $0,FCR31 + NOR R0,R0 + MOVD $0.5, F26 + SUBD F26, F26, F24 + ADDD F26, F26, F28 + ADDD F28, F28, F30 +*/ + MOVW $8(SP), R1 + MOVW R1, _privates(SB) + MOVW $NPRIVATES, R1 + MOVW R1, _nprivates(SB) + + /* _profmain(); */ + JAL _profmain(SB) + + /* _tos->prof.pp = _tos->prof.next; */ + MOVW _tos+0(SB),R1 + MOVW 4(R1),R2 + MOVW R2,(R1) + + JAL _envsetup(SB) + + /* main(argc, argv, environ); */ + MOVW inargc-4(FP), R1 + MOVW $inargv+0(FP), R2 + MOVW environ(SB), R3 + MOVW R1, 4(R29) + MOVW R2, 8(R29) + MOVW R3, 12(R29) + JAL main(SB) +loop: + MOVW R1, 4(R29) + JAL exit(SB) + MOVW $_profin(SB), R0 /* force loading of profile */ + JMP loop + +TEXT _savearg(SB), 1, $0 + RET + +TEXT _callpc(SB), 1, $0 + MOVW argp-4(FP), R1 + RET diff --git a/sys/src/ape/lib/ap/spim/memchr.s b/sys/src/ape/lib/ap/spim/memchr.s new file mode 100644 index 0000000..0a2707e --- /dev/null +++ b/sys/src/ape/lib/ap/spim/memchr.s @@ -0,0 +1,39 @@ + TEXT memchr(SB), $0 +MOVW R1, 0(FP) + + MOVW n+8(FP), R1 + MOVW s1+0(FP), R2 + MOVBU c+4(FP), R3 // little endian, 4(FP) instead of 7(FP) + ADDU R1, R2, R6 + + AND $(~1), R1, R5 + ADDU R2, R5 + BEQ R2, R5, lt2 + +l1: + MOVBU 0(R2), R4 + MOVBU 1(R2), R7 + BEQ R3, R4, eq0 + ADDU $2, R2 + BEQ R3, R7, eq + BNE R2, R5, l1 + +lt2: + BEQ R2, R6, zret + +l2: + MOVBU (R2), R4 + ADDU $1, R2 + BEQ R3, R4, eq + BNE R2, R6, l2 +zret: + MOVW R0, R1 + RET + +eq0: + MOVW R2, R1 + RET + +eq: + SUBU $1,R2, R1 + RET diff --git a/sys/src/ape/lib/ap/spim/memcmp.s b/sys/src/ape/lib/ap/spim/memcmp.s new file mode 100644 index 0000000..86240b0 --- /dev/null +++ b/sys/src/ape/lib/ap/spim/memcmp.s @@ -0,0 +1,116 @@ + TEXT memcmp(SB), $0 +MOVW R1, 0(FP) + +/* + * performance: + * alligned about 1.0us/call and 17.4mb/sec + * unalligned is about 3.1mb/sec + */ + + MOVW n+8(FP), R3 /* R3 is count */ + MOVW s1+0(FP), R4 /* R4 is pointer1 */ + MOVW s2+4(FP), R5 /* R5 is pointer2 */ + ADDU R3,R4, R6 /* R6 is end pointer1 */ + + JMP out // XXX little endian + +/* + * if not at least 4 chars, + * dont even mess around. + * 3 chars to guarantee any + * rounding up to a word + * boundary and 4 characters + * to get at least maybe one + * full word cmp. + */ + SGT $4,R3, R1 + BNE R1, out + +/* + * test if both pointers + * are similarly word alligned + */ + XOR R4,R5, R1 + AND $3, R1 + BNE R1, out + +/* + * byte at a time to word allign + */ +l1: + AND $3,R4, R1 + BEQ R1, l2 + MOVB 0(R4), R8 + MOVB 0(R5), R9 + ADDU $1, R4 + BNE R8,R9, ne + ADDU $1, R5 + JMP l1 + +/* + * turn R3 into end pointer1-15 + * cmp 16 at a time while theres room + */ +l2: + ADDU $-15,R6, R3 +l3: + SGTU R3,R4, R1 + BEQ R1, l4 + MOVW 0(R4), R8 + MOVW 0(R5), R9 + MOVW 4(R4), R10 + BNE R8,R9, ne + MOVW 4(R5), R11 + MOVW 8(R4), R8 + BNE R10,R11, ne1 + MOVW 8(R5), R9 + MOVW 12(R4), R10 + BNE R8,R9, ne + MOVW 12(R5), R11 + ADDU $16, R4 + BNE R10,R11, ne1 + BNE R8,R9, ne + ADDU $16, R5 + JMP l3 + +/* + * turn R3 into end pointer1-3 + * cmp 4 at a time while theres room + */ +l4: + ADDU $-3,R6, R3 +l5: + SGTU R3,R4, R1 + BEQ R1, out + MOVW 0(R4), R8 + MOVW 0(R5), R9 + ADDU $4, R4 + BNE R8,R9, ne /* only works because big endian */ + ADDU $4, R5 + JMP l5 + +/* + * last loop, cmp byte at a time + */ +out: + SGTU R6,R4, R1 + BEQ R1, ret + MOVB 0(R4), R8 + MOVB 0(R5), R9 + ADDU $1, R4 + BNE R8,R9, ne + ADDU $1, R5 + JMP out + +ne1: + SGTU R10,R11, R1 + BNE R1, ret + MOVW $-1,R1 + RET +ne: + SGTU R8,R9, R1 + BNE R1, ret + MOVW $-1,R1 +ret: + RET + END diff --git a/sys/src/ape/lib/ap/spim/memmove.s b/sys/src/ape/lib/ap/spim/memmove.s new file mode 100644 index 0000000..be4f18f --- /dev/null +++ b/sys/src/ape/lib/ap/spim/memmove.s @@ -0,0 +1,161 @@ + TEXT memmove(SB), $0 + + JMP move + + TEXT memcpy(SB), $0 +move: + MOVW R1, s1+0(FP) + + MOVW n+8(FP), R3 /* R3 is count */ + MOVW R1, R4 /* R4 is to-pointer */ + SGT R0, R3, R5 + BEQ R5, ok + MOVW (R0), R0 /* abort if negative count */ +ok: + MOVW s2+4(FP), R5 /* R5 is from-pointer */ + ADDU R3,R5, R7 /* R7 is end from-pointer */ + ADDU R3,R4, R6 /* R6 is end to-pointer */ + +/* + * easiest test is copy backwards if + * destination string has higher mem address + */ + SGT $4,R3, R2 + SGTU R4,R5, R1 + BNE R1, back + +/* + * if not at least 4 chars, + * don't even mess around. + * 3 chars to guarantee any + * rounding up to a word + * boundary and 4 characters + * to get at least maybe one + * full word store. + */ + BNE R2, fout + +/* + * test if both pointers + * are similarly word aligned + */ + XOR R4,R5, R1 + AND $3, R1 + BNE R1, fout + +/* + * byte at a time to word align + */ +f1: + AND $3,R4, R1 + BEQ R1, f2 + MOVB 0(R5), R8 + ADDU $1, R5 + MOVB R8, 0(R4) + ADDU $1, R4 + JMP f1 + +/* + * turn R3 into to-end pointer-15 + * copy 16 at a time while theres room. + * R6 is smaller than R7 -- + * there are problems if R7 is 0. + */ +f2: + ADDU $-15,R6, R3 +f3: + SGTU R3,R4, R1 + BEQ R1, f4 + MOVW 0(R5), R8 + MOVW 4(R5), R9 + MOVW R8, 0(R4) + MOVW 8(R5), R8 + MOVW R9, 4(R4) + MOVW 12(R5), R9 + ADDU $16, R5 + MOVW R8, 8(R4) + MOVW R9, 12(R4) + ADDU $16, R4 + JMP f3 + +/* + * turn R3 into to-end pointer-3 + * copy 4 at a time while theres room + */ +f4: + ADDU $-3,R6, R3 +f5: + SGTU R3,R4, R1 + BEQ R1, fout + MOVW 0(R5), R8 + ADDU $4, R5 + MOVW R8, 0(R4) + ADDU $4, R4 + JMP f5 + +/* + * last loop, copy byte at a time + */ +fout: + BEQ R7,R5, ret + MOVB 0(R5), R8 + ADDU $1, R5 + MOVB R8, 0(R4) + ADDU $1, R4 + JMP fout + +/* + * whole thing repeated for backwards + */ +back: + BNE R2, bout + XOR R6,R7, R1 + AND $3, R1 + BNE R1, bout +b1: + AND $3,R7, R1 + BEQ R1, b2 + MOVB -1(R7), R8 + ADDU $-1, R7 + MOVB R8, -1(R6) + ADDU $-1, R6 + JMP b1 +b2: + ADDU $15,R5, R3 +b3: + SGTU R7,R3, R1 + BEQ R1, b4 + MOVW -4(R7), R8 + MOVW -8(R7), R9 + MOVW R8, -4(R6) + MOVW -12(R7), R8 + MOVW R9, -8(R6) + MOVW -16(R7), R9 + ADDU $-16, R7 + MOVW R8, -12(R6) + MOVW R9, -16(R6) + ADDU $-16, R6 + JMP b3 +b4: + ADDU $3,R5, R3 +b5: + SGTU R7,R3, R1 + BEQ R1, bout + MOVW -4(R7), R8 + ADDU $-4, R7 + MOVW R8, -4(R6) + ADDU $-4, R6 + JMP b5 + +bout: + BEQ R7,R5, ret + MOVB -1(R7), R8 + ADDU $-1, R7 + MOVB R8, -1(R6) + ADDU $-1, R6 + JMP bout + +ret: + MOVW s1+0(FP), R1 + RET + END diff --git a/sys/src/ape/lib/ap/spim/memset.s b/sys/src/ape/lib/ap/spim/memset.s new file mode 100644 index 0000000..8c9467f --- /dev/null +++ b/sys/src/ape/lib/ap/spim/memset.s @@ -0,0 +1,88 @@ + TEXT memset(SB),$12 +MOVW R1, 0(FP) + +/* + * performance: + * about 1us/call and 28mb/sec + */ + + MOVW n+8(FP), R3 /* R3 is count */ + MOVW p+0(FP), R4 /* R4 is pointer */ + MOVW c+4(FP), R5 /* R5 is char */ + ADDU R3,R4, R6 /* R6 is end pointer */ + +/* + * if not at least 4 chars, + * dont even mess around. + * 3 chars to guarantee any + * rounding up to a word + * boundary and 4 characters + * to get at least maybe one + * full word store. + */ + SGT $4,R3, R1 + BNE R1, out + +/* + * turn R5 into a word of characters + */ + AND $0xff, R5 + SLL $8,R5, R1 + OR R1, R5 + SLL $16,R5, R1 + OR R1, R5 + +/* + * store one byte at a time until pointer + * is alligned on a word boundary + */ +l1: + AND $3,R4, R1 + BEQ R1, l2 + MOVB R5, 0(R4) + ADDU $1, R4 + JMP l1 + +/* + * turn R3 into end pointer-15 + * store 16 at a time while theres room + */ +l2: + ADDU $-15,R6, R3 +l3: + SGTU R3,R4, R1 + BEQ R1, l4 + MOVW R5, 0(R4) + MOVW R5, 4(R4) + ADDU $16, R4 + MOVW R5, -8(R4) + MOVW R5, -4(R4) + JMP l3 + +/* + * turn R3 into end pointer-3 + * store 4 at a time while theres room + */ +l4: + ADDU $-3,R6, R3 +l5: + SGTU R3,R4, R1 + BEQ R1, out + MOVW R5, 0(R4) + ADDU $4, R4 + JMP l5 + +/* + * last loop, store byte at a time + */ +out: + SGTU R6,R4 ,R1 + BEQ R1, ret + MOVB R5, 0(R4) + ADDU $1, R4 + JMP out + +ret: + MOVW s1+0(FP), R1 + RET + END diff --git a/sys/src/ape/lib/ap/spim/mkfile b/sys/src/ape/lib/ap/spim/mkfile new file mode 100644 index 0000000..48b4d7b --- /dev/null +++ b/sys/src/ape/lib/ap/spim/mkfile @@ -0,0 +1,27 @@ +APE=/sys/src/ape +<$APE/config +LIB=/$objtype/lib/ape/libap.a +OFILES=\ + atom.$O\ + c_fcr0.$O\ + cycles.$O\ + getfcr.$O\ + lock.$O\ + main9.$O\ + main9p.$O\ + memchr.$O\ + memcmp.$O\ + memmove.$O\ + memset.$O\ + notetramp.$O\ + setjmp.$O\ + strchr.$O\ + strcmp.$O\ + strcpy.$O\ + tas.$O\ + vlop.$O\ + vlrt.$O\ + + +#include + +/* A stack to hold pcs when signals nest */ +#define MAXSIGSTACK 20 +typedef struct Pcstack Pcstack; +static struct Pcstack { + int sig; + void (*hdlr)(int, char*, Ureg*); + unsigned long restorepc; + Ureg *u; +} pcstack[MAXSIGSTACK]; +static int nstack = 0; + +static void notecont(Ureg*, char*); + +void +_notetramp(int sig, void (*hdlr)(int, char*, Ureg*), Ureg *u) +{ + Pcstack *p; + + if(nstack >= MAXSIGSTACK) + _NOTED(1); /* nesting too deep; just do system default */ + p = &pcstack[nstack]; + p->restorepc = u->pc; + p->sig = sig; + p->hdlr = hdlr; + p->u = u; + nstack++; + u->pc = (unsigned long) notecont; + _NOTED(2); /* NSAVE: clear note but hold state */ +} + +static void +notecont(Ureg *u, char *s) +{ + Pcstack *p; + void(*f)(int, char*, Ureg*); + + p = &pcstack[nstack-1]; + f = p->hdlr; + u->pc = p->restorepc; + nstack--; + (*f)(p->sig, s, u); + _NOTED(3); /* NRSTR */ +} + +#define JMPBUFPC 1 +#define JMPBUFSP 0 + +extern sigset_t _psigblocked; + +void +siglongjmp(sigjmp_buf j, int ret) +{ + struct Ureg *u; + + if(j[0]) + _psigblocked = j[1]; + if(nstack == 0 || pcstack[nstack-1].u->sp > j[2+JMPBUFSP]) + longjmp(j+2, ret); + u = pcstack[nstack-1].u; + nstack--; + u->r1 = ret; + if(ret == 0) + u->r1 = 1; + u->pc = j[2+JMPBUFPC]; + u->sp = j[2+JMPBUFSP]; + _NOTED(3); /* NRSTR */ +} diff --git a/sys/src/ape/lib/ap/spim/setjmp.s b/sys/src/ape/lib/ap/spim/setjmp.s new file mode 100644 index 0000000..32ff470 --- /dev/null +++ b/sys/src/ape/lib/ap/spim/setjmp.s @@ -0,0 +1,24 @@ +TEXT setjmp(SB), 1, $-4 + MOVW R29, (R1) + MOVW R31, 4(R1) + MOVW $0, R1 + RET + +TEXT sigsetjmp(SB), 1, $-4 + MOVW savemask+4(FP), R2 + MOVW R2, 0(R1) + MOVW $_psigblocked(SB), R2 + MOVW R2, 4(R1) + MOVW R29, 8(R1) + MOVW R31, 12(R1) + MOVW $0, R1 + RET + +TEXT longjmp(SB), 1, $-4 + MOVW r+4(FP), R3 + BNE R3, ok /* ansi: "longjmp(0) => longjmp(1)" */ + MOVW $1, R3 /* bless their pointed heads */ +ok: MOVW (R1), R29 + MOVW 4(R1), R31 + MOVW R3, R1 + RET diff --git a/sys/src/ape/lib/ap/spim/strchr.s b/sys/src/ape/lib/ap/spim/strchr.s new file mode 100644 index 0000000..f550ee2 --- /dev/null +++ b/sys/src/ape/lib/ap/spim/strchr.s @@ -0,0 +1,63 @@ + TEXT strchr(SB), $0 +MOVW R1, 0(FP) + MOVB c+4(FP), R4 // little endian, 4(FP) instead of 7(FP) + MOVW s+0(FP), R3 + + BEQ R4, l2 + +/* + * char is not null + */ +l1: + MOVB (R3), R1 + ADDU $1, R3 + BEQ R1, ret + BNE R1,R4, l1 + JMP rm1 + +/* + * char is null + * align to word + */ +l2: + AND $3,R3, R1 + BEQ R1, l3 + MOVB (R3), R1 + ADDU $1, R3 + BNE R1, l2 + JMP rm1 + +l3: + MOVW $0xff000000, R6 + MOVW $0x00ff0000, R7 + +l4: + MOVW (R3), R5 + ADDU $4, R3 + AND $0xff,R5, R1 /* byte 0 */ + AND $0xff00,R5, R2 /* byte 1 */ + BEQ R1, b0 + AND R7,R5, R1 /* byte 2 */ + BEQ R2, b1 + AND R6,R5, R2 /* byte 3 */ + BEQ R1, b2 + BNE R2, l4 + +rm1: + ADDU $-1,R3, R1 + JMP ret + +b2: + ADDU $-2,R3, R1 + JMP ret + +b1: + ADDU $-3,R3, R1 + JMP ret + +b0: + ADDU $-4,R3, R1 + JMP ret + +ret: + RET diff --git a/sys/src/ape/lib/ap/spim/strcmp.s b/sys/src/ape/lib/ap/spim/strcmp.s new file mode 100644 index 0000000..da986a2 --- /dev/null +++ b/sys/src/ape/lib/ap/spim/strcmp.s @@ -0,0 +1,21 @@ +TEXT strcmp(SB), $0 + + MOVW s2+4(FP), R2 + +l1: + MOVB (R2), R3 + MOVB (R1), R4 + ADDU $1, R1 + BEQ R3, end + ADDU $1, R2 + BEQ R3, R4, l1 + + SGTU R4, R3, R1 + BNE R1, ret + MOVW $-1, R1 + RET + +end: + SGTU R4, R3, R1 +ret: + RET diff --git a/sys/src/ape/lib/ap/spim/strcpy.s b/sys/src/ape/lib/ap/spim/strcpy.s new file mode 100644 index 0000000..df1e247 --- /dev/null +++ b/sys/src/ape/lib/ap/spim/strcpy.s @@ -0,0 +1,92 @@ +TEXT strcpy(SB), $0 + + MOVW s2+4(FP),R2 /* R2 is from pointer */ + MOVW R1, R3 /* R3 is to pointer */ + +/* + * align 'from' pointer + */ +l1: + AND $3, R2, R5 + ADDU $1, R2 + BEQ R5, l2 + MOVB -1(R2), R5 + ADDU $1, R3 + MOVB R5, -1(R3) + BNE R5, l1 + RET + +/* + * test if 'to' is also alligned + */ +l2: + AND $3,R3, R5 + BEQ R5, l4 + +/* + * copy 4 at a time, 'to' not aligned + */ +l3: + MOVW -1(R2), R4 + ADD $4, R2 + ADD $4, R3 + AND $0xff,R4, R5 + MOVB R5, -4(R3) + BEQ R5, out + + SRL $8,R4, R5 + AND $0xff, R5 + MOVB R5, -3(R3) + BEQ R5, out + + SRL $16,R4, R5 + AND $0xff, R5 + MOVB R5, -2(R3) + BEQ R5, out + + SRL $24,R4, R5 + MOVB R5, -1(R3) + BNE R5, l3 + +out: + RET + +/* + * word at a time both aligned + */ +l4: + MOVW $0xff000000, R7 + MOVW $0x00ff0000, R8 + +l5: + ADDU $4, R3 + MOVW -1(R2), R4 /* fetch */ + + ADDU $4, R2 + AND $0xff,R4, R5 /* is it byte 0 */ + AND $0xff00,R4, R6 /* is it byte 1 */ + BEQ R5, b0 + + AND R8,R4, R5 /* is it byte 2 */ + BEQ R6, b1 + + AND R7,R4, R6 /* is it byte 3 */ + BEQ R5, b2 + + MOVW R4, -4(R3) /* store */ + BNE R6, l5 + JMP out + +b0: + MOVB $0, -4(R3) + JMP out + +b1: + MOVB R4, -4(R3) + MOVB $0, -3(R3) + JMP out + +b2: + MOVH R4, -4(R3) + MOVB $0, -2(R3) + JMP out diff --git a/sys/src/ape/lib/ap/spim/tas.s b/sys/src/ape/lib/ap/spim/tas.s new file mode 100644 index 0000000..2dd0310 --- /dev/null +++ b/sys/src/ape/lib/ap/spim/tas.s @@ -0,0 +1,30 @@ +/* + * mips user level lock code + */ + +#define LL(base, rt) WORD $((060<<26)|((base)<<21)|((rt)<<16)) +#define SC(base, rt) WORD $((070<<26)|((base)<<21)|((rt)<<16)) +#define NOOP WORD $0x27 +#define COP3 WORD $(023<<26) + + TEXT C_3ktas(SB),$0 + MOVW R1, R21 +btas: + MOVW R21, R1 + MOVB R0, 1(R1) + NOOP + COP3 + BLTZ R1, btas + RET + + TEXT tas(SB), $0 + TEXT C_4ktas(SB), $0 + MOVW R1, R2 /* address of key */ +tas1: + MOVW $1, R3 + LL(2, 1) + NOOP + SC(2, 3) + NOOP + BEQ R3, tas1 + RET diff --git a/sys/src/ape/lib/ap/spim/vlop.s b/sys/src/ape/lib/ap/spim/vlop.s new file mode 100644 index 0000000..5bceafb --- /dev/null +++ b/sys/src/ape/lib/ap/spim/vlop.s @@ -0,0 +1,17 @@ +TEXT _mulv(SB), $0 + MOVW 8(FP), R2 /* hi1 */ + MOVW 4(FP), R3 /* lo1 */ + MOVW 16(FP), R4 /* hi2 */ + MOVW 12(FP), R5 /* lo2 */ + MULU R5, R3 /* lo1*lo2 -> hi:lo*/ + MOVW LO, R6 + MOVW HI, R7 + MULU R3, R4 /* lo1*hi2 -> _:hi */ + MOVW LO, R8 + ADDU R8, R7 + MULU R2, R5 /* hi1*lo2 -> _:hi */ + MOVW LO, R8 + ADDU R8, R7 + MOVW R6, 0(R1) /* lo */ + MOVW R7, 4(R1) /* hi */ + RET diff --git a/sys/src/ape/lib/ap/spim/vlrt.c b/sys/src/ape/lib/ap/spim/vlrt.c new file mode 100644 index 0000000..d08c917 --- /dev/null +++ b/sys/src/ape/lib/ap/spim/vlrt.c @@ -0,0 +1,719 @@ +typedef unsigned long ulong; +typedef unsigned int uint; +typedef unsigned short ushort; +typedef unsigned char uchar; +typedef signed char schar; + +#define SIGN(n) (1UL<<(n-1)) + +typedef struct Vlong Vlong; +struct Vlong +{ + union + { + struct + { + ulong lo; + ulong hi; + }; + struct + { + ushort loms; + ushort lols; + ushort hims; + ushort hils; + }; + }; +}; + +void abort(void); + +void +_addv(Vlong *r, Vlong a, Vlong b) +{ + ulong lo, hi; + + lo = a.lo + b.lo; + hi = a.hi + b.hi; + if(lo < a.lo) + hi++; + r->lo = lo; + r->hi = hi; +} + +void +_subv(Vlong *r, Vlong a, Vlong b) +{ + ulong lo, hi; + + lo = a.lo - b.lo; + hi = a.hi - b.hi; + if(lo > a.lo) + hi--; + r->lo = lo; + r->hi = hi; +} + +void +_d2v(Vlong *y, double d) +{ + union { double d; struct Vlong; } x; + ulong xhi, xlo, ylo, yhi; + int sh; + + x.d = d; + + xhi = (x.hi & 0xfffff) | 0x100000; + xlo = x.lo; + sh = 1075 - ((x.hi >> 20) & 0x7ff); + + ylo = 0; + yhi = 0; + if(sh >= 0) { + /* v = (hi||lo) >> sh */ + if(sh < 32) { + if(sh == 0) { + ylo = xlo; + yhi = xhi; + } else { + ylo = (xlo >> sh) | (xhi << (32-sh)); + yhi = xhi >> sh; + } + } else { + if(sh == 32) { + ylo = xhi; + } else + if(sh < 64) { + ylo = xhi >> (sh-32); + } + } + } else { + /* v = (hi||lo) << -sh */ + sh = -sh; + if(sh <= 10) { + ylo = xlo << sh; + yhi = (xhi << sh) | (xlo >> (32-sh)); + } else { + /* overflow */ + yhi = d; /* causes something awful */ + } + } + if(x.hi & SIGN(32)) { + if(ylo != 0) { + ylo = -ylo; + yhi = ~yhi; + } else + yhi = -yhi; + } + + y->hi = yhi; + y->lo = ylo; +} + +void +_f2v(Vlong *y, float f) +{ + + _d2v(y, f); +} + +double +_v2d(Vlong x) +{ + if(x.hi & SIGN(32)) { + if(x.lo) { + x.lo = -x.lo; + x.hi = ~x.hi; + } else + x.hi = -x.hi; + return -((long)x.hi*4294967296. + x.lo); + } + return (long)x.hi*4294967296. + x.lo; +} + +float +_v2f(Vlong x) +{ + return _v2d(x); +} + +static void +dodiv(Vlong num, Vlong den, Vlong *qp, Vlong *rp) +{ + ulong numlo, numhi, denhi, denlo, quohi, quolo, t; + int i; + + numhi = num.hi; + numlo = num.lo; + denhi = den.hi; + denlo = den.lo; + + /* + * get a divide by zero + */ + if(denlo==0 && denhi==0) { + numlo = numlo / denlo; + } + + /* + * set up the divisor and find the number of iterations needed + */ + if(numhi >= SIGN(32)) { + quohi = SIGN(32); + quolo = 0; + } else { + quohi = numhi; + quolo = numlo; + } + i = 0; + while(denhi < quohi || (denhi == quohi && denlo < quolo)) { + denhi = (denhi<<1) | (denlo>>31); + denlo <<= 1; + i++; + } + + quohi = 0; + quolo = 0; + for(; i >= 0; i--) { + quohi = (quohi<<1) | (quolo>>31); + quolo <<= 1; + if(numhi > denhi || (numhi == denhi && numlo >= denlo)) { + t = numlo; + numlo -= denlo; + if(numlo > t) + numhi--; + numhi -= denhi; + quolo |= 1; + } + denlo = (denlo>>1) | (denhi<<31); + denhi >>= 1; + } + + if(qp) { + qp->lo = quolo; + qp->hi = quohi; + } + if(rp) { + rp->lo = numlo; + rp->hi = numhi; + } +} + +void +_divvu(Vlong *q, Vlong n, Vlong d) +{ + + if(n.hi == 0 && d.hi == 0) { + q->hi = 0; + q->lo = n.lo / d.lo; + return; + } + dodiv(n, d, q, 0); +} + +void +_modvu(Vlong *r, Vlong n, Vlong d) +{ + + if(n.hi == 0 && d.hi == 0) { + r->hi = 0; + r->lo = n.lo % d.lo; + return; + } + dodiv(n, d, 0, r); +} + +static void +vneg(Vlong *v) +{ + + if(v->lo == 0) { + v->hi = -v->hi; + return; + } + v->lo = -v->lo; + v->hi = ~v->hi; +} + +void +_divv(Vlong *q, Vlong n, Vlong d) +{ + long nneg, dneg; + + if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) { + q->lo = (long)n.lo / (long)d.lo; + q->hi = ((long)q->lo) >> 31; + return; + } + nneg = n.hi >> 31; + if(nneg) + vneg(&n); + dneg = d.hi >> 31; + if(dneg) + vneg(&d); + dodiv(n, d, q, 0); + if(nneg != dneg) + vneg(q); +} + +void +_modv(Vlong *r, Vlong n, Vlong d) +{ + long nneg, dneg; + + if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) { + r->lo = (long)n.lo % (long)d.lo; + r->hi = ((long)r->lo) >> 31; + return; + } + nneg = n.hi >> 31; + if(nneg) + vneg(&n); + dneg = d.hi >> 31; + if(dneg) + vneg(&d); + dodiv(n, d, 0, r); + if(nneg) + vneg(r); +} + +void +_rshav(Vlong *r, Vlong a, int b) +{ + long t; + + t = a.hi; + if(b >= 32) { + r->hi = t>>31; + if(b >= 64) { + /* this is illegal re C standard */ + r->lo = t>>31; + return; + } + r->lo = t >> (b-32); + return; + } + if(b <= 0) { + r->hi = t; + r->lo = a.lo; + return; + } + r->hi = t >> b; + r->lo = (t << (32-b)) | (a.lo >> b); +} + +void +_rshlv(Vlong *r, Vlong a, int b) +{ + ulong t; + + t = a.hi; + if(b >= 32) { + r->hi = 0; + if(b >= 64) { + /* this is illegal re C standard */ + r->lo = 0; + return; + } + r->lo = t >> (b-32); + return; + } + if(b <= 0) { + r->hi = t; + r->lo = a.lo; + return; + } + r->hi = t >> b; + r->lo = (t << (32-b)) | (a.lo >> b); +} + +void +_lshv(Vlong *r, Vlong a, int b) +{ + ulong t; + + t = a.lo; + if(b >= 32) { + r->lo = 0; + if(b >= 64) { + /* this is illegal re C standard */ + r->hi = 0; + return; + } + r->hi = t << (b-32); + return; + } + if(b <= 0) { + r->lo = t; + r->hi = a.hi; + return; + } + r->lo = t << b; + r->hi = (t >> (32-b)) | (a.hi << b); +} + +void +_andv(Vlong *r, Vlong a, Vlong b) +{ + r->hi = a.hi & b.hi; + r->lo = a.lo & b.lo; +} + +void +_orv(Vlong *r, Vlong a, Vlong b) +{ + r->hi = a.hi | b.hi; + r->lo = a.lo | b.lo; +} + +void +_xorv(Vlong *r, Vlong a, Vlong b) +{ + r->hi = a.hi ^ b.hi; + r->lo = a.lo ^ b.lo; +} + +void +_vpp(Vlong *l, Vlong *r) +{ + + l->hi = r->hi; + l->lo = r->lo; + r->lo++; + if(r->lo == 0) + r->hi++; +} + +void +_vmm(Vlong *l, Vlong *r) +{ + + l->hi = r->hi; + l->lo = r->lo; + if(r->lo == 0) + r->hi--; + r->lo--; +} + +void +_ppv(Vlong *l, Vlong *r) +{ + + r->lo++; + if(r->lo == 0) + r->hi++; + l->hi = r->hi; + l->lo = r->lo; +} + +void +_mmv(Vlong *l, Vlong *r) +{ + + if(r->lo == 0) + r->hi--; + r->lo--; + l->hi = r->hi; + l->lo = r->lo; +} + +void +_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv) +{ + Vlong t, u; + + u.lo = 0; + u.hi = 0; + switch(type) { + default: + abort(); + break; + + case 1: /* schar */ + t.lo = *(schar*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(schar*)lv = u.lo; + break; + + case 2: /* uchar */ + t.lo = *(uchar*)lv; + t.hi = 0; + fn(&u, t, rv); + *(uchar*)lv = u.lo; + break; + + case 3: /* short */ + t.lo = *(short*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(short*)lv = u.lo; + break; + + case 4: /* ushort */ + t.lo = *(ushort*)lv; + t.hi = 0; + fn(&u, t, rv); + *(ushort*)lv = u.lo; + break; + + case 9: /* int */ + t.lo = *(int*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(int*)lv = u.lo; + break; + + case 10: /* uint */ + t.lo = *(uint*)lv; + t.hi = 0; + fn(&u, t, rv); + *(uint*)lv = u.lo; + break; + + case 5: /* long */ + t.lo = *(long*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(long*)lv = u.lo; + break; + + case 6: /* ulong */ + t.lo = *(ulong*)lv; + t.hi = 0; + fn(&u, t, rv); + *(ulong*)lv = u.lo; + break; + + case 7: /* vlong */ + case 8: /* uvlong */ + fn(&u, *(Vlong*)lv, rv); + *(Vlong*)lv = u; + break; + } + *ret = u; +} + +void +_p2v(Vlong *ret, void *p) +{ + long t; + + t = (ulong)p; + ret->lo = t; + ret->hi = 0; +} + +void +_sl2v(Vlong *ret, long sl) +{ + long t; + + t = sl; + ret->lo = t; + ret->hi = t >> 31; +} + +void +_ul2v(Vlong *ret, ulong ul) +{ + long t; + + t = ul; + ret->lo = t; + ret->hi = 0; +} + +void +_si2v(Vlong *ret, int si) +{ + long t; + + t = si; + ret->lo = t; + ret->hi = t >> 31; +} + +void +_ui2v(Vlong *ret, uint ui) +{ + long t; + + t = ui; + ret->lo = t; + ret->hi = 0; +} + +void +_sh2v(Vlong *ret, long sh) +{ + long t; + + t = (sh << 16) >> 16; + ret->lo = t; + ret->hi = t >> 31; +} + +void +_uh2v(Vlong *ret, ulong ul) +{ + long t; + + t = ul & 0xffff; + ret->lo = t; + ret->hi = 0; +} + +void +_sc2v(Vlong *ret, long uc) +{ + long t; + + t = (uc << 24) >> 24; + ret->lo = t; + ret->hi = t >> 31; +} + +void +_uc2v(Vlong *ret, ulong ul) +{ + long t; + + t = ul & 0xff; + ret->lo = t; + ret->hi = 0; +} + +long +_v2sc(Vlong rv) +{ + long t; + + t = rv.lo & 0xff; + return (t << 24) >> 24; +} + +long +_v2uc(Vlong rv) +{ + + return rv.lo & 0xff; +} + +long +_v2sh(Vlong rv) +{ + long t; + + t = rv.lo & 0xffff; + return (t << 16) >> 16; +} + +long +_v2uh(Vlong rv) +{ + + return rv.lo & 0xffff; +} + +long +_v2sl(Vlong rv) +{ + + return rv.lo; +} + +long +_v2ul(Vlong rv) +{ + + return rv.lo; +} + +long +_v2si(Vlong rv) +{ + + return rv.lo; +} + +long +_v2ui(Vlong rv) +{ + + return rv.lo; +} + +int +_testv(Vlong rv) +{ + return rv.lo || rv.hi; +} + +int +_eqv(Vlong lv, Vlong rv) +{ + return lv.lo == rv.lo && lv.hi == rv.hi; +} + +int +_nev(Vlong lv, Vlong rv) +{ + return lv.lo != rv.lo || lv.hi != rv.hi; +} + +int +_ltv(Vlong lv, Vlong rv) +{ + return (long)lv.hi < (long)rv.hi || + (lv.hi == rv.hi && lv.lo < rv.lo); +} + +int +_lev(Vlong lv, Vlong rv) +{ + return (long)lv.hi < (long)rv.hi || + (lv.hi == rv.hi && lv.lo <= rv.lo); +} + +int +_gtv(Vlong lv, Vlong rv) +{ + return (long)lv.hi > (long)rv.hi || + (lv.hi == rv.hi && lv.lo > rv.lo); +} + +int +_gev(Vlong lv, Vlong rv) +{ + return (long)lv.hi > (long)rv.hi || + (lv.hi == rv.hi && lv.lo >= rv.lo); +} + +int +_lov(Vlong lv, Vlong rv) +{ + return lv.hi < rv.hi || + (lv.hi == rv.hi && lv.lo < rv.lo); +} + +int +_lsv(Vlong lv, Vlong rv) +{ + return lv.hi < rv.hi || + (lv.hi == rv.hi && lv.lo <= rv.lo); +} + +int +_hiv(Vlong lv, Vlong rv) +{ + return lv.hi > rv.hi || + (lv.hi == rv.hi && lv.lo > rv.lo); +} + +int +_hsv(Vlong lv, Vlong rv) +{ + return lv.hi > rv.hi || + (lv.hi == rv.hi && lv.lo >= rv.lo); +} diff --git a/sys/src/ape/lib/mp/spim/mkfile b/sys/src/ape/lib/mp/spim/mkfile new file mode 100644 index 0000000..678e102 --- /dev/null +++ b/sys/src/ape/lib/mp/spim/mkfile @@ -0,0 +1,26 @@ +APE=/sys/src/ape +<$APE/config + +LIB=/$objtype/lib/ape/libmp.a + +SFILES=\ + mpvecadd.s\ + mpvecdigmuladd.s\ + mpvecdigmulsub.s\ + mpvecsub.s\ +# mpdigdiv.s\ + +HFILES=\ + /sys/include/ape/mp.h\ + ../../../../libmp/port/dat.h + +OFILES=${SFILES:%.s=%.$O} + +UPDATE=mkfile\ + $HFILES\ + $SFILES\ + +to.ieee); cast = (char*)&fl; for(; i>16, REGZERO, REGTMP); o2 = OP_IRR(opirr(AOR), v, REGTMP, REGTMP); o3 = OP_RRR(oprrr(AADDU), r, REGTMP, REGTMP); - o4 = OP_IRR(opirr(AMOVF+ALAST), 0, REGTMP, p->to.reg+1); - o5 = OP_IRR(opirr(AMOVF+ALAST), 4, REGTMP, p->to.reg); + if(little) { + o4 = OP_IRR(opirr(AMOVF+ALAST), 0, REGTMP, p->to.reg); + o5 = OP_IRR(opirr(AMOVF+ALAST), 4, REGTMP, p->to.reg+1); + } else { + o4 = OP_IRR(opirr(AMOVF+ALAST), 0, REGTMP, p->to.reg+1); + o5 = OP_IRR(opirr(AMOVF+ALAST), 4, REGTMP, p->to.reg); + } break; case 16: o1 = OP_IRR(opirr(ALAST), v>>16, REGZERO, REGTMP); @@ -1035,8 +1043,13 @@ asmout(Prog *p, Optab *o, int aflag) o4 = OP_IRR(opirr(AMOVF+ALAST), 0, REGTMP, p->to.reg); break; case 8: - o1 = OP_IRR(opirr(AMOVF+ALAST), v, r, p->to.reg+1); - o2 = OP_IRR(opirr(AMOVF+ALAST), v+4, r, p->to.reg); + if(little) { + o1 = OP_IRR(opirr(AMOVF+ALAST), v, r, p->to.reg); + o2 = OP_IRR(opirr(AMOVF+ALAST), v+4, r, p->to.reg+1); + } else { + o1 = OP_IRR(opirr(AMOVF+ALAST), v, r, p->to.reg+1); + o2 = OP_IRR(opirr(AMOVF+ALAST), v+4, r, p->to.reg); + } break; case 4: o1 = OP_IRR(opirr(AMOVF+ALAST), v, r, p->to.reg); @@ -1056,8 +1069,13 @@ asmout(Prog *p, Optab *o, int aflag) o1 = OP_IRR(opirr(ALAST), v>>16, REGZERO, REGTMP); o2 = OP_IRR(opirr(AOR), v, REGTMP, REGTMP); o3 = OP_RRR(oprrr(AADDU), r, REGTMP, REGTMP); - o4 = OP_IRR(opirr(AMOVF), 0, REGTMP, p->from.reg+1); - o5 = OP_IRR(opirr(AMOVF), 4, REGTMP, p->from.reg); + if(little) { + o4 = OP_IRR(opirr(AMOVF), 0, REGTMP, p->from.reg); + o5 = OP_IRR(opirr(AMOVF), 4, REGTMP, p->from.reg+1); + } else { + o4 = OP_IRR(opirr(AMOVF), 0, REGTMP, p->from.reg+1); + o5 = OP_IRR(opirr(AMOVF), 4, REGTMP, p->from.reg); + } break; case 16: if(r == REGTMP) @@ -1068,8 +1086,13 @@ asmout(Prog *p, Optab *o, int aflag) o4 = OP_IRR(opirr(AMOVF), 0, REGTMP, p->from.reg); break; case 8: - o1 = OP_IRR(opirr(AMOVF), v, r, p->from.reg+1); - o2 = OP_IRR(opirr(AMOVF), v+4, r, p->from.reg); + if(little) { + o1 = OP_IRR(opirr(AMOVF), v, r, p->from.reg); + o2 = OP_IRR(opirr(AMOVF), v+4, r, p->from.reg+1); + } else { + o1 = OP_IRR(opirr(AMOVF), v, r, p->from.reg+1); + o2 = OP_IRR(opirr(AMOVF), v+4, r, p->from.reg); + } break; case 4: o1 = OP_IRR(opirr(AMOVF), v, r, p->from.reg); diff --git a/sys/src/libc/spim/argv0.s b/sys/src/libc/spim/argv0.s new file mode 100644 index 0000000..8d9f9b2 --- /dev/null +++ b/sys/src/libc/spim/argv0.s @@ -0,0 +1,4 @@ +GLOBL argv0(SB), $4 +GLOBL _tos(SB), $4 +GLOBL _privates(SB), $4 +GLOBL _nprivates(SB), $4 diff --git a/sys/src/libc/spim/atom.s b/sys/src/libc/spim/atom.s new file mode 100644 index 0000000..b907f7f --- /dev/null +++ b/sys/src/libc/spim/atom.s @@ -0,0 +1,52 @@ +/* + * R4000 user-level atomic operations + */ + +#define LL(base, rt) WORD $((060<<26)|((base)<<21)|((rt)<<16)) +#define SC(base, rt) WORD $((070<<26)|((base)<<21)|((rt)<<16)) +#define NOOP WORD $0x27 + +TEXT ainc(SB), 1, $-4 /* long ainc(long *); */ +TEXT _xinc(SB), 1, $-4 /* void _xinc(long *); */ + MOVW R1, R2 /* address of counter */ +loop: MOVW $1, R3 + LL(2, 1) + NOOP + ADDU R1, R3 + MOVW R3, R1 /* return new value */ + SC(2, 3) + NOOP + BEQ R3,loop + RET + +TEXT adec(SB), 1, $-4 /* long adec(long*); */ +TEXT _xdec(SB), 1, $-4 /* long _xdec(long *); */ + MOVW R1, R2 /* address of counter */ +loop1: MOVW $-1, R3 + LL(2, 1) + NOOP + ADDU R1, R3 + MOVW R3, R1 /* return new value */ + SC(2, 3) + NOOP + BEQ R3,loop1 + RET + +/* + * int cas(uint* p, int ov, int nv); + */ +TEXT cas(SB), 1, $-4 + MOVW ov+4(FP), R2 + MOVW nv+8(FP), R3 +spincas: + LL(1, 4) /* R4 = *R1 */ + NOOP + BNE R2, R4, fail + SC(1, 3) /* *R1 = R3 */ + NOOP + BEQ R3, spincas /* R3 == 0 means store failed */ + MOVW $1, R1 + RET +fail: + MOVW $0, R1 + RET diff --git a/sys/src/libc/spim/c_fcr0.s b/sys/src/libc/spim/c_fcr0.s new file mode 100644 index 0000000..62c94bc --- /dev/null +++ b/sys/src/libc/spim/c_fcr0.s @@ -0,0 +1,3 @@ + TEXT C_fcr0(SB), $0 + MOVW FCR0, R1 + RET diff --git a/sys/src/libc/spim/cycles.c b/sys/src/libc/spim/cycles.c new file mode 100644 index 0000000..d03e827 --- /dev/null +++ b/sys/src/libc/spim/cycles.c @@ -0,0 +1,10 @@ +#include +#include + +#pragma profile off + +void +cycles(uvlong*u) +{ + *u = 0LL; +} diff --git a/sys/src/libc/spim/getcallerpc.s b/sys/src/libc/spim/getcallerpc.s new file mode 100644 index 0000000..a28cd93 --- /dev/null +++ b/sys/src/libc/spim/getcallerpc.s @@ -0,0 +1,3 @@ +TEXT getcallerpc(SB), $0 + MOVW 0(SP), R1 + RET diff --git a/sys/src/libc/spim/getfcr.s b/sys/src/libc/spim/getfcr.s new file mode 100644 index 0000000..9e84cbc --- /dev/null +++ b/sys/src/libc/spim/getfcr.s @@ -0,0 +1,15 @@ +TEXT getfsr(SB), $0 + MOVW FCR31, R1 + RET + +TEXT setfsr(SB), $0 + MOVW R1, FCR31 + RET + +TEXT getfcr(SB), $0 + MOVW FCR31, R1 + RET + +TEXT setfcr(SB), $0 + MOVW R1, FCR31 + RET diff --git a/sys/src/libc/spim/main9.s b/sys/src/libc/spim/main9.s new file mode 100644 index 0000000..60ed1c1 --- /dev/null +++ b/sys/src/libc/spim/main9.s @@ -0,0 +1,25 @@ +#define NPRIVATES 16 + +TEXT _main(SB), 1, $(16 + NPRIVATES*4) + + MOVW $setR30(SB), R30 + MOVW R1, _tos(SB) + + MOVW $p-64(SP), R1 + MOVW R1, _privates(SB) + MOVW $NPRIVATES, R1 + MOVW R1, _nprivates(SB) + + MOVW inargc-4(FP), R1 + MOVW $inargv+0(FP), R2 + MOVW R1, 4(R29) + MOVW R2, 8(R29) + JAL main(SB) +loop: + MOVW $_exitstr<>(SB), R1 + MOVW R1, 4(R29) + JAL exits(SB) + JMP loop + +DATA _exitstr<>+0(SB)/4, $"main" +GLOBL _exitstr<>+0(SB), $5 diff --git a/sys/src/libc/spim/main9p.s b/sys/src/libc/spim/main9p.s new file mode 100644 index 0000000..c3f73af --- /dev/null +++ b/sys/src/libc/spim/main9p.s @@ -0,0 +1,41 @@ +#define NPRIVATES 16 + +TEXT _mainp(SB), 1, $(16 + NPRIVATES*4) + + MOVW $setR30(SB), R30 + /* _tos = arg */ + MOVW R1, _tos(SB) + + MOVW $p-64(SP), R1 + MOVW R1, _privates(SB) + MOVW $NPRIVATES, R1 + MOVW R1, _nprivates(SB) + + /* _profmain(); */ + JAL _profmain(SB) + /* _tos->prof.pp = _tos->prof.next; */ + MOVW _tos(SB), R1 + MOVW 4(R1), R2 + MOVW R2, 0(R1) + /* main(argc, argv); */ + MOVW inargc-4(FP), R1 + MOVW $inargv+0(FP), R2 + MOVW R1, 4(R29) + MOVW R2, 8(R29) + JAL main(SB) +loop: + MOVW $exits<>(SB), R1 + MOVW R1, 4(R29) + JAL exits(SB) + MOVW $_profin(SB), R0 /* force loading of profile */ + JMP loop + +TEXT _savearg(SB), 1, $0 + RET + +TEXT _callpc(SB), 1, $0 + MOVW argp-4(FP), R1 + RET + +DATA exits<>+0(SB)/4, $"main" +GLOBL exits<>+0(SB), $5 diff --git a/sys/src/libc/spim/memccpy.s b/sys/src/libc/spim/memccpy.s new file mode 100644 index 0000000..6b3dbcc --- /dev/null +++ b/sys/src/libc/spim/memccpy.s @@ -0,0 +1,20 @@ + TEXT memccpy(SB), $0 +MOVW R1, 0(FP) + MOVW n+12(FP), R1 + BEQ R1, ret + MOVW s1+0(FP), R3 + MOVW s2+4(FP), R2 + MOVBU c+8(FP), R4 /* little endian */ + ADDU R1, R2, R5 + +l1: MOVBU (R2), R6 + ADDU $1, R2 + MOVBU R6, (R3) + ADDU $1, R3 + BEQ R4, R6, eq + BNE R2, R5, l1 + MOVW $0, R1 + RET + +eq: MOVW R3, R1 +ret: RET diff --git a/sys/src/libc/spim/memchr.s b/sys/src/libc/spim/memchr.s new file mode 100644 index 0000000..0a2707e --- /dev/null +++ b/sys/src/libc/spim/memchr.s @@ -0,0 +1,39 @@ + TEXT memchr(SB), $0 +MOVW R1, 0(FP) + + MOVW n+8(FP), R1 + MOVW s1+0(FP), R2 + MOVBU c+4(FP), R3 // little endian, 4(FP) instead of 7(FP) + ADDU R1, R2, R6 + + AND $(~1), R1, R5 + ADDU R2, R5 + BEQ R2, R5, lt2 + +l1: + MOVBU 0(R2), R4 + MOVBU 1(R2), R7 + BEQ R3, R4, eq0 + ADDU $2, R2 + BEQ R3, R7, eq + BNE R2, R5, l1 + +lt2: + BEQ R2, R6, zret + +l2: + MOVBU (R2), R4 + ADDU $1, R2 + BEQ R3, R4, eq + BNE R2, R6, l2 +zret: + MOVW R0, R1 + RET + +eq0: + MOVW R2, R1 + RET + +eq: + SUBU $1,R2, R1 + RET diff --git a/sys/src/libc/spim/memcmp.s b/sys/src/libc/spim/memcmp.s new file mode 100644 index 0000000..6160858 --- /dev/null +++ b/sys/src/libc/spim/memcmp.s @@ -0,0 +1,116 @@ + TEXT memcmp(SB), $0 +MOVW R1, 0(FP) + +/* + * performance: + * alligned about 1.0us/call and 17.4mb/sec + * unalligned is about 3.1mb/sec + */ + + MOVW n+8(FP), R3 /* R3 is count */ + MOVW s1+0(FP), R4 /* R4 is pointer1 */ + MOVW s2+4(FP), R5 /* R5 is pointer2 */ + ADDU R3,R4, R6 /* R6 is end pointer1 */ + + JMP out // XXX little endian + +/* + * if not at least 4 chars, + * dont even mess around. + * 3 chars to guarantee any + * rounding up to a word + * boundary and 4 characters + * to get at least maybe one + * full word cmp. + */ + SGT $4,R3, R1 + BNE R1, out + +/* + * test if both pointers + * are similarly word alligned + */ + XOR R4,R5, R1 + AND $3, R1 + BNE R1, out + +/* + * byte at a time to word allign + */ +l1: + AND $3,R4, R1 + BEQ R1, l2 + MOVBU 0(R4), R8 + MOVBU 0(R5), R9 + ADDU $1, R4 + BNE R8,R9, ne + ADDU $1, R5 + JMP l1 + +/* + * turn R3 into end pointer1-15 + * cmp 16 at a time while theres room + */ +l2: + ADDU $-15,R6, R3 +l3: + SGTU R3,R4, R1 + BEQ R1, l4 + MOVW 0(R4), R8 + MOVW 0(R5), R9 + MOVW 4(R4), R10 + BNE R8,R9, ne + MOVW 4(R5), R11 + MOVW 8(R4), R8 + BNE R10,R11, ne1 + MOVW 8(R5), R9 + MOVW 12(R4), R10 + BNE R8,R9, ne + MOVW 12(R5), R11 + ADDU $16, R4 + BNE R10,R11, ne1 + BNE R8,R9, ne + ADDU $16, R5 + JMP l3 + +/* + * turn R3 into end pointer1-3 + * cmp 4 at a time while theres room + */ +l4: + ADDU $-3,R6, R3 +l5: + SGTU R3,R4, R1 + BEQ R1, out + MOVW 0(R4), R8 + MOVW 0(R5), R9 + ADDU $4, R4 + BNE R8,R9, ne /* only works because big endian */ + ADDU $4, R5 + JMP l5 + +/* + * last loop, cmp byte at a time + */ +out: + SGTU R6,R4, R1 + BEQ R1, ret + MOVBU 0(R4), R8 + MOVBU 0(R5), R9 + ADDU $1, R4 + BNE R8,R9, ne + ADDU $1, R5 + JMP out + +ne1: + SGTU R10,R11, R1 + BNE R1, ret + MOVW $-1,R1 + RET +ne: + SGTU R8,R9, R1 + BNE R1, ret + MOVW $-1,R1 +ret: + RET + END diff --git a/sys/src/libc/spim/memmove.s b/sys/src/libc/spim/memmove.s new file mode 100644 index 0000000..b092474 --- /dev/null +++ b/sys/src/libc/spim/memmove.s @@ -0,0 +1,237 @@ + TEXT memmove(SB), $0 + + JMP move + + TEXT memcpy(SB), $0 +move: + MOVW R1, s1+0(FP) + + MOVW n+8(FP), R3 /* R3 is count */ + MOVW R1, R4 /* R4 is to-pointer */ + SGT R0, R3, R5 + BEQ R5, ok + MOVW (R0), R0 /* abort if negative count */ +ok: + MOVW s2+4(FP), R5 /* R5 is from-pointer */ + ADDU R3,R5, R7 /* R7 is end from-pointer */ + ADDU R3,R4, R6 /* R6 is end to-pointer */ + +/* + * easiest test is copy backwards if + * destination string has higher mem address + */ + SGT $4,R3, R2 + SGTU R4,R5, R1 + BNE R1, back + +/* + * if not at least 4 chars, + * don't even mess around. + * 3 chars to guarantee any + * rounding up to a word + * boundary and 4 characters + * to get at least maybe one + * full word store. + */ + BNE R2, fout + + +/* + * byte at a time to word align destination + */ +f1: + AND $3,R4, R1 + BEQ R1, f2 + MOVB 0(R5), R8 + ADDU $1, R5 + MOVB R8, 0(R4) + ADDU $1, R4 + JMP f1 + +/* + * test if source is now word aligned + */ +f2: + AND $3, R5, R1 + BNE R1, fun2 +/* + * turn R3 into to-end pointer-15 + * copy 16 at a time while theres room. + * R6 is smaller than R7 -- + * there are problems if R7 is 0. + */ + ADDU $-15,R6, R3 +f3: + SGTU R3,R4, R1 + BEQ R1, f4 + MOVW 0(R5), R8 + MOVW 4(R5), R9 + MOVW R8, 0(R4) + MOVW 8(R5), R8 + MOVW R9, 4(R4) + MOVW 12(R5), R9 + ADDU $16, R5 + MOVW R8, 8(R4) + MOVW R9, 12(R4) + ADDU $16, R4 + JMP f3 + +/* + * turn R3 into to-end pointer-3 + * copy 4 at a time while theres room + */ +f4: + ADDU $-3,R6, R3 +f5: + SGTU R3,R4, R1 + BEQ R1, fout + MOVW 0(R5), R8 + ADDU $4, R5 + MOVW R8, 0(R4) + ADDU $4, R4 + JMP f5 + +/* + * forward copy, unaligned + * turn R3 into to-end pointer-15 + * copy 16 at a time while theres room. + * R6 is smaller than R7 -- + * there are problems if R7 is 0. + */ +fun2: + ADDU $-15,R6, R3 +fun3: + SGTU R3,R4, R1 + BEQ R1, fun4 + MOVWR 0(R5), R8 + MOVWL 3(R5), R8 + MOVWR 4(R5), R9 + MOVWL 7(R5), R9 + MOVW R8, 0(R4) + MOVWR 8(R5), R8 + MOVWL 11(R5), R8 + MOVW R9, 4(R4) + MOVWR 12(R5), R9 + MOVWL 15(R5), R9 + ADDU $16, R5 + MOVW R8, 8(R4) + MOVW R9, 12(R4) + ADDU $16, R4 + JMP fun3 + +/* + * turn R3 into to-end pointer-3 + * copy 4 at a time while theres room + */ +fun4: + ADDU $-3,R6, R3 +fun5: + SGTU R3,R4, R1 + BEQ R1, fout + MOVWR 0(R5), R8 + MOVWL 3(R5), R8 + ADDU $4, R5 + MOVW R8, 0(R4) + ADDU $4, R4 + JMP fun5 + +/* + * last loop, copy byte at a time + */ +fout: + BEQ R7,R5, ret + MOVB 0(R5), R8 + ADDU $1, R5 + MOVB R8, 0(R4) + ADDU $1, R4 + JMP fout + +/* + * whole thing repeated for backwards + */ +back: + BNE R2, bout +b1: + AND $3,R6, R1 + BEQ R1, b2 + MOVB -1(R7), R8 + ADDU $-1, R7 + MOVB R8, -1(R6) + ADDU $-1, R6 + JMP b1 + +b2: + AND $3, R7, R1 + BNE R1, bun2 + + ADDU $15,R5, R3 +b3: + SGTU R7,R3, R1 + BEQ R1, b4 + MOVW -4(R7), R8 + MOVW -8(R7), R9 + MOVW R8, -4(R6) + MOVW -12(R7), R8 + MOVW R9, -8(R6) + MOVW -16(R7), R9 + ADDU $-16, R7 + MOVW R8, -12(R6) + MOVW R9, -16(R6) + ADDU $-16, R6 + JMP b3 +b4: + ADDU $3,R5, R3 +b5: + SGTU R7,R3, R1 + BEQ R1, bout + MOVW -4(R7), R8 + ADDU $-4, R7 + MOVW R8, -4(R6) + ADDU $-4, R6 + JMP b5 + +bun2: + ADDU $15,R5, R3 +bun3: + SGTU R7,R3, R1 + BEQ R1, bun4 + MOVWR -4(R7), R8 + MOVWL -1(R7), R8 + MOVWR -8(R7), R9 + MOVWL -5(R7), R9 + MOVW R8, -4(R6) + MOVWR -12(R7), R8 + MOVWL -9(R7), R8 + MOVW R9, -8(R6) + MOVWR -16(R7), R9 + MOVWL -13(R7), R9 + ADDU $-16, R7 + MOVW R8, -12(R6) + MOVW R9, -16(R6) + ADDU $-16, R6 + JMP bun3 + +bun4: + ADDU $3,R5, R3 +bun5: + SGTU R7,R3, R1 + BEQ R1, bout + MOVWR -4(R7), R8 + MOVWL -1(R7), R8 + ADDU $-4, R7 + MOVW R8, -4(R6) + ADDU $-4, R6 + JMP bun5 + +bout: + BEQ R7,R5, ret + MOVB -1(R7), R8 + ADDU $-1, R7 + MOVB R8, -1(R6) + ADDU $-1, R6 + JMP bout + +ret: + MOVW s1+0(FP), R1 + RET + END diff --git a/sys/src/libc/spim/memset.s b/sys/src/libc/spim/memset.s new file mode 100644 index 0000000..8c9467f --- /dev/null +++ b/sys/src/libc/spim/memset.s @@ -0,0 +1,88 @@ + TEXT memset(SB),$12 +MOVW R1, 0(FP) + +/* + * performance: + * about 1us/call and 28mb/sec + */ + + MOVW n+8(FP), R3 /* R3 is count */ + MOVW p+0(FP), R4 /* R4 is pointer */ + MOVW c+4(FP), R5 /* R5 is char */ + ADDU R3,R4, R6 /* R6 is end pointer */ + +/* + * if not at least 4 chars, + * dont even mess around. + * 3 chars to guarantee any + * rounding up to a word + * boundary and 4 characters + * to get at least maybe one + * full word store. + */ + SGT $4,R3, R1 + BNE R1, out + +/* + * turn R5 into a word of characters + */ + AND $0xff, R5 + SLL $8,R5, R1 + OR R1, R5 + SLL $16,R5, R1 + OR R1, R5 + +/* + * store one byte at a time until pointer + * is alligned on a word boundary + */ +l1: + AND $3,R4, R1 + BEQ R1, l2 + MOVB R5, 0(R4) + ADDU $1, R4 + JMP l1 + +/* + * turn R3 into end pointer-15 + * store 16 at a time while theres room + */ +l2: + ADDU $-15,R6, R3 +l3: + SGTU R3,R4, R1 + BEQ R1, l4 + MOVW R5, 0(R4) + MOVW R5, 4(R4) + ADDU $16, R4 + MOVW R5, -8(R4) + MOVW R5, -4(R4) + JMP l3 + +/* + * turn R3 into end pointer-3 + * store 4 at a time while theres room + */ +l4: + ADDU $-3,R6, R3 +l5: + SGTU R3,R4, R1 + BEQ R1, out + MOVW R5, 0(R4) + ADDU $4, R4 + JMP l5 + +/* + * last loop, store byte at a time + */ +out: + SGTU R6,R4 ,R1 + BEQ R1, ret + MOVB R5, 0(R4) + ADDU $1, R4 + JMP out + +ret: + MOVW s1+0(FP), R1 + RET + END diff --git a/sys/src/libc/spim/mkfile b/sys/src/libc/spim/mkfile new file mode 100644 index 0000000..054e340 --- /dev/null +++ b/sys/src/libc/spim/mkfile @@ -0,0 +1,40 @@ +objtype=spim + +#include +#include + +void +notejmp(void *vr, jmp_buf j, int ret) +{ + struct Ureg *r = vr; + + r->r1 = ret; + if(ret == 0) + r->r1 = 1; + r->pc = j[JMPBUFPC]; + r->sp = j[JMPBUFSP]; + noted(NCONT); +} diff --git a/sys/src/libc/spim/setjmp.s b/sys/src/libc/spim/setjmp.s new file mode 100644 index 0000000..bb971b6 --- /dev/null +++ b/sys/src/libc/spim/setjmp.s @@ -0,0 +1,14 @@ +TEXT setjmp(SB), 1, $-4 + MOVW R29, (R1) + MOVW R31, 4(R1) + MOVW $0, R1 + RET + +TEXT longjmp(SB), 1, $-4 + MOVW r+4(FP), R3 + BNE R3, ok /* ansi: "longjmp(0) => longjmp(1)" */ + MOVW $1, R3 /* bless their pointed heads */ +ok: MOVW (R1), R29 + MOVW 4(R1), R31 + MOVW R3, R1 + RET diff --git a/sys/src/libc/spim/sqrt.c b/sys/src/libc/spim/sqrt.c new file mode 100644 index 0000000..1ca84ad --- /dev/null +++ b/sys/src/libc/spim/sqrt.c @@ -0,0 +1,103 @@ +#include +#include + +static long sqtab[64] = +{ + 0x6cdb2, 0x726d4, 0x77ea3, 0x7d52f, 0x82a85, 0x87eb1, 0x8d1c0, 0x923bd, + 0x974b2, 0x9c4a8, 0xa13a9, 0xa61be, 0xaaeee, 0xafb41, 0xb46bf, 0xb916e, + 0xbdb55, 0xc247a, 0xc6ce3, 0xcb495, 0xcfb95, 0xd41ea, 0xd8796, 0xdcca0, + 0xe110c, 0xe54dd, 0xe9818, 0xedac0, 0xf1cd9, 0xf5e67, 0xf9f6e, 0xfdfef, + 0x01fe0, 0x05ee6, 0x09cfd, 0x0da30, 0x11687, 0x1520c, 0x18cc8, 0x1c6c1, + 0x20000, 0x2388a, 0x27068, 0x2a79e, 0x2de32, 0x3142b, 0x3498c, 0x37e5b, + 0x3b29d, 0x3e655, 0x41989, 0x44c3b, 0x47e70, 0x4b02b, 0x4e16f, 0x51241, + 0x542a2, 0x57296, 0x5a220, 0x5d142, 0x60000, 0x62e5a, 0x65c55, 0x689f2, +}; + +double +sqrt(double arg) +{ + int e, ms; + double a, t; + union + { + double d; + struct + { + long ls; + long ms; + }; + } u; + + u.d = arg; + ms = u.ms; + + /* + * sign extend the mantissa with + * exponent. result should be > 0 for + * normal case. + */ + e = ms >> 20; + if(e <= 0) { + if(e == 0) + return 0; + return NaN(); + } + + /* + * pick up arg/4 by adjusting exponent + */ + u.ms = ms - (2 << 20); + a = u.d; + + /* + * use 5 bits of mantissa and 1 bit + * of exponent to form table index. + * insert exponent/2 - 1. + */ + e = (((e - 1023) >> 1) + 1022) << 20; + u.ms = *(long*)((char*)sqtab + ((ms >> 13) & 0xfc)) | e; + u.ls = 0; + + /* + * three laps of newton + */ + e = 1 << 20; + t = u.d; + u.d = t + a/t; + u.ms -= e; /* u.d /= 2; */ + t = u.d; + u.d = t + a/t; + u.ms -= e; /* u.d /= 2; */ + t = u.d; + + return t + a/t; +} + +/* + * this is the program that generated the table. + * it calls sqrt by some other means. + * + * void + * main(void) + * { + * int i; + * union U + * { + * double d; + * struct + * { + * long ls; + * long ms; + * }; + * } u; + * + * for(i=0; i<64; i++) { + * u.ms = (i<<15) | 0x3fe04000; + * u.ls = 0; + * u.d = sqrt(u.d); + * print(" 0x%.5lux,", u.ms & 0xfffff); + * } + * print("\n"); + * exits(0); + * } + */ diff --git a/sys/src/libc/spim/strchr.s b/sys/src/libc/spim/strchr.s new file mode 100644 index 0000000..f550ee2 --- /dev/null +++ b/sys/src/libc/spim/strchr.s @@ -0,0 +1,63 @@ + TEXT strchr(SB), $0 +MOVW R1, 0(FP) + MOVB c+4(FP), R4 // little endian, 4(FP) instead of 7(FP) + MOVW s+0(FP), R3 + + BEQ R4, l2 + +/* + * char is not null + */ +l1: + MOVB (R3), R1 + ADDU $1, R3 + BEQ R1, ret + BNE R1,R4, l1 + JMP rm1 + +/* + * char is null + * align to word + */ +l2: + AND $3,R3, R1 + BEQ R1, l3 + MOVB (R3), R1 + ADDU $1, R3 + BNE R1, l2 + JMP rm1 + +l3: + MOVW $0xff000000, R6 + MOVW $0x00ff0000, R7 + +l4: + MOVW (R3), R5 + ADDU $4, R3 + AND $0xff,R5, R1 /* byte 0 */ + AND $0xff00,R5, R2 /* byte 1 */ + BEQ R1, b0 + AND R7,R5, R1 /* byte 2 */ + BEQ R2, b1 + AND R6,R5, R2 /* byte 3 */ + BEQ R1, b2 + BNE R2, l4 + +rm1: + ADDU $-1,R3, R1 + JMP ret + +b2: + ADDU $-2,R3, R1 + JMP ret + +b1: + ADDU $-3,R3, R1 + JMP ret + +b0: + ADDU $-4,R3, R1 + JMP ret + +ret: + RET diff --git a/sys/src/libc/spim/strcmp.s b/sys/src/libc/spim/strcmp.s new file mode 100644 index 0000000..da986a2 --- /dev/null +++ b/sys/src/libc/spim/strcmp.s @@ -0,0 +1,21 @@ +TEXT strcmp(SB), $0 + + MOVW s2+4(FP), R2 + +l1: + MOVB (R2), R3 + MOVB (R1), R4 + ADDU $1, R1 + BEQ R3, end + ADDU $1, R2 + BEQ R3, R4, l1 + + SGTU R4, R3, R1 + BNE R1, ret + MOVW $-1, R1 + RET + +end: + SGTU R4, R3, R1 +ret: + RET diff --git a/sys/src/libc/spim/strcpy.s b/sys/src/libc/spim/strcpy.s new file mode 100644 index 0000000..df1e247 --- /dev/null +++ b/sys/src/libc/spim/strcpy.s @@ -0,0 +1,92 @@ +TEXT strcpy(SB), $0 + + MOVW s2+4(FP),R2 /* R2 is from pointer */ + MOVW R1, R3 /* R3 is to pointer */ + +/* + * align 'from' pointer + */ +l1: + AND $3, R2, R5 + ADDU $1, R2 + BEQ R5, l2 + MOVB -1(R2), R5 + ADDU $1, R3 + MOVB R5, -1(R3) + BNE R5, l1 + RET + +/* + * test if 'to' is also alligned + */ +l2: + AND $3,R3, R5 + BEQ R5, l4 + +/* + * copy 4 at a time, 'to' not aligned + */ +l3: + MOVW -1(R2), R4 + ADD $4, R2 + ADD $4, R3 + AND $0xff,R4, R5 + MOVB R5, -4(R3) + BEQ R5, out + + SRL $8,R4, R5 + AND $0xff, R5 + MOVB R5, -3(R3) + BEQ R5, out + + SRL $16,R4, R5 + AND $0xff, R5 + MOVB R5, -2(R3) + BEQ R5, out + + SRL $24,R4, R5 + MOVB R5, -1(R3) + BNE R5, l3 + +out: + RET + +/* + * word at a time both aligned + */ +l4: + MOVW $0xff000000, R7 + MOVW $0x00ff0000, R8 + +l5: + ADDU $4, R3 + MOVW -1(R2), R4 /* fetch */ + + ADDU $4, R2 + AND $0xff,R4, R5 /* is it byte 0 */ + AND $0xff00,R4, R6 /* is it byte 1 */ + BEQ R5, b0 + + AND R8,R4, R5 /* is it byte 2 */ + BEQ R6, b1 + + AND R7,R4, R6 /* is it byte 3 */ + BEQ R5, b2 + + MOVW R4, -4(R3) /* store */ + BNE R6, l5 + JMP out + +b0: + MOVB $0, -4(R3) + JMP out + +b1: + MOVB R4, -4(R3) + MOVB $0, -3(R3) + JMP out + +b2: + MOVH R4, -4(R3) + MOVB $0, -2(R3) + JMP out diff --git a/sys/src/libc/spim/tas.s b/sys/src/libc/spim/tas.s new file mode 100644 index 0000000..d04b393 --- /dev/null +++ b/sys/src/libc/spim/tas.s @@ -0,0 +1,30 @@ +/* + * mips user level lock code + */ + +#define LL(base, rt) WORD $((060<<26)|((base)<<21)|((rt)<<16)) +#define SC(base, rt) WORD $((070<<26)|((base)<<21)|((rt)<<16)) +#define NOOP WORD $0x27 +#define COP3 WORD $(023<<26) + + TEXT C_3ktas(SB),$0 + MOVW R1, R21 +btas: + MOVW R21, R1 + MOVB R0, 1(R1) + NOOP + COP3 + BLTZ R1, btas + RET + + TEXT _tas(SB), $0 + TEXT C_4ktas(SB), $0 + MOVW R1, R2 /* address of key */ +tas1: + MOVW $1, R3 + LL(2, 1) + NOOP + SC(2, 3) + NOOP + BEQ R3, tas1 + RET diff --git a/sys/src/libc/spim/vlop.s b/sys/src/libc/spim/vlop.s new file mode 100644 index 0000000..5bceafb --- /dev/null +++ b/sys/src/libc/spim/vlop.s @@ -0,0 +1,17 @@ +TEXT _mulv(SB), $0 + MOVW 8(FP), R2 /* hi1 */ + MOVW 4(FP), R3 /* lo1 */ + MOVW 16(FP), R4 /* hi2 */ + MOVW 12(FP), R5 /* lo2 */ + MULU R5, R3 /* lo1*lo2 -> hi:lo*/ + MOVW LO, R6 + MOVW HI, R7 + MULU R3, R4 /* lo1*hi2 -> _:hi */ + MOVW LO, R8 + ADDU R8, R7 + MULU R2, R5 /* hi1*lo2 -> _:hi */ + MOVW LO, R8 + ADDU R8, R7 + MOVW R6, 0(R1) /* lo */ + MOVW R7, 4(R1) /* hi */ + RET diff --git a/sys/src/libc/spim/vlrt.c b/sys/src/libc/spim/vlrt.c new file mode 100644 index 0000000..11a57fe --- /dev/null +++ b/sys/src/libc/spim/vlrt.c @@ -0,0 +1,722 @@ +typedef unsigned long ulong; +typedef unsigned int uint; +typedef unsigned short ushort; +typedef unsigned char uchar; +typedef signed char schar; + +#define SIGN(n) (1UL<<(n-1)) + +typedef struct Vlong Vlong; +struct Vlong +{ + union + { + struct + { + ulong lo; + ulong hi; + }; + struct + { + ushort loms; + ushort lols; + ushort hims; + ushort hils; + }; + }; +}; + +void abort(void); + +/* needed by profiler; can't be profiled. */ +#pragma profile off + +void +_addv(Vlong *r, Vlong a, Vlong b) +{ + ulong lo, hi; + + lo = a.lo + b.lo; + hi = a.hi + b.hi; + if(lo < a.lo) + hi++; + r->lo = lo; + r->hi = hi; +} + +void +_subv(Vlong *r, Vlong a, Vlong b) +{ + ulong lo, hi; + + lo = a.lo - b.lo; + hi = a.hi - b.hi; + if(lo > a.lo) + hi--; + r->lo = lo; + r->hi = hi; +} + +void +_d2v(Vlong *y, double d) +{ + union { double d; struct Vlong; } x; + ulong xhi, xlo, ylo, yhi; + int sh; + + x.d = d; + + xhi = (x.hi & 0xfffff) | 0x100000; + xlo = x.lo; + sh = 1075 - ((x.hi >> 20) & 0x7ff); + + ylo = 0; + yhi = 0; + if(sh >= 0) { + /* v = (hi||lo) >> sh */ + if(sh < 32) { + if(sh == 0) { + ylo = xlo; + yhi = xhi; + } else { + ylo = (xlo >> sh) | (xhi << (32-sh)); + yhi = xhi >> sh; + } + } else { + if(sh == 32) { + ylo = xhi; + } else + if(sh < 64) { + ylo = xhi >> (sh-32); + } + } + } else { + /* v = (hi||lo) << -sh */ + sh = -sh; + if(sh <= 10) { + ylo = xlo << sh; + yhi = (xhi << sh) | (xlo >> (32-sh)); + } else { + /* overflow */ + yhi = d; /* causes something awful */ + } + } + if(x.hi & SIGN(32)) { + if(ylo != 0) { + ylo = -ylo; + yhi = ~yhi; + } else + yhi = -yhi; + } + + y->hi = yhi; + y->lo = ylo; +} + +void +_f2v(Vlong *y, float f) +{ + + _d2v(y, f); +} + +double +_v2d(Vlong x) +{ + if(x.hi & SIGN(32)) { + if(x.lo) { + x.lo = -x.lo; + x.hi = ~x.hi; + } else + x.hi = -x.hi; + return -((long)x.hi*4294967296. + x.lo); + } + return (long)x.hi*4294967296. + x.lo; +} + +float +_v2f(Vlong x) +{ + return _v2d(x); +} + +static void +dodiv(Vlong num, Vlong den, Vlong *qp, Vlong *rp) +{ + ulong numlo, numhi, denhi, denlo, quohi, quolo, t; + int i; + + numhi = num.hi; + numlo = num.lo; + denhi = den.hi; + denlo = den.lo; + + /* + * get a divide by zero + */ + if(denlo==0 && denhi==0) { + numlo = numlo / denlo; + } + + /* + * set up the divisor and find the number of iterations needed + */ + if(numhi >= SIGN(32)) { + quohi = SIGN(32); + quolo = 0; + } else { + quohi = numhi; + quolo = numlo; + } + i = 0; + while(denhi < quohi || (denhi == quohi && denlo < quolo)) { + denhi = (denhi<<1) | (denlo>>31); + denlo <<= 1; + i++; + } + + quohi = 0; + quolo = 0; + for(; i >= 0; i--) { + quohi = (quohi<<1) | (quolo>>31); + quolo <<= 1; + if(numhi > denhi || (numhi == denhi && numlo >= denlo)) { + t = numlo; + numlo -= denlo; + if(numlo > t) + numhi--; + numhi -= denhi; + quolo |= 1; + } + denlo = (denlo>>1) | (denhi<<31); + denhi >>= 1; + } + + if(qp) { + qp->lo = quolo; + qp->hi = quohi; + } + if(rp) { + rp->lo = numlo; + rp->hi = numhi; + } +} + +void +_divvu(Vlong *q, Vlong n, Vlong d) +{ + + if(n.hi == 0 && d.hi == 0) { + q->hi = 0; + q->lo = n.lo / d.lo; + return; + } + dodiv(n, d, q, 0); +} + +void +_modvu(Vlong *r, Vlong n, Vlong d) +{ + + if(n.hi == 0 && d.hi == 0) { + r->hi = 0; + r->lo = n.lo % d.lo; + return; + } + dodiv(n, d, 0, r); +} + +static void +vneg(Vlong *v) +{ + + if(v->lo == 0) { + v->hi = -v->hi; + return; + } + v->lo = -v->lo; + v->hi = ~v->hi; +} + +void +_divv(Vlong *q, Vlong n, Vlong d) +{ + long nneg, dneg; + + if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) { + q->lo = (long)n.lo / (long)d.lo; + q->hi = ((long)q->lo) >> 31; + return; + } + nneg = n.hi >> 31; + if(nneg) + vneg(&n); + dneg = d.hi >> 31; + if(dneg) + vneg(&d); + dodiv(n, d, q, 0); + if(nneg != dneg) + vneg(q); +} + +void +_modv(Vlong *r, Vlong n, Vlong d) +{ + long nneg, dneg; + + if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) { + r->lo = (long)n.lo % (long)d.lo; + r->hi = ((long)r->lo) >> 31; + return; + } + nneg = n.hi >> 31; + if(nneg) + vneg(&n); + dneg = d.hi >> 31; + if(dneg) + vneg(&d); + dodiv(n, d, 0, r); + if(nneg) + vneg(r); +} + +void +_rshav(Vlong *r, Vlong a, int b) +{ + long t; + + t = a.hi; + if(b >= 32) { + r->hi = t>>31; + if(b >= 64) { + /* this is illegal re C standard */ + r->lo = t>>31; + return; + } + r->lo = t >> (b-32); + return; + } + if(b <= 0) { + r->hi = t; + r->lo = a.lo; + return; + } + r->hi = t >> b; + r->lo = (t << (32-b)) | (a.lo >> b); +} + +void +_rshlv(Vlong *r, Vlong a, int b) +{ + ulong t; + + t = a.hi; + if(b >= 32) { + r->hi = 0; + if(b >= 64) { + /* this is illegal re C standard */ + r->lo = 0; + return; + } + r->lo = t >> (b-32); + return; + } + if(b <= 0) { + r->hi = t; + r->lo = a.lo; + return; + } + r->hi = t >> b; + r->lo = (t << (32-b)) | (a.lo >> b); +} + +void +_lshv(Vlong *r, Vlong a, int b) +{ + ulong t; + + t = a.lo; + if(b >= 32) { + r->lo = 0; + if(b >= 64) { + /* this is illegal re C standard */ + r->hi = 0; + return; + } + r->hi = t << (b-32); + return; + } + if(b <= 0) { + r->lo = t; + r->hi = a.hi; + return; + } + r->lo = t << b; + r->hi = (t >> (32-b)) | (a.hi << b); +} + +void +_andv(Vlong *r, Vlong a, Vlong b) +{ + r->hi = a.hi & b.hi; + r->lo = a.lo & b.lo; +} + +void +_orv(Vlong *r, Vlong a, Vlong b) +{ + r->hi = a.hi | b.hi; + r->lo = a.lo | b.lo; +} + +void +_xorv(Vlong *r, Vlong a, Vlong b) +{ + r->hi = a.hi ^ b.hi; + r->lo = a.lo ^ b.lo; +} + +void +_vpp(Vlong *l, Vlong *r) +{ + + l->hi = r->hi; + l->lo = r->lo; + r->lo++; + if(r->lo == 0) + r->hi++; +} + +void +_vmm(Vlong *l, Vlong *r) +{ + + l->hi = r->hi; + l->lo = r->lo; + if(r->lo == 0) + r->hi--; + r->lo--; +} + +void +_ppv(Vlong *l, Vlong *r) +{ + + r->lo++; + if(r->lo == 0) + r->hi++; + l->hi = r->hi; + l->lo = r->lo; +} + +void +_mmv(Vlong *l, Vlong *r) +{ + + if(r->lo == 0) + r->hi--; + r->lo--; + l->hi = r->hi; + l->lo = r->lo; +} + +void +_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv) +{ + Vlong t, u; + + u.lo = 0; + u.hi = 0; + switch(type) { + default: + abort(); + break; + + case 1: /* schar */ + t.lo = *(schar*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(schar*)lv = u.lo; + break; + + case 2: /* uchar */ + t.lo = *(uchar*)lv; + t.hi = 0; + fn(&u, t, rv); + *(uchar*)lv = u.lo; + break; + + case 3: /* short */ + t.lo = *(short*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(short*)lv = u.lo; + break; + + case 4: /* ushort */ + t.lo = *(ushort*)lv; + t.hi = 0; + fn(&u, t, rv); + *(ushort*)lv = u.lo; + break; + + case 9: /* int */ + t.lo = *(int*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(int*)lv = u.lo; + break; + + case 10: /* uint */ + t.lo = *(uint*)lv; + t.hi = 0; + fn(&u, t, rv); + *(uint*)lv = u.lo; + break; + + case 5: /* long */ + t.lo = *(long*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(long*)lv = u.lo; + break; + + case 6: /* ulong */ + t.lo = *(ulong*)lv; + t.hi = 0; + fn(&u, t, rv); + *(ulong*)lv = u.lo; + break; + + case 7: /* vlong */ + case 8: /* uvlong */ + fn(&u, *(Vlong*)lv, rv); + *(Vlong*)lv = u; + break; + } + *ret = u; +} + +void +_p2v(Vlong *ret, void *p) +{ + long t; + + t = (ulong)p; + ret->lo = t; + ret->hi = 0; +} + +void +_sl2v(Vlong *ret, long sl) +{ + long t; + + t = sl; + ret->lo = t; + ret->hi = t >> 31; +} + +void +_ul2v(Vlong *ret, ulong ul) +{ + long t; + + t = ul; + ret->lo = t; + ret->hi = 0; +} + +void +_si2v(Vlong *ret, int si) +{ + long t; + + t = si; + ret->lo = t; + ret->hi = t >> 31; +} + +void +_ui2v(Vlong *ret, uint ui) +{ + long t; + + t = ui; + ret->lo = t; + ret->hi = 0; +} + +void +_sh2v(Vlong *ret, long sh) +{ + long t; + + t = (sh << 16) >> 16; + ret->lo = t; + ret->hi = t >> 31; +} + +void +_uh2v(Vlong *ret, ulong ul) +{ + long t; + + t = ul & 0xffff; + ret->lo = t; + ret->hi = 0; +} + +void +_sc2v(Vlong *ret, long uc) +{ + long t; + + t = (uc << 24) >> 24; + ret->lo = t; + ret->hi = t >> 31; +} + +void +_uc2v(Vlong *ret, ulong ul) +{ + long t; + + t = ul & 0xff; + ret->lo = t; + ret->hi = 0; +} + +long +_v2sc(Vlong rv) +{ + long t; + + t = rv.lo & 0xff; + return (t << 24) >> 24; +} + +long +_v2uc(Vlong rv) +{ + + return rv.lo & 0xff; +} + +long +_v2sh(Vlong rv) +{ + long t; + + t = rv.lo & 0xffff; + return (t << 16) >> 16; +} + +long +_v2uh(Vlong rv) +{ + + return rv.lo & 0xffff; +} + +long +_v2sl(Vlong rv) +{ + + return rv.lo; +} + +long +_v2ul(Vlong rv) +{ + + return rv.lo; +} + +long +_v2si(Vlong rv) +{ + + return rv.lo; +} + +long +_v2ui(Vlong rv) +{ + + return rv.lo; +} + +int +_testv(Vlong rv) +{ + return rv.lo || rv.hi; +} + +int +_eqv(Vlong lv, Vlong rv) +{ + return lv.lo == rv.lo && lv.hi == rv.hi; +} + +int +_nev(Vlong lv, Vlong rv) +{ + return lv.lo != rv.lo || lv.hi != rv.hi; +} + +int +_ltv(Vlong lv, Vlong rv) +{ + return (long)lv.hi < (long)rv.hi || + (lv.hi == rv.hi && lv.lo < rv.lo); +} + +int +_lev(Vlong lv, Vlong rv) +{ + return (long)lv.hi < (long)rv.hi || + (lv.hi == rv.hi && lv.lo <= rv.lo); +} + +int +_gtv(Vlong lv, Vlong rv) +{ + return (long)lv.hi > (long)rv.hi || + (lv.hi == rv.hi && lv.lo > rv.lo); +} + +int +_gev(Vlong lv, Vlong rv) +{ + return (long)lv.hi > (long)rv.hi || + (lv.hi == rv.hi && lv.lo >= rv.lo); +} + +int +_lov(Vlong lv, Vlong rv) +{ + return lv.hi < rv.hi || + (lv.hi == rv.hi && lv.lo < rv.lo); +} + +int +_lsv(Vlong lv, Vlong rv) +{ + return lv.hi < rv.hi || + (lv.hi == rv.hi && lv.lo <= rv.lo); +} + +int +_hiv(Vlong lv, Vlong rv) +{ + return lv.hi > rv.hi || + (lv.hi == rv.hi && lv.lo > rv.lo); +} + +int +_hsv(Vlong lv, Vlong rv) +{ + return lv.hi > rv.hi || + (lv.hi == rv.hi && lv.lo >= rv.lo); +} diff --git a/sys/src/libmach/vdb.c b/sys/src/libmach/vdb.c index f6a5548..1996102 100644 --- a/sys/src/libmach/vdb.c +++ b/sys/src/libmach/vdb.c @@ -42,7 +42,7 @@ Machdata mipsmach = Machdata mipsmachle = { - {0, 0, 0, 0xD}, /* break point */ + {0xD, 0, 0, 0}, /* break point */ 4, /* break point size */ leswab, /* short to local byte order */ @@ -65,7 +65,7 @@ Machdata mipsmachle = */ Machdata mipsmach2le = { - {0, 0, 0, 0xD}, /* break point */ + {0xD, 0, 0, 0}, /* break point */ 4, /* break point size */ leswab, /* short to local byte order */ diff --git a/sys/src/libmp/spim/mkfile b/sys/src/libmp/spim/mkfile new file mode 100644 index 0000000..6fc2705 --- /dev/null +++ b/sys/src/libmp/spim/mkfile @@ -0,0 +1,21 @@ +objtype=spim += blen, sum has room for alen+1 digits + * + * R1 == a (first arg passed in R1) + * R3 == carry + * R4 == alen + * R5 == b + * R6 == blen + * R7 == sum + * R2 == temporary + * R8 == temporary + * R9 == temporary + */ +TEXT mpvecadd(SB),$-4 + + MOVW alen+4(FP), R4 + MOVW b+8(FP), R5 + MOVW blen+12(FP), R6 + MOVW sum+16(FP), R7 + SUBU R6, R4 /* calculate counter for second loop (alen > blen) */ + MOVW R0, R3 + + /* if blen == 0, don't need to add it in */ + BEQ R6,_add1 + + /* sum[0:blen-1],carry = a[0:blen-1] + b[0:blen-1] */ +_addloop1: + MOVW 0(R1), R8 + ADDU $4, R1 + MOVW 0(R5), R9 + ADDU $4, R5 + ADDU R3, R8 + SGTU R3, R8, R3 + ADDU R8, R9 + SGTU R8, R9, R2 + ADDU R2, R3 + MOVW R9, 0(R7) + ADDU $4, R7 + SUBU $1, R6 + BNE R6, _addloop1 + +_add1: + /* if alen == blen, we're done */ + BEQ R4, _addend + + /* sum[blen:alen-1],carry = a[blen:alen-1] + 0 + carry */ +_addloop2: + MOVW 0(R1), R8 + ADDU $4, R1 + ADDU R3, R8 + SGTU R3, R8, R3 + MOVW R8, 0(R7) + ADDU $4, R7 + SUBU $1, R4 + BNE R4, _addloop2 + + /* sum[alen] = carry */ +_addend: + MOVW R3, 0(R7) + RET diff --git a/sys/src/libmp/spim/mpvecdigmuladd.s b/sys/src/libmp/spim/mpvecdigmuladd.s new file mode 100644 index 0000000..085d358 --- /dev/null +++ b/sys/src/libmp/spim/mpvecdigmuladd.s @@ -0,0 +1,58 @@ +/* + * mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p += b*m + * + * each step looks like: + * hi,lo = m*b[i] + * lo += oldhi + carry + * hi += carry + * p[i] += lo + * oldhi = hi + * + * the registers are: + * b = R1 + * n = R4 + * m = R5 + * p = R6 + * i = R7 + * hi = R8 - constrained by hardware + * lo = R9 - constrained by hardware + * oldhi = R10 + * tmp = R11 + * + */ +TEXT mpvecdigmuladd(SB),$0 + + MOVW n+4(FP),R4 + MOVW m+8(FP),R5 + MOVW p+12(FP),R6 + + + MOVW R0, R10 /* oldhi = 0 */ + BEQ R6, _muladd1 +_muladdloop: + MOVW 0(R1), R9 /* lo = b[i] */ + ADDU $4, R1 + MOVW 0(R6), R11 /* tmp = p[i] */ + MULU R9, R5 + MOVW HI, R8 /* hi = (b[i] * m)>>32 */ + MOVW LO, R9 /* lo = b[i] * m */ + ADDU R10, R9 /* lo += oldhi */ + SGTU R10, R9, R2 + ADDU R2, R8 /* hi += carry */ + ADDU R9, R11 /* tmp += lo */ + SGTU R9, R11, R2 + ADDU R2, R8 /* hi += carry */ + MOVW R11, 0(R6) /* p[i] = tmp */ + ADDU $4, R6 + MOVW R8, R10 /* oldhi = hi */ + SUBU $1, R4 + BNE R4, _muladdloop + +_muladd1: + MOVW 0(R6), R11 /* tmp = p[i] */ + ADDU R10, R11 /* tmp += oldhi */ + MOVW R11, 0(R6) /* p[i] = tmp */ + + RET diff --git a/sys/src/libmp/spim/mpvecdigmulsub.s b/sys/src/libmp/spim/mpvecdigmulsub.s new file mode 100644 index 0000000..6c717ae --- /dev/null +++ b/sys/src/libmp/spim/mpvecdigmulsub.s @@ -0,0 +1,61 @@ +/* + * mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p -= b*m + * + * each step looks like: + * hi,lo = m*b[i] + * lo += oldhi + carry + * hi += carry + * p[i] += lo + * oldhi = hi + * + * the registers are: + * b = R1 + * n = R4 + * m = R5 + * p = R6 + * i = R7 + * hi = R8 - constrained by hardware + * lo = R9 - constrained by hardware + * oldhi = R10 + * tmp = R11 + * + */ +TEXT mpvecdigmulsub(SB),$0 + + MOVW n+4(FP),R4 + MOVW m+8(FP),R5 + MOVW p+12(FP),R6 + + MOVW R0, R10 /* oldhi = 0 */ +_mulsubloop: + MOVW 0(R1), R9 /* lo = b[i] */ + ADDU $4, R1 + MOVW 0(R6), R11 /* tmp = p[i] */ + MULU R9, R5 + MOVW HI, R8 /* hi = (b[i] * m)>>32 */ + MOVW LO, R9 /* lo = b[i] * m */ + ADDU R10, R9 /* lo += oldhi */ + SGTU R10, R9, R2 + ADDU R2, R8 /* hi += carry */ + SUBU R9, R11, R3 /* tmp -= lo */ + SGTU R3, R11, R2 + ADDU R2, R8 /* hi += carry */ + MOVW R3, 0(R6) /* p[i] = tmp */ + ADDU $4, R6 + MOVW R8, R10 /* oldhi = hi */ + SUBU $1, R4 + BNE R4, _mulsubloop + + MOVW 0(R6), R11 /* tmp = p[i] */ + SUBU R10, R11, R3 /* tmp -= oldhi */ + MOVW R3, 0(R6) /* p[i] = tmp */ + SGTU R3, R11, R1 + BNE R1, _mulsub2 + MOVW $1, R1 /* return +1 for positive result */ + RET + +_mulsub2: + MOVW $-1, R1 /* return -1 for negative result */ + RET diff --git a/sys/src/libmp/spim/mpvecsub.s b/sys/src/libmp/spim/mpvecsub.s new file mode 100644 index 0000000..ca156fb --- /dev/null +++ b/sys/src/libmp/spim/mpvecsub.s @@ -0,0 +1,66 @@ +#define BDNZ BC 16,0, +#define BDNE BC 0,2, + +/* + * mpvecadd(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *sum) + * + * sum[0:alen] = a[0:alen-1] - b[0:blen-1] + * + * prereq: alen >= blen, sum has room for alen+1 digits + * + * R1 == a (first arg passed in R1) + * R3 == carry + * R4 == alen + * R5 == b + * R6 == blen + * R7 == sum + * R2 == temporary + * R8 == temporary + * R9 == temporary + */ +TEXT mpvecsub(SB),$-4 + + MOVW alen+4(FP), R4 + MOVW b+8(FP), R5 + MOVW blen+12(FP), R6 + MOVW sum+16(FP), R7 + SUBU R6, R4 /* calculate counter for second loop (alen > blen) */ + MOVW R0, R3 + + /* if blen == 0, don't need to subtract it */ + BEQ R6,_sub1 + + /* sum[0:blen-1],carry = a[0:blen-1] - b[0:blen-1] */ +_subloop1: + MOVW 0(R1), R8 + ADDU $4, R1 + MOVW 0(R5), R9 + ADDU $4, R5 + SUBU R3, R8, R2 + SGTU R2, R8, R3 + SUBU R9, R2, R8 + SGTU R8, R2, R9 + ADDU R9, R3 + MOVW R8, 0(R7) + ADDU $4, R7 + SUBU $1, R6 + BNE R6, _subloop1 + +_sub1: + /* if alen == blen, we're done */ + BEQ R4, _subend + + /* sum[blen:alen-1],carry = a[blen:alen-1] + 0 + carry */ +_subloop2: + MOVW 0(R1), R8 + ADDU $4, R1 + SUBU R3, R8, R2 + SGTU R2, R8, R3 + MOVW R2, 0(R7) + ADDU $4, R7 + SUBU $1, R4 + BNE R4, _subloop2 + + /* sum[alen] = carry */ +_subend: + RET diff --git a/sys/src/libsec/spim/md5block.s b/sys/src/libsec/spim/md5block.s new file mode 100644 index 0000000..f009002 --- /dev/null +++ b/sys/src/libsec/spim/md5block.s @@ -0,0 +1,296 @@ +/* + * rfc1321 requires that I include this. The code is new. The constants + * all come from the rfc (hence the copyright). We trade a table for the + * macros in rfc. The total size is a lot less. -- presotto + * + * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All + * rights reserved. + * + * License to copy and use this software is granted provided that it + * is identified as the "RSA Data Security, Inc. MD5 Message-Digest + * Algorithm" in all material mentioning or referencing this software + * or this function. + * + * License is also granted to make and use derivative works provided + * that such works are identified as "derived from the RSA Data + * Security, Inc. MD5 Message-Digest Algorithm" in all material + * mentioning or referencing the derived work. + * + * RSA Data Security, Inc. makes no representations concerning either + * the merchantability of this software or the suitability of this + * software forany particular purpose. It is provided "as is" + * without express or implied warranty of any kind. + * These notices must be retained in any copies of any part of this + * documentation and/or software. + */ + + /* round 1 */ + DATA md5tab<>+( 0*4)(SB)/4,$0xd76aa478 + DATA md5tab<>+( 1*4)(SB)/4,$0xe8c7b756 + DATA md5tab<>+( 2*4)(SB)/4,$0x242070db + DATA md5tab<>+( 3*4)(SB)/4,$0xc1bdceee + DATA md5tab<>+( 4*4)(SB)/4,$0xf57c0faf + DATA md5tab<>+( 5*4)(SB)/4,$0x4787c62a + DATA md5tab<>+( 6*4)(SB)/4,$0xa8304613 + DATA md5tab<>+( 7*4)(SB)/4,$0xfd469501 + DATA md5tab<>+( 8*4)(SB)/4,$0x698098d8 + DATA md5tab<>+( 9*4)(SB)/4,$0x8b44f7af + DATA md5tab<>+(10*4)(SB)/4,$0xffff5bb1 + DATA md5tab<>+(11*4)(SB)/4,$0x895cd7be + DATA md5tab<>+(12*4)(SB)/4,$0x6b901122 + DATA md5tab<>+(13*4)(SB)/4,$0xfd987193 + DATA md5tab<>+(14*4)(SB)/4,$0xa679438e + DATA md5tab<>+(15*4)(SB)/4,$0x49b40821 + + /* round 2 */ + DATA md5tab<>+(16*4)(SB)/4,$0xf61e2562 + DATA md5tab<>+(17*4)(SB)/4,$0xc040b340 + DATA md5tab<>+(18*4)(SB)/4,$0x265e5a51 + DATA md5tab<>+(19*4)(SB)/4,$0xe9b6c7aa + DATA md5tab<>+(20*4)(SB)/4,$0xd62f105d + DATA md5tab<>+(21*4)(SB)/4,$0x02441453 + DATA md5tab<>+(22*4)(SB)/4,$0xd8a1e681 + DATA md5tab<>+(23*4)(SB)/4,$0xe7d3fbc8 + DATA md5tab<>+(24*4)(SB)/4,$0x21e1cde6 + DATA md5tab<>+(25*4)(SB)/4,$0xc33707d6 + DATA md5tab<>+(26*4)(SB)/4,$0xf4d50d87 + DATA md5tab<>+(27*4)(SB)/4,$0x455a14ed + DATA md5tab<>+(28*4)(SB)/4,$0xa9e3e905 + DATA md5tab<>+(29*4)(SB)/4,$0xfcefa3f8 + DATA md5tab<>+(30*4)(SB)/4,$0x676f02d9 + DATA md5tab<>+(31*4)(SB)/4,$0x8d2a4c8a + + /* round 3 */ + DATA md5tab<>+(32*4)(SB)/4,$0xfffa3942 + DATA md5tab<>+(33*4)(SB)/4,$0x8771f681 + DATA md5tab<>+(34*4)(SB)/4,$0x6d9d6122 + DATA md5tab<>+(35*4)(SB)/4,$0xfde5380c + DATA md5tab<>+(36*4)(SB)/4,$0xa4beea44 + DATA md5tab<>+(37*4)(SB)/4,$0x4bdecfa9 + DATA md5tab<>+(38*4)(SB)/4,$0xf6bb4b60 + DATA md5tab<>+(39*4)(SB)/4,$0xbebfbc70 + DATA md5tab<>+(40*4)(SB)/4,$0x289b7ec6 + DATA md5tab<>+(41*4)(SB)/4,$0xeaa127fa + DATA md5tab<>+(42*4)(SB)/4,$0xd4ef3085 + DATA md5tab<>+(43*4)(SB)/4,$0x04881d05 + DATA md5tab<>+(44*4)(SB)/4,$0xd9d4d039 + DATA md5tab<>+(45*4)(SB)/4,$0xe6db99e5 + DATA md5tab<>+(46*4)(SB)/4,$0x1fa27cf8 + DATA md5tab<>+(47*4)(SB)/4,$0xc4ac5665 + + /* round 4 */ + DATA md5tab<>+(48*4)(SB)/4,$0xf4292244 + DATA md5tab<>+(49*4)(SB)/4,$0x432aff97 + DATA md5tab<>+(50*4)(SB)/4,$0xab9423a7 + DATA md5tab<>+(51*4)(SB)/4,$0xfc93a039 + DATA md5tab<>+(52*4)(SB)/4,$0x655b59c3 + DATA md5tab<>+(53*4)(SB)/4,$0x8f0ccc92 + DATA md5tab<>+(54*4)(SB)/4,$0xffeff47d + DATA md5tab<>+(55*4)(SB)/4,$0x85845dd1 + DATA md5tab<>+(56*4)(SB)/4,$0x6fa87e4f + DATA md5tab<>+(57*4)(SB)/4,$0xfe2ce6e0 + DATA md5tab<>+(58*4)(SB)/4,$0xa3014314 + DATA md5tab<>+(59*4)(SB)/4,$0x4e0811a1 + DATA md5tab<>+(60*4)(SB)/4,$0xf7537e82 + DATA md5tab<>+(61*4)(SB)/4,$0xbd3af235 + DATA md5tab<>+(62*4)(SB)/4,$0x2ad7d2bb + DATA md5tab<>+(63*4)(SB)/4,$0xeb86d391 + +#define S11 7 +#define S12 12 +#define S13 17 +#define S14 22 + +#define S21 5 +#define S22 9 +#define S23 14 +#define S24 20 + +#define S31 4 +#define S32 11 +#define S33 16 +#define S34 23 + +#define S41 6 +#define S42 10 +#define S43 15 +#define S44 21 + +#define AREG R5 +#define BREG R6 +#define CREG R7 +#define DREG R8 +#define DATAREG R1 +#define TABREG R10 +#define STREG R11 +#define XREG R12 +#define ELOOPREG R13 +#define EDREG R14 +#define IREG R15 + +#define TMP1 R9 +#define TMP2 R2 +#define TMP3 R3 +#define TMP4 R4 + +/* + * decode little endian data into x[off], then the body + * bodies have this form: + * a += FN(B,C,D); + * a += x[off] + t[off]; + * a = (a << S11) | (a >> (32 - S11)); + * a += b; + */ +#define BODY1(off,FN,SH,A,B,C,D)\ + MOVW off(DATAREG),TMP2;\ + MOVW off(TABREG),TMP3;\ + FN(B,C,D)\ + ADDU TMP1,A;\ + MOVW TMP2,off(XREG);\ + ADDU TMP2,A;\ + ADDU TMP3,A;\ + SLL $SH,A,TMP1;\ + SRL $(32-SH),A;\ + OR TMP1,A;\ + ADDU B,A;\ + +#define BODY(off,inc,FN,SH,A,B,C,D)\ + MOVW off(TABREG),TMP3;\ + ADDU XREG,IREG,TMP4;\ + MOVW (TMP4),TMP2;\ + ADDU $(inc*4),IREG;\ + AND $63,IREG;\ + FN(B,C,D)\ + ADDU TMP1,A;\ + ADDU TMP2,A;\ + ADDU TMP3,A;\ + SLL $SH,A,TMP1;\ + SRL $(32-SH),A;\ + OR TMP1,A;\ + ADDU B,A;\ + +/* + * fn1 = ((c ^ d) & b) ^ d + */ +#define FN1(B,C,D)\ + XOR C,D,TMP1;\ + AND B,TMP1;\ + XOR D,TMP1;\ + +/* + * fn2 = ((b ^ c) & d) ^ c; + */ +#define FN2(B,C,D)\ + XOR B,C,TMP1;\ + AND D,TMP1;\ + XOR C,TMP1;\ + +/* + * fn3 = b ^ c ^ d; + */ +#define FN3(B,C,D)\ + XOR B,C,TMP1;\ + XOR D,TMP1;\ + +/* + * fn4 = c ^ (b | ~d); + */ +#define FN4(B,C,D)\ + XOR $-1,D,TMP1;\ + OR B,TMP1;\ + XOR C,TMP1;\ + +#define DATA 0 +#define LEN 4 +#define STATE 8 + +#define XOFF (-4-16*4) + + TEXT _md5block+0(SB),$68 + + MOVW len+LEN(FP),TMP1 + ADDU DATAREG,TMP1,EDREG + MOVW state+STATE(FP),STREG + + MOVW 0(STREG),AREG + MOVW 4(STREG),BREG + MOVW 8(STREG),CREG + MOVW 12(STREG),DREG + +mainloop: + + MOVW $md5tab<>+0(SB),TABREG + ADDU $(16*4),DATAREG,ELOOPREG + MOVW $x+XOFF(SP),XREG + +loop1: + BODY1(0,FN1,S11,AREG,BREG,CREG,DREG) + BODY1(4,FN1,S12,DREG,AREG,BREG,CREG) + BODY1(8,FN1,S13,CREG,DREG,AREG,BREG) + BODY1(12,FN1,S14,BREG,CREG,DREG,AREG) + + ADDU $16,DATAREG + ADDU $16,TABREG + ADDU $16,XREG + + BNE DATAREG,ELOOPREG,loop1 + + + MOVW $x+XOFF(SP),XREG + MOVW $(1*4),IREG + MOVW $(1*4),ELOOPREG +loop2: + BODY(0,5,FN2,S21,AREG,BREG,CREG,DREG) + BODY(4,5,FN2,S22,DREG,AREG,BREG,CREG) + BODY(8,5,FN2,S23,CREG,DREG,AREG,BREG) + BODY(12,5,FN2,S24,BREG,CREG,DREG,AREG) + + ADDU $16,TABREG + + BNE IREG,ELOOPREG,loop2 + + + MOVW $(5*4),IREG + MOVW $(5*4),ELOOPREG +loop3: + BODY(0,3,FN3,S31,AREG,BREG,CREG,DREG) + BODY(4,3,FN3,S32,DREG,AREG,BREG,CREG) + BODY(8,3,FN3,S33,CREG,DREG,AREG,BREG) + BODY(12,3,FN3,S34,BREG,CREG,DREG,AREG) + + ADDU $16,TABREG + + BNE IREG,ELOOPREG,loop3 + + + MOVW $0,IREG +loop4: + BODY(0,7,FN4,S41,AREG,BREG,CREG,DREG) + BODY(4,7,FN4,S42,DREG,AREG,BREG,CREG) + BODY(8,7,FN4,S43,CREG,DREG,AREG,BREG) + BODY(12,7,FN4,S44,BREG,CREG,DREG,AREG) + + ADDU $16,TABREG + + BNE IREG,R0,loop4 + + MOVW 0(STREG),TMP1 + MOVW 4(STREG),TMP2 + MOVW 8(STREG),TMP3 + MOVW 12(STREG),TMP4 + ADDU TMP1,AREG + ADDU TMP2,BREG + ADDU TMP3,CREG + ADDU TMP4,DREG + MOVW AREG,0(STREG) + MOVW BREG,4(STREG) + MOVW CREG,8(STREG) + MOVW DREG,12(STREG) + + BNE DATAREG,EDREG,mainloop + + RET + + GLOBL md5tab<>+0(SB),$256 + + END diff --git a/sys/src/libsec/spim/mkfile b/sys/src/libsec/spim/mkfile new file mode 100644 index 0000000..3f9c848 --- /dev/null +++ b/sys/src/libsec/spim/mkfile @@ -0,0 +1,19 @@ +objtype=spim +>> 2 + */ +#define BODYX(off,FN,V,A,B,C,D,E)\ + FN(B,C,D)\ + ADDU TMP1,E;\ + ADDU V,E;\ + MOVW TMP2,off(WREG);\ + ADDU TMP2,E;\ + SLL $5,A,TMP3;\ + SRL $27,A,TMP4;\ + OR TMP3,TMP4;\ + ADDU TMP4,E;\ + SLL $30,B,TMP4;\ + SRL $2,B;\ + OR TMP4,B + +/* + * x = data[i] + * BODYX + */ +#define BODY1(off,FN,V,A,B,C,D,E)\ + MOVBU off(DATAREG),TMP2;\ + MOVBU (off+1)(DATAREG),TMP3;\ + MOVBU (off+2)(DATAREG),TMP1;\ + MOVBU (off+3)(DATAREG),TMP4;\ + SLL $24,TMP2;\ + SLL $16,TMP3;\ + OR TMP3,TMP2;\ + SLL $8,TMP1;\ + OR TMP1,TMP2;\ + OR TMP4,TMP2;\ + BODYX(off,FN,V,A,B,C,D,E) + +/* + * x = (wp[off-3] ^ wp[off-8] ^ wp[off-14] ^ wp[off-16]) <<< 1; + * BODYX + */ +#define BODY(off,FN,V,A,B,C,D,E)\ + MOVW (off-64)(WREG),TMP1;\ + MOVW (off-56)(WREG),TMP2;\ + MOVW (off-32)(WREG),TMP3;\ + MOVW (off-12)(WREG),TMP4;\ + XOR TMP1,TMP2;\ + XOR TMP3,TMP2;\ + XOR TMP4,TMP2;\ + SLL $1,TMP2,TMP1;\ + SRL $31,TMP2;\ + OR TMP1,TMP2;\ + BODYX(off,FN,V,A,B,C,D,E) + +/* + * fn1 = (((C^D)&B)^D); + */ +#define FN1(B,C,D)\ + XOR C,D,TMP1;\ + AND B,TMP1;\ + XOR D,TMP1; + +/* + * fn24 = B ^ C ^ D + */ +#define FN24(B,C,D)\ + XOR B,C,TMP1;\ + XOR D,TMP1; + +/* + * fn3 = ((B ^ C) & (D ^ B)) ^ B + */ +#define FN3(B,C,D)\ + XOR B,C,TMP1;\ + XOR B,D,TMP4;\ + AND TMP4,TMP1;\ + XOR B,TMP1; + +/* + * stack offsets + * void vtSha1Block(ulong *STATE, uchar *DATA, int LEN) + */ +#define DATA 0 +#define LEN 4 +#define STATE 8 + +/* + * stack offsets for locals + * ulong w[80]; + * uchar *edata; + * ulong *w15, *w40, *w60, *w80; + * register local + * ulong *wp = BP + * ulong a = eax, b = ebx, c = ecx, d = edx, e = esi + * ulong tmp = edi + */ +#define WARRAY (-4-(80*4)) + +#define AREG R5 +#define BREG R6 +#define CREG R7 +#define DREG R8 +#define EREG R9 +#define DATAREG R1 +#define STREG R11 +#define WREG R12 +#define W15REG R13 +#define W60REG R14 +#define W40REG R15 +#define W80REG R16 +#define EDREG R17 +#define VREG R18 + +#define TMP1 R10 +#define TMP2 R2 +#define TMP3 R3 +#define TMP4 R4 +#define TMP5 R19 + + MOVW len+LEN(FP),TMP1 + MOVW state+STATE(FP),STREG + ADDU DATAREG,TMP1,EDREG + + MOVW 0(STREG),AREG + MOVW 4(STREG),BREG + MOVW 8(STREG),CREG + MOVW 12(STREG),DREG + MOVW 16(STREG),EREG + + MOVW $warray+WARRAY(SP),WREG + ADDU $(15*4),WREG,W15REG + ADDU $(40*4),WREG,W40REG + ADDU $(60*4),WREG,W60REG + ADDU $(80*4),WREG,W80REG + +mainloop: + MOVW $warray+WARRAY(SP),WREG + + MOVW $0x5a827999,VREG +loop1: + BODY1(0,FN1,VREG,AREG,BREG,CREG,DREG,EREG) + BODY1(4,FN1,VREG,EREG,AREG,BREG,CREG,DREG) + BODY1(8,FN1,VREG,DREG,EREG,AREG,BREG,CREG) + BODY1(12,FN1,VREG,CREG,DREG,EREG,AREG,BREG) + BODY1(16,FN1,VREG,BREG,CREG,DREG,EREG,AREG) + + ADDU $20,DATAREG + ADDU $20,WREG + BNE WREG,W15REG,loop1 + + BODY1(0,FN1,VREG,AREG,BREG,CREG,DREG,EREG) + ADDU $4,DATAREG + + BODY(4,FN1,VREG,EREG,AREG,BREG,CREG,DREG) + BODY(8,FN1,VREG,DREG,EREG,AREG,BREG,CREG) + BODY(12,FN1,VREG,CREG,DREG,EREG,AREG,BREG) + BODY(16,FN1,VREG,BREG,CREG,DREG,EREG,AREG) + + ADDU $20,WREG + + MOVW $0x6ed9eba1,VREG +loop2: + BODY(0,FN24,VREG,AREG,BREG,CREG,DREG,EREG) + BODY(4,FN24,VREG,EREG,AREG,BREG,CREG,DREG) + BODY(8,FN24,VREG,DREG,EREG,AREG,BREG,CREG) + BODY(12,FN24,VREG,CREG,DREG,EREG,AREG,BREG) + BODY(16,FN24,VREG,BREG,CREG,DREG,EREG,AREG) + + ADDU $20,WREG + BNE WREG,W40REG,loop2 + + MOVW $0x8f1bbcdc,VREG +loop3: + BODY(0,FN3,VREG,AREG,BREG,CREG,DREG,EREG) + BODY(4,FN3,VREG,EREG,AREG,BREG,CREG,DREG) + BODY(8,FN3,VREG,DREG,EREG,AREG,BREG,CREG) + BODY(12,FN3,VREG,CREG,DREG,EREG,AREG,BREG) + BODY(16,FN3,VREG,BREG,CREG,DREG,EREG,AREG) + + ADDU $20,WREG + BNE WREG,W60REG,loop3 + + MOVW $0xca62c1d6,VREG +loop4: + BODY(0,FN24,VREG,AREG,BREG,CREG,DREG,EREG) + BODY(4,FN24,VREG,EREG,AREG,BREG,CREG,DREG) + BODY(8,FN24,VREG,DREG,EREG,AREG,BREG,CREG) + BODY(12,FN24,VREG,CREG,DREG,EREG,AREG,BREG) + BODY(16,FN24,VREG,BREG,CREG,DREG,EREG,AREG) + + ADDU $20,WREG + BNE WREG,W80REG,loop4 + + MOVW 0(STREG),TMP1 + MOVW 4(STREG),TMP2 + MOVW 8(STREG),TMP3 + MOVW 12(STREG),TMP4 + MOVW 16(STREG),TMP5 + + ADDU TMP1,AREG + ADDU TMP2,BREG + ADDU TMP3,CREG + ADDU TMP4,DREG + ADDU TMP5,EREG + + MOVW AREG,0(STREG) + MOVW BREG,4(STREG) + MOVW CREG,8(STREG) + MOVW DREG,12(STREG) + MOVW EREG,16(STREG) + + BNE DATAREG,EDREG,mainloop + + RET + + END diff --git a/sys/src/libthread/spim.c b/sys/src/libthread/spim.c new file mode 100644 index 0000000..60d4bd1 --- /dev/null +++ b/sys/src/libthread/spim.c @@ -0,0 +1,26 @@ +#include +#include +#include +#include "threadimpl.h" + +/* first argument goes in a register; simplest just to ignore it */ +static void +launcherspim(int, void (*f)(void *arg), void *arg) +{ + (*f)(arg); + threadexits(nil); +} + +void +_threadinitstack(Thread *t, void (*f)(void*), void *arg) +{ + ulong *tos; + + tos = (ulong*)&t->stk[t->stksize&~7]; + *--tos = (ulong)arg; + *--tos = (ulong)f; + *--tos = 0; /* first arg to launcherspim */ + *--tos = 0; /* place to store return PC */ + t->sched[JMPBUFPC] = (ulong)launcherspim+JMPBUFDPC; + t->sched[JMPBUFSP] = (ulong)tos; +} diff --git a/sys/src/libthread/xincspim.s b/sys/src/libthread/xincspim.s new file mode 100644 index 0000000..8e53c1a --- /dev/null +++ b/sys/src/libthread/xincspim.s @@ -0,0 +1,46 @@ +/* + * R4000 user level lock code + */ + +#define LL(base, rt) WORD $((060<<26)|((base)<<21)|((rt)<<16)) +#define SC(base, rt) WORD $((070<<26)|((base)<<21)|((rt)<<16)) +#define NOOP WORD $0x27 + +#ifdef oldstyle +TEXT xadd(SB), $0 + + MOVW R1, R2 /* address of counter */ +loop: MOVW n+4(FP), R3 /* increment */ + LL(2, 1) + NOOP + ADD R1,R3,R3 + SC(2, 3) + NOOP + BEQ R3,loop + RET +#endif + +TEXT _xinc(SB), $0 + + MOVW R1, R2 /* address of counter */ +loop: MOVW $1, R3 + LL(2, 1) + NOOP + ADD R1,R3,R3 + SC(2, 3) + NOOP + BEQ R3,loop + RET + +TEXT _xdec(SB), $0 + + MOVW R1, R2 /* address of counter */ +loop1: MOVW $-1, R3 + LL(2, 1) + NOOP + ADD R1,R3,R3 + MOVW R3, R1 + SC(2, 3) + NOOP + BEQ R3,loop1 + RET -- 2.4.11