From help-request at octave dot org Sat Feb 12 13:49:24 2005 Subject: Re: Data types (was: Re: Access the neighbors of an element) From: Paul Kienzle To: octave help mailing list Cc: Joerg Sommer Date: Sat, 12 Feb 2005 14:54:19 -0500 --Apple-Mail-6--1065643009 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset=US-ASCII; format=flowed John, We can do better than converting to double. For unsigned add/subtract operations we can get a big speedup using the following test: z = x+y; if (z < x) z = MX; z = x-y; if (z > x) z = 0; For unsigned multiplication of 8, 16 or 32 bits we can use: p = (widertype)x * (widertype)y; z = (p > MX ? MX : p); For 64 bit unsigned multiplication we can use: #define LO 0xFFFFFFFFUL #define HALF 32 if ((x > LO && (y > LO || (x>>HALF)*y > LO)) || (y>LO && (y>>HALF)*x > LO)) z = MX; else z = x * y; For 8, 16 or 32 bit signed operations we can use: p = (widertype)x OP (widertype)y; z = (p > MX ? MX : p < MN ? MN : p); 64 bit signed add/subtract operations will be a little more work. Maybe something like the following: z = x+y; if (x>0 && y>0 && zx) z = MN; z = x-y; if (x>0 && y<0 && z0 && z>x) z = MN; 64 bit signed multiply is harder still. Maybe: absx = x<0?-x:x; absy = y<0?-y:y; if ((absx > LO && (absy > LO || (absx>>HALF)*absy > LO)) || (absy>LO && (absy>>HALF)*absx > LO)) z = ((absx == x) != (absy == y) ? MN : MX); else z = x * y; Using assembler would be even faster, but then portability and maintenance get much harder. How much faster does it have to be to be worthwhile? I'm attaching the test program I used for timing the unsigned ops on PPC. Someone who uses these operations can turn these ideas into a proper patch for octave. Do more extensive testing of the saturation conditions than I did before submitting. - Paul --Apple-Mail-6--1065643009 Content-Transfer-Encoding: 7bit Content-Type: text/plain; x-unix-mode=0644; name="uint.c" Content-Disposition: attachment; filename=uint.c // Timing comparisons for various unsigned integer ops. // Compile with three defines. // Saturation: // -DNOSAT simple operation // -DDOUBLESAT convert to double for operation // -DSAT check if the result will be/has been saturated // Type: // -DCHAR 8 bit // -DSHORT 16 bit // -DLONG 32 bit // -DLONGLONG 64 bit // Operation: // -DMULT multiply // -DADD add/subtract // // Run the loop n times using // time ./a.out n // If n is zero, it does not run the loop but instead simply prints the // condition that will be tested to detect saturation. #include #if defined(CHAR) typedef unsigned char uinttype; typedef unsigned short int nexttype; #define MX 0xFFU #define LO 0x0FU #define HI 0xF0U #define HALF 4 #elif defined(SHORT) typedef unsigned short int uinttype; typedef unsigned long int nexttype; #define MX 0xFFFFU #define LO 0x00FFU #define HI 0xFF00U #define HALF 8 #elif defined(LONG) typedef unsigned long uinttype; typedef unsigned long long nexttype; #define MX 0xFFFFFFFFUL #define LO 0x0000FFFFUL #define HI 0xFFFF0000UL #define HALF 16 #elif defined(LONGLONG) typedef unsigned long long uinttype; #define MX 0xFFFFFFFFFFFFFFFFULL #define LO 0x00000000FFFFFFFFULL #define HI 0xFFFFFFFF00000000ULL #define MASK 0xFFFFFFFFULL #define HALF 32 #else #error Use -DUSE_XXX where XXX is CHAR SHORT LONG or LONGLONG #endif void check(void) { uinttype z; uinttype x = MX; uinttype y = MX/2; #if defined(ADD) z = x+y; printf("%llu < %llu ?\n",(unsigned long long)z,(unsigned long long)x); z = y-x; printf("%llu > %llu ?\n",(unsigned long long)z,(unsigned long long)y); #elif defined(LONGLONG) if ((x > LO && (y > LO || (x>>HALF)*y > LO)) || (y>LO && (y>>HALF)*x > LO)) printf("multiply overflow correctly detected.\n"); #else nexttype p = (nexttype)x * (nexttype)y; printf("%llu > %llu ?\n",(unsigned long long)p,(unsigned long long)MX); #endif } uinttype loop(int n) { int i; uinttype total = 0; while (n--) for (i=0; i < 10000000; i++) { uinttype x = i, y=i<<(HALF-1), z; #ifdef ADD # if defined(NOSAT) z = x + y; # elif defined(DOUBLESAT) double res = (double)x + (double)y; z = (res > MX ? MX : (res < 0 ? 0 : res) ); # else z = x + y; if (z < x) z = MX; # endif #else # if defined(NOSAT) z = x*y; # elif defined(DOUBLESAT) double res = (double)x * (double)y; z = (res > MX ? MX : (res < 0 ? 0 : res) ); # elif defined(LONGLONG) if ((x > LO && (y > LO || (x>>HALF)*y > LO)) || (y>LO && (y>>HALF)*x > LO)) z = MX; else z = x * y; # else nexttype p = (nexttype)x * (nexttype)y; z = (uinttype)(p>MX ? MX : p); # endif #endif total += z; } return total; } int main(int argc, char *argv[]) { int n; if (argc > 1) n = atoi(argv[1]); else n=10; if (n) loop(n); else check(); } --Apple-Mail-6--1065643009-- ------------------------------------------------------------- Octave is freely available under the terms of the GNU GPL. Octave's home on the web: http://www.octave.org How to fund new projects: http://www.octave.org/funding.html Subscription information: http://www.octave.org/archive.html -------------------------------------------------------------