Discussion:
Valgrind: r16372 - in /trunk/none/tests/amd64: Makefile.am fb_test_amd64.c fb_test_amd64.h fb_test_amd64.stderr.exp fb_test_amd64.stdout.exp fb_test_amd64.vgtest fb_test_amd64_muldiv.h fb_test_amd64_shift.h
(too old to reply)
s***@valgrind.org
2017-05-14 07:56:42 UTC
Permalink
Raw Message
Author: sewardj
Date: Sun May 14 08:56:41 2017
New Revision: 16372

Log:
Add a test for amd64 basic instructions, with particular emphasis on testing
condition codes. This is originally by Fabrice Bellard (GPL2+'d), with MD5
support from Alexander Peslyak (public domain) and has been extended to cover
ADOX and ADCX as per bug #360415.

The program generates more than 800MB of output, which it MD5 sums, so the final
MD5 sum serves as the pass/fail check. It takes roughly a minute to run.

Added:
trunk/none/tests/amd64/fb_test_amd64.c
trunk/none/tests/amd64/fb_test_amd64.h
trunk/none/tests/amd64/fb_test_amd64.stderr.exp
trunk/none/tests/amd64/fb_test_amd64.stdout.exp
trunk/none/tests/amd64/fb_test_amd64.vgtest
trunk/none/tests/amd64/fb_test_amd64_muldiv.h
trunk/none/tests/amd64/fb_test_amd64_shift.h
Modified:
trunk/none/tests/amd64/Makefile.am

Modified: trunk/none/tests/amd64/Makefile.am
==============================================================================
--- trunk/none/tests/amd64/Makefile.am (original)
+++ trunk/none/tests/amd64/Makefile.am Sun May 14 08:56:41 2017
@@ -42,6 +42,9 @@
crc32.vgtest crc32.stdout.exp crc32.stderr.exp \
cmpxchg.vgtest cmpxchg.stdout.exp cmpxchg.stderr.exp \
faultstatus.disabled faultstatus.stderr.exp \
+ fb_test_amd64.vgtest \
+ fb_test_amd64.stderr.exp fb_test_amd64.stdout.exp \
+ fb_test_amd64.h fb_test_amd64_muldiv.h fb_test_amd64_shift.h \
fcmovnu.vgtest fcmovnu.stderr.exp fcmovnu.stdout.exp \
fma4.vgtest fma4.stdout.exp fma4.stderr.exp \
fxtract.vgtest fxtract.stderr.exp fxtract.stdout.exp \
@@ -95,6 +98,7 @@
bug127521-64 bug132813-amd64 bug132918 bug137714-amd64 \
clc \
cmpxchg \
+ fb_test_amd64 \
getseg \
$(INSN_TESTS) \
nan80and64 \
@@ -175,6 +179,8 @@
amd64locked_CFLAGS = $(AM_CFLAGS) -O
bug132918_LDADD = -lm
cmpxchg_CFLAGS = $(AM_CFLAGS) @FLAG_NO_PIE@
+fb_test_amd64_CFLAGS = $(AM_CFLAGS) -O -fno-strict-aliasing
+fb_test_amd64_LDADD = -lm
fcmovnu_CFLAGS = $(AM_CFLAGS) @FLAG_NO_PIE@
fxtract_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_OVERFLOW@ @FLAG_NO_PIE@
insn_basic_SOURCES = insn_basic.def

Added: trunk/none/tests/amd64/fb_test_amd64.c
==============================================================================
--- trunk/none/tests/amd64/fb_test_amd64.c (added)
+++ trunk/none/tests/amd64/fb_test_amd64.c Sun May 14 08:56:41 2017
@@ -0,0 +1,1233 @@
+
+/* Contrary to what the next comment says, this is now an amd64 CPU
+ test. */
+
+/*
+ * x86 CPU test
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <math.h>
+#include <stdarg.h>
+#include <assert.h>
+
+
+//////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////
+
+/*
+ * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
+ * MD5 Message-Digest Algorithm (RFC 1321).
+ *
+ * Homepage:
+ * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
+ *
+ * Author:
+ * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
+ *
+ * This software was written by Alexander Peslyak in 2001. No copyright is
+ * claimed, and the software is hereby placed in the public domain.
+ * In case this attempt to disclaim copyright and place the software in the
+ * public domain is deemed null and void, then the software is
+ * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
+ * general public under the following terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted.
+ *
+ * There's ABSOLUTELY NO WARRANTY, express or implied.
+ *
+ * (This is a heavily cut-down "BSD license".)
+ *
+ * This differs from Colin Plumb's older public domain implementation in that
+ * no exactly 32-bit integer data type is required (any 32-bit or wider
+ * unsigned integer data type will do), there's no compile-time endianness
+ * configuration, and the function prototypes match OpenSSL's. No code from
+ * Colin Plumb's implementation has been reused; this comment merely compares
+ * the properties of the two independent implementations.
+ *
+ * The primary goals of this implementation are portability and ease of use.
+ * It is meant to be fast, but not as fast as possible. Some known
+ * optimizations are not included to reduce source code size and avoid
+ * compile-time configuration.
+ */
+
+#include <string.h>
+
+// BEGIN #include "md5.h"
+/* Any 32-bit or wider unsigned integer data type will do */
+typedef unsigned int MD5_u32plus;
+
+typedef struct {
+ MD5_u32plus lo, hi; /* input length in bytes: lo keeps the low 29 bits, hi counts the overflow */
+ MD5_u32plus a, b, c, d; /* the four words of running digest state */
+ unsigned char buffer[64]; /* partial input block carried between MD5_Update calls */
+ MD5_u32plus block[16]; /* decoded input words; only used by the non-x86 SET/GET variants */
+} MD5_CTX;
+
+void MD5_Init(MD5_CTX *ctx);
+void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size);
+void MD5_Final(unsigned char *result, MD5_CTX *ctx);
+// END #include "md5.h"
+
+/*
+ * The basic MD5 functions.
+ *
+ * F and G are optimized compared to their RFC 1321 definitions for
+ * architectures that lack an AND-NOT instruction, just like in Colin Plumb's
+ * implementation.
+ *
+ * H and H2 compute the same three-way XOR and differ only in how the
+ * operands are grouped; round 3 of body() alternates between them.
+ */
+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
+#define H(x, y, z) (((x) ^ (y)) ^ (z))
+#define H2(x, y, z) ((x) ^ ((y) ^ (z)))
+#define I(x, y, z) ((y) ^ ((x) | ~(z)))
+
+/*
+ * The MD5 transformation for all four rounds.
+ *
+ * Adds f(b, c, d) + x + t to a, rotates a left by s bits, then adds b.
+ * The "& 0xffffffff" before the right shift discards any bits above bit 31,
+ * so the rotate is still correct when MD5_u32plus is wider than 32 bits.
+ *
+ * The expansion is three separate statements with no enclosing braces, so
+ * it must only be used as a statement of its own (as body() does).
+ */
+#define STEP(f, a, b, c, d, x, t, s) \
+ (a) += f((b), (c), (d)) + (x) + (t); \
+ (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
+ (a) += (b);
+
+/*
+ * SET reads 4 input bytes in little-endian byte order and stores them in a
+ * properly aligned word in host byte order.
+ *
+ * The check for little-endian architectures that tolerate unaligned memory
+ * accesses is just an optimization. Nothing will break if it fails to detect
+ * a suitable architecture.
+ *
+ * Unfortunately, this optimization may be a C strict aliasing rules violation
+ * if the caller's data buffer has effective type that cannot be aliased by
+ * MD5_u32plus. In practice, this problem may occur if these MD5 routines are
+ * inlined into a calling function, or with future and dangerously advanced
+ * link-time optimizations. For the time being, keeping these MD5 routines in
+ * their own translation unit avoids the problem.
+ *
+ * Both variants rely on a byte pointer named `ptr` being in scope where they
+ * are expanded; the portable variant additionally needs `ctx`.
+ *
+ * In the x86/VAX variant SET and GET are the same expression: a direct word
+ * load from the input. In the portable variant SET assembles the word from
+ * four bytes and caches it in ctx->block[n], and GET re-reads that cache.
+ */
+#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
+#define SET(n) \
+ (*(MD5_u32plus *)&ptr[(n) * 4])
+#define GET(n) \
+ SET(n)
+#else
+#define SET(n) \
+ (ctx->block[(n)] = \
+ (MD5_u32plus)ptr[(n) * 4] | \
+ ((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
+ ((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
+ ((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
+#define GET(n) \
+ (ctx->block[(n)])
+#endif
+
+/*
+ * This processes one or more 64-byte data blocks, but does NOT update the bit
+ * counters. There are no alignment requirements.
+ *
+ * ctx  - supplies the a/b/c/d state on entry and receives it on exit.
+ * data - first byte of the blocks to digest.
+ * size - number of bytes to digest. The do/while loop subtracts 64 per pass
+ *        and stops when the count reaches zero, so callers must pass a
+ *        non-zero multiple of 64.
+ *
+ * Returns a pointer to the byte just past the last block consumed.
+ */
+static const void *body(MD5_CTX *ctx, const void *data, unsigned long size)
+{
+ const unsigned char *ptr;
+ MD5_u32plus a, b, c, d;
+ MD5_u32plus saved_a, saved_b, saved_c, saved_d;
+
+ ptr = (const unsigned char *)data;
+
+ a = ctx->a;
+ b = ctx->b;
+ c = ctx->c;
+ d = ctx->d;
+
+ do {
+ saved_a = a; /* state before this block, added back in after round 4 */
+ saved_b = b;
+ saved_c = c;
+ saved_d = d;
+
+/* Round 1 */
+ STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
+ STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
+ STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
+ STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
+ STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
+ STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
+ STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
+ STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
+ STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
+ STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
+ STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
+ STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
+ STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
+ STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
+ STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
+ STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
+
+/* Round 2 */
+ STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
+ STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
+ STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
+ STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
+ STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
+ STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
+ STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
+ STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
+ STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
+ STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
+ STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
+ STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
+ STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
+ STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
+ STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
+ STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
+
+/* Round 3 */
+ STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
+ STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
+ STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
+ STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
+ STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
+ STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
+ STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
+ STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
+ STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
+ STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
+ STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
+ STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
+ STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
+ STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
+ STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
+ STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)
+
+/* Round 4 */
+ STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
+ STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
+ STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
+ STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
+ STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
+ STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
+ STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
+ STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
+ STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
+ STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
+ STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
+ STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
+ STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
+ STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
+ STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
+ STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
+
+ a += saved_a;
+ b += saved_b;
+ c += saved_c;
+ d += saved_d;
+
+ ptr += 64; /* advance to the next 64-byte block */
+ } while (size -= 64);
+
+ ctx->a = a;
+ ctx->b = b;
+ ctx->c = c;
+ ctx->d = d;
+
+ return ptr;
+}
+/*
+ * Prepare ctx for a new digest: load the four initial state words and
+ * reset both halves of the length counter to zero. The buffer and block
+ * members are left untouched.
+ */
+void MD5_Init(MD5_CTX *ctx)
+{
+ ctx->a = 0x67452301;
+ ctx->b = 0xefcdab89;
+ ctx->c = 0x98badcfe;
+ ctx->d = 0x10325476;
+
+ ctx->lo = 0;
+ ctx->hi = 0;
+}
+/*
+ * Feed `size` bytes starting at `data` into the digest.
+ *
+ * The length counter is advanced first. Any bytes left over from an earlier
+ * call are then topped up to a full 64-byte block and digested, whole blocks
+ * are digested straight from the caller's memory, and whatever remains
+ * (fewer than 64 bytes) is copied into ctx->buffer for the next call.
+ */
+void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size)
+{
+ MD5_u32plus saved_lo;
+ unsigned long used, available;
+
+ saved_lo = ctx->lo;
+ if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
+ ctx->hi++; /* the 29-bit low counter wrapped: carry into hi */
+ ctx->hi += size >> 29; /* bits of size that do not fit in the low counter */
+
+ used = saved_lo & 0x3f; /* bytes already waiting in ctx->buffer */
+
+ if (used) {
+ available = 64 - used;
+
+ if (size < available) {
+ memcpy(&ctx->buffer[used], data, size); /* still not a full block: just stash it */
+ return;
+ }
+
+ memcpy(&ctx->buffer[used], data, available);
+ data = (const unsigned char *)data + available;
+ size -= available;
+ body(ctx, ctx->buffer, 64);
+ }
+
+ if (size >= 64) {
+ data = body(ctx, data, size & ~(unsigned long)0x3f); /* digest all whole blocks in place */
+ size &= 0x3f;
+ }
+
+ memcpy(ctx->buffer, data, size); /* keep the tail (possibly empty) for later */
+}
+/*
+ * Store the low 32 bits of src into dst[0..3], least significant byte first.
+ * Expands to four statements without enclosing braces, and evaluates both
+ * arguments four times.
+ */
+#define OUT(dst, src) \
+ (dst)[0] = (unsigned char)(src); \
+ (dst)[1] = (unsigned char)((src) >> 8); \
+ (dst)[2] = (unsigned char)((src) >> 16); \
+ (dst)[3] = (unsigned char)((src) >> 24);
+/*
+ * Finish the digest and write its 16 bytes to result[0..15].
+ *
+ * Appends the 0x80 marker byte and zero padding, stores the message length
+ * in bits in the last 8 bytes of the final block, digests that block (plus
+ * one extra block if the padding did not leave room for the length), writes
+ * out a, b, c, d in little-endian order, and finally zeroes the whole
+ * context, so ctx must be re-initialised with MD5_Init before reuse.
+ */
+void MD5_Final(unsigned char *result, MD5_CTX *ctx)
+{
+ unsigned long used, available;
+
+ used = ctx->lo & 0x3f;
+
+ ctx->buffer[used++] = 0x80;
+
+ available = 64 - used;
+
+ if (available < 8) { /* no room left for the 8-byte length: flush and start a fresh block */
+ memset(&ctx->buffer[used], 0, available);
+ body(ctx, ctx->buffer, 64);
+ used = 0;
+ available = 64;
+ }
+
+ memset(&ctx->buffer[used], 0, available - 8);
+
+ ctx->lo <<= 3; /* byte count -> bit count */
+ OUT(&ctx->buffer[56], ctx->lo)
+ OUT(&ctx->buffer[60], ctx->hi)
+
+ body(ctx, ctx->buffer, 64);
+
+ OUT(&result[0], ctx->a)
+ OUT(&result[4], ctx->b)
+ OUT(&result[8], ctx->c)
+ OUT(&result[12], ctx->d)
+
+ memset(ctx, 0, sizeof(*ctx));
+}
+
+
+//////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////
+
+static MD5_CTX md5ctx;
+/* Reset the file-wide MD5 context; call once before the first xxprintf(). */
+void xxprintf_start(void)
+{
+ MD5_Init(&md5ctx);
+}
+/*
+ * Finalise the MD5 of everything passed through xxprintf() and print it to
+ * stdout as "final MD5 = " followed by 32 lowercase hex digits and a newline.
+ * MD5_Final() wipes md5ctx, so xxprintf_start() is needed before any reuse.
+ */
+void xxprintf_done(void)
+{
+ const char hexchar[16] = "0123456789abcdef"; /* exactly 16 chars: no terminating NUL is stored */
+ unsigned char result[100]; /* only the first 16 bytes are written by MD5_Final */
+ memset(result, 0, sizeof(result));
+ MD5_Final(&result[0], &md5ctx);
+ printf("final MD5 = ");
+ int i;
+ for (i = 0; i < 16; i++) {
+ printf("%c%c", hexchar[0xF & (result[i] >> 4)],
+ hexchar[0xF & (result[i] >> 0)]);
+ }
+ printf("\n");
+}
+/*
+ * printf-style sink used by all the tests: the formatted text is not
+ * printed but folded into the running MD5 (md5ctx).
+ *
+ * The text is formatted into a zero-filled 128-byte buffer with vsnprintf
+ * limited to 127 bytes; the asserts abort the program if the output did not
+ * fit or if either of the last two buffer bytes was overwritten. The bytes
+ * hashed are those up to the first NUL (strlen), not the vsnprintf count.
+ */
+void xxprintf (const char *format, ...)
+{
+ char buf[128];
+ memset(buf, 0, sizeof(buf));
+
+ va_list vargs;
+ va_start(vargs, format);
+ int n = vsnprintf(buf, sizeof(buf)-1, format, vargs);
+ va_end(vargs);
+
+ assert(n < sizeof(buf)-1); /* n is converted to unsigned here, so a negative (error) return also trips this */
+ assert(buf[sizeof(buf)-1] == 0);
+ assert(buf[sizeof(buf)-2] == 0);
+
+ MD5_Update(&md5ctx, buf, strlen(buf));
+ if (0) printf("QQQ %s", buf); /* debug aid: change 0 to 1 to echo the hashed text */
+}
+
+//////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////
+
+
+/* Setting this to 1 creates a very comprehensive test of
+ integer condition codes.
+ NOTE(review): the macro is not referenced anywhere in the part of the
+ sources visible here; presumably the included fb_test_amd64*.h headers
+ test it. */
+#define TEST_INTEGER_VERBOSE 1
+
+typedef long long int int64;
+
+//#define LINUX_VM86_IOPL_FIX
+//#define TEST_P4_FLAGS
+
+#define xglue(x, y) x ## y /* raw token paste */
+#define glue(x, y) xglue(x, y) /* paste after macro-expanding the arguments (e.g. OP) */
+#define stringify(s) tostring(s) /* stringise after macro-expanding the argument */
+#define tostring(s) #s
+
+#define CC_C 0x0001 /* single-bit masks applied to the flags word captured with pushfq */
+#define CC_P 0x0004
+#define CC_A 0x0010
+#define CC_Z 0x0040
+#define CC_S 0x0080
+#define CC_O 0x0800
+
+#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A) /* flags reported by the tests instantiated next; redefined several times below */
+/*
+ * Template instantiation: each group below defines OP (the instruction
+ * mnemonic) plus optional modifier macros and then includes a header that
+ * generates the code for that instruction. main() calls test_add(),
+ * test_sub(), ... test_rcl(), none of which are defined in this file, so
+ * they are presumably produced by these includes.
+ *
+ * Visible in fb_test_amd64.h: OP is pasted into the exec_<OP>{q,l,w,b}
+ * function names and stringified into the asm text, and OP1 selects the
+ * single-operand form. The meaning of OP_CC and NSH is not visible from
+ * here. OP and friends are re-#defined without an #undef in between, so
+ * the headers presumably #undef them at their end -- TODO confirm.
+ */
+#define OP add
+#include "fb_test_amd64.h"
+
+#define OP sub
+#include "fb_test_amd64.h"
+
+#define OP xor
+#include "fb_test_amd64.h"
+
+#define OP and
+#include "fb_test_amd64.h"
+
+#define OP or
+#include "fb_test_amd64.h"
+
+#define OP cmp
+#include "fb_test_amd64.h"
+
+#define OP adc
+#define OP_CC
+#include "fb_test_amd64.h"
+
+#define OP sbb
+#define OP_CC
+#include "fb_test_amd64.h"
+
+#define OP adcx
+#define NSH
+#define OP_CC
+#include "fb_test_amd64.h"
+
+#define OP adox
+#define NSH
+#define OP_CC
+#include "fb_test_amd64.h"
+
+#define OP inc
+#define OP_CC
+#define OP1
+#include "fb_test_amd64.h"
+
+#define OP dec
+#define OP_CC
+#define OP1
+#include "fb_test_amd64.h"
+
+#define OP neg
+#define OP_CC
+#define OP1
+#include "fb_test_amd64.h"
+
+#define OP not
+#define OP_CC
+#define OP1
+#include "fb_test_amd64.h"
+
+#undef CC_MASK
+#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O) /* the shift/rotate tests do not report CC_A */
+
+#define OP shl
+#include "fb_test_amd64_shift.h"
+
+#define OP shr
+#include "fb_test_amd64_shift.h"
+
+#define OP sar
+#include "fb_test_amd64_shift.h"
+
+#define OP rol
+#include "fb_test_amd64_shift.h"
+
+#define OP ror
+#include "fb_test_amd64_shift.h"
+
+#define OP rcr
+#define OP_CC
+#include "fb_test_amd64_shift.h"
+
+#define OP rcl
+#define OP_CC
+#include "fb_test_amd64_shift.h"
+
+/* XXX: should be more precise ? */
+#undef CC_MASK
+#define CC_MASK (CC_C)
+
+/* lea test (modrm support)
+ *
+ * TEST_LEA(STR) executes "leaq STR, <reg>" with the variables rax, rbx,
+ * rcx, rdx, rsi, rdi of the expansion site bound to the registers of the
+ * same name, stores the computed address in `res` and logs it through
+ * xxprintf. STR is spliced into the asm template, so register names in it
+ * need the doubled "%%" escape.
+ *
+ * TEST_LEA16 is a left-over from the 32-bit version of this test: it
+ * expects variables eax..edi and switches the assembler to .code16/.code32.
+ * It is not used anywhere in the part of the file visible here.
+ */
+#define TEST_LEA(STR)\
+{\
+ asm("leaq " STR ", %0"\
+ : "=r" (res)\
+ : "a" (rax), "b" (rbx), "c" (rcx), "d" (rdx), "S" (rsi), "D" (rdi));\
+ xxprintf("lea %s = %016llx\n", STR, res);\
+}
+
+#define TEST_LEA16(STR)\
+{\
+ asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\
+ : "=wq" (res)\
+ : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
+ xxprintf("lea %s = %08x\n", STR, res);\
+}
+
+/*
+ * Exercise leaq over a range of addressing forms: absolute displacement,
+ * base only, base + 8-bit-sized and larger displacements, base + index,
+ * scaled index with and without base, and negative displacement.
+ * Each of the six registers is loaded with a different single bit
+ * (0x1 .. 0x20), so the logged result shows which registers took part.
+ */
+void test_lea(void)
+{
+ int64 rax, rbx, rcx, rdx, rsi, rdi, res;
+ rax = 0x0001;
+ rbx = 0x0002;
+ rcx = 0x0004;
+ rdx = 0x0008;
+ rsi = 0x0010;
+ rdi = 0x0020;
+
+ TEST_LEA("0x4000");
+
+ TEST_LEA("(%%rax)");
+ TEST_LEA("(%%rbx)");
+ TEST_LEA("(%%rcx)");
+ TEST_LEA("(%%rdx)");
+ TEST_LEA("(%%rsi)");
+ TEST_LEA("(%%rdi)");
+
+ TEST_LEA("0x40(%%rax)");
+ TEST_LEA("0x40(%%rbx)");
+ TEST_LEA("0x40(%%rcx)");
+ TEST_LEA("0x40(%%rdx)");
+ TEST_LEA("0x40(%%rsi)");
+ TEST_LEA("0x40(%%rdi)");
+
+ TEST_LEA("0x4000(%%rax)");
+ TEST_LEA("0x4000(%%rbx)");
+ TEST_LEA("0x4000(%%rcx)");
+ TEST_LEA("0x4000(%%rdx)");
+ TEST_LEA("0x4000(%%rsi)");
+ TEST_LEA("0x4000(%%rdi)");
+
+ TEST_LEA("(%%rax, %%rcx)");
+ TEST_LEA("(%%rbx, %%rdx)");
+ TEST_LEA("(%%rcx, %%rcx)");
+ TEST_LEA("(%%rdx, %%rcx)");
+ TEST_LEA("(%%rsi, %%rcx)");
+ TEST_LEA("(%%rdi, %%rcx)");
+
+ TEST_LEA("0x40(%%rax, %%rcx)");
+ TEST_LEA("0x4000(%%rbx, %%rdx)");
+
+ TEST_LEA("(%%rcx, %%rcx, 2)");
+ TEST_LEA("(%%rdx, %%rcx, 4)");
+ TEST_LEA("(%%rsi, %%rcx, 8)");
+
+ TEST_LEA("(,%%rax, 2)");
+ TEST_LEA("(,%%rbx, 4)");
+ TEST_LEA("(,%%rcx, 8)");
+
+ TEST_LEA("0x40(,%%rax, 2)");
+ TEST_LEA("0x40(,%%rbx, 4)");
+ TEST_LEA("0x40(,%%rcx, 8)");
+
+
+ TEST_LEA("-10(%%rcx, %%rcx, 2)");
+ TEST_LEA("-10(%%rdx, %%rcx, 4)");
+ TEST_LEA("-10(%%rsi, %%rcx, 8)");
+
+ TEST_LEA("0x4000(%%rcx, %%rcx, 2)");
+ TEST_LEA("0x4000(%%rdx, %%rcx, 4)");
+ TEST_LEA("0x4000(%%rsi, %%rcx, 8)");
+}
+/*
+ * TEST_JCC(JCC, v1, v2): compare v1 with v2 ("cmpl %2, %1") and log how
+ * four instruction families react to condition JCC:
+ *   - jJCC:    res starts at 1 and is reset to 0 unless the jump is taken;
+ *   - setJCC:  res is zeroed, then its low byte is set from the condition;
+ *   - cmovJCCl: res starts at 0x12345678 and is conditionally replaced by
+ *     `one` (read from memory);
+ *   - cmovJCCw: the same with a 16-bit move from a register.
+ *
+ * NOTE(review): every asm statement writes its output operand before it
+ * reads the inputs, yet the output is declared "=r" rather than
+ * early-clobber "=&r", so the compiler may place res in the same register
+ * as v1 or v2. That may be why main() has test_jcc() disabled for giving
+ * optimisation-dependent results -- unconfirmed.
+ */
+#define TEST_JCC(JCC, v1, v2)\
+{ int one = 1; \
+ int res;\
+ asm("movl $1, %0\n\t"\
+ "cmpl %2, %1\n\t"\
+ "j" JCC " 1f\n\t"\
+ "movl $0, %0\n\t"\
+ "1:\n\t"\
+ : "=r" (res)\
+ : "r" (v1), "r" (v2));\
+ xxprintf("%-10s %d\n", "j" JCC, res);\
+\
+ asm("movl $0, %0\n\t"\
+ "cmpl %2, %1\n\t"\
+ "set" JCC " %b0\n\t"\
+ : "=r" (res)\
+ : "r" (v1), "r" (v2));\
+ xxprintf("%-10s %d\n", "set" JCC, res);\
+ {\
+ asm("movl $0x12345678, %0\n\t"\
+ "cmpl %2, %1\n\t"\
+ "cmov" JCC "l %3, %0\n\t"\
+ : "=r" (res)\
+ : "r" (v1), "r" (v2), "m" (one));\
+ xxprintf("%-10s R=0x%08x\n", "cmov" JCC "l", res);\
+ asm("movl $0x12345678, %0\n\t"\
+ "cmpl %2, %1\n\t"\
+ "cmov" JCC "w %w3, %w0\n\t"\
+ : "=r" (res)\
+ : "r" (v1), "r" (v2), "r" (one));\
+ xxprintf("%-10s R=0x%08x\n", "cmov" JCC "w", res);\
+ } \
+}
+
+/* various jump tests
+ *
+ * Runs TEST_JCC for the equality, signed, unsigned, parity, overflow and
+ * sign conditions, each with operand pairs chosen so the condition is seen
+ * in more than one state. The call in main() is currently commented out.
+ */
+void test_jcc(void)
+{
+ TEST_JCC("ne", 1, 1);
+ TEST_JCC("ne", 1, 0);
+
+ TEST_JCC("e", 1, 1);
+ TEST_JCC("e", 1, 0);
+
+ TEST_JCC("l", 1, 1);
+ TEST_JCC("l", 1, 0);
+ TEST_JCC("l", 1, -1);
+
+ TEST_JCC("le", 1, 1);
+ TEST_JCC("le", 1, 0);
+ TEST_JCC("le", 1, -1);
+
+ TEST_JCC("ge", 1, 1);
+ TEST_JCC("ge", 1, 0);
+ TEST_JCC("ge", -1, 1);
+
+ TEST_JCC("g", 1, 1);
+ TEST_JCC("g", 1, 0);
+ TEST_JCC("g", 1, -1);
+
+ TEST_JCC("b", 1, 1);
+ TEST_JCC("b", 1, 0);
+ TEST_JCC("b", 1, -1);
+
+ TEST_JCC("be", 1, 1);
+ TEST_JCC("be", 1, 0);
+ TEST_JCC("be", 1, -1);
+
+ TEST_JCC("ae", 1, 1);
+ TEST_JCC("ae", 1, 0);
+ TEST_JCC("ae", 1, -1);
+
+ TEST_JCC("a", 1, 1);
+ TEST_JCC("a", 1, 0);
+ TEST_JCC("a", 1, -1);
+
+
+ TEST_JCC("p", 1, 1);
+ TEST_JCC("p", 1, 0);
+
+ TEST_JCC("np", 1, 1);
+ TEST_JCC("np", 1, 0);
+
+ TEST_JCC("o", 0x7fffffff, 0);
+ TEST_JCC("o", 0x7fffffff, -1);
+
+ TEST_JCC("no", 0x7fffffff, 0);
+ TEST_JCC("no", 0x7fffffff, -1);
+
+ TEST_JCC("s", 0, 1);
+ TEST_JCC("s", 0, -1);
+ TEST_JCC("s", 0, 0);
+
+ TEST_JCC("ns", 0, 1);
+ TEST_JCC("ns", 0, -1);
+ TEST_JCC("ns", 0, 0);
+}
+/*
+ * Flag mask for the multiply tests: only CC_O and CC_C are reported unless
+ * TEST_P4_FLAGS is defined (it is commented out near the top of the file),
+ * in which case all six flags are. The two includes then instantiate the
+ * mul and imul templates; test_mul() calls test_mulb/w/l and
+ * test_imulb/w/l, which are presumably generated by them.
+ */
+#undef CC_MASK
+#ifdef TEST_P4_FLAGS
+#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
+#else
+#define CC_MASK (CC_O | CC_C)
+#endif
+
+#define OP mul
+#include "fb_test_amd64_muldiv.h"
+
+#define OP imul
+#include "fb_test_amd64_muldiv.h"
+/*
+ * Two-operand 16-bit imul: computes op0 * op1 with "imulw" on the low
+ * 16 bits of the operands, starting from an all-clear flags word, and logs
+ * both inputs, the full 64-bit destination register and the resulting
+ * flags masked with CC_MASK.
+ */
+void test_imulw2(int64 op0, int64 op1)
+{
+ int64 res, s1, s0, flags;
+ s0 = op0;
+ s1 = op1;
+ res = s0;
+ flags = 0;
+ asm ("pushq %4\n\t" /* %4 is the input copy of flags (0) */
+ "popfq\n\t"
+ "imulw %w2, %w0\n\t"
+ "pushfq\n\t"
+ "popq %1\n\t"
+ : "=q" (res), "=g" (flags)
+ : "q" (s1), "0" (res), "1" (flags));
+ xxprintf("%-10s A=%016llx B=%016llx R=%016llx CC=%04llx\n",
+ "imulw", s0, s1, res, flags & CC_MASK);
+}
+/*
+ * Two-operand 32-bit imul: like test_imulw2 but with "imull". op1 and the
+ * result are held in plain ints (so op0 and op1 are truncated to 32 bits
+ * before the multiply), while the first operand is logged at its original
+ * 64-bit width.
+ */
+void test_imull2(int64 op0, int64 op1)
+{
+ int res, s1;
+ int64 s0, flags;
+ s0 = op0;
+ s1 = op1;
+ res = s0;
+ flags = 0;
+ asm ("pushq %4\n\t" /* %4 is the input copy of flags (0) */
+ "popfq\n\t"
+ "imull %2, %0\n\t"
+ "pushfq\n\t"
+ "popq %1\n\t"
+ : "=q" (res), "=g" (flags)
+ : "q" (s1), "0" (res), "1" (flags));
+ xxprintf("%-10s A=%016llx B=%08x R=%08x CC=%04llx\n",
+ "imull", s0, s1, res, flags & CC_MASK);
+}
+/*
+ * TEST_IMUL_IM(size, size1, op0, op1): three-operand imul with an immediate.
+ * `size` is the mnemonic suffix and `size1` the operand-size modifier used
+ * in the asm template (both string literals); op0 is stringified into the
+ * instruction as the immediate, op1 is passed in a register. Flags start
+ * cleared; the result and the flags masked with CC_MASK are logged.
+ */
+#define TEST_IMUL_IM(size, size1, op0, op1)\
+{\
+ int64 res, flags;\
+ flags = 0;\
+ res = 0;\
+ asm ("pushq %3\n\t"\
+ "popfq\n\t"\
+ "imul" size " $" #op0 ", %" size1 "2, %" size1 "0\n\t" \
+ "pushfq\n\t"\
+ "popq %1\n\t"\
+ : "=r" (res), "=g" (flags)\
+ : "r" (op1), "1" (flags), "0" (res));\
+ xxprintf("%-10s A=%08x B=%08x R=%016llx CC=%04llx\n",\
+ "imul" size, op0, op1, res, flags & CC_MASK);\
+}
+/*
+ * TEST_IMUL_IM_L(op0, op1): 32-bit variant of TEST_IMUL_IM using the
+ * unsuffixed "imul $imm, reg, reg" form.
+ *
+ * NOTE(review): the asm output operand is res64, but the value logged as
+ * "R=" is res, which the asm only reads (as the initial value of operand 0)
+ * and never writes -- so R is always printed as 0. This looks unintended,
+ * but correcting it would change the text that is hashed and therefore the
+ * expected MD5, so it is left as is.
+ */
+#define TEST_IMUL_IM_L(op0, op1)\
+{\
+ int64 flags = 0;\
+ int res = 0;\
+ int res64 = 0;\
+ asm ("pushq %3\n\t"\
+ "popfq\n\t"\
+ "imul $" #op0 ", %2, %0\n\t" \
+ "pushfq\n\t"\
+ "popq %1\n\t"\
+ : "=r" (res64), "=g" (flags)\
+ : "r" (op1), "1" (flags), "0" (res));\
+ xxprintf("%-10s A=%08x B=%08x R=%08x CC=%04llx\n",\
+ "imull", op0, op1, res, flags & CC_MASK);\
+}
+
+/*
+ * The division tests report no flags at all (mask 0). The two includes
+ * instantiate the div and idiv templates; test_mul() calls
+ * test_divb/w/l/q and test_idivb/w/l/q, presumably generated here.
+ */
+#undef CC_MASK
+#define CC_MASK (0)
+
+#define OP div
+#include "fb_test_amd64_muldiv.h"
+
+#define OP idiv
+#include "fb_test_amd64_muldiv.h"
+/*
+ * Driver for all multiply and divide tests. Runs, in order: one-operand
+ * imul and mul at byte/word/long width, the two-operand imul helpers
+ * defined in this file, the immediate forms (TEST_IMUL_IM,
+ * TEST_IMUL_IM_L), and idiv/div at byte/word/long/quad width. The
+ * test_<op><size>() helpers called here are not defined in this file; they
+ * presumably come from the fb_test_amd64_muldiv.h instantiations.
+ * The order of the calls matters because all output is folded into a
+ * single MD5.
+ */
+void test_mul(void)
+{
+ test_imulb(0x1234561d, 4);
+ test_imulb(3, -4);
+ test_imulb(0x80, 0x80);
+ test_imulb(0x10, 0x10);
+
+ test_imulw(0, 0, 0);
+ test_imulw(0, 0xFF, 0xFF);
+ test_imulw(0, 0xFF, 0x100);
+ test_imulw(0, 0x1234001d, 45);
+ test_imulw(0, 23, -45);
+ test_imulw(0, 0x8000, 0x8000);
+ test_imulw(0, 0x100, 0x100);
+
+ test_imull(0, 0, 0);
+ test_imull(0, 0xFFFF, 0xFFFF);
+ test_imull(0, 0xFFFF, 0x10000);
+ test_imull(0, 0x1234001d, 45);
+ test_imull(0, 23, -45);
+ test_imull(0, 0x80000000, 0x80000000);
+ test_imull(0, 0x10000, 0x10000);
+
+ test_mulb(0x1234561d, 4);
+ test_mulb(3, -4);
+ test_mulb(0x80, 0x80);
+ test_mulb(0x10, 0x10);
+
+ test_mulw(0, 0x1234001d, 45);
+ test_mulw(0, 23, -45);
+ test_mulw(0, 0x8000, 0x8000);
+ test_mulw(0, 0x100, 0x100);
+
+ test_mull(0, 0x1234001d, 45);
+ test_mull(0, 23, -45);
+ test_mull(0, 0x80000000, 0x80000000);
+ test_mull(0, 0x10000, 0x10000);
+
+ test_imulw2(0x1234001d, 45);
+ test_imulw2(23, -45);
+ test_imulw2(0x8000, 0x8000);
+ test_imulw2(0x100, 0x100);
+
+ test_imull2(0x1234001d, 45);
+ test_imull2(23, -45);
+ test_imull2(0x80000000, 0x80000000);
+ test_imull2(0x10000, 0x10000);
+
+ TEST_IMUL_IM("w", "w", 45, 0x1234);
+ TEST_IMUL_IM("w", "w", -45, 23);
+ TEST_IMUL_IM("w", "w", 0x8000, 0x80000000);
+ TEST_IMUL_IM("w", "w", 0x7fff, 0x1000);
+
+ TEST_IMUL_IM_L(45, 0x1234);
+ TEST_IMUL_IM_L(-45, 23);
+ TEST_IMUL_IM_L(0x8000, 0x80000000);
+ TEST_IMUL_IM_L(0x7fff, 0x1000);
+
+ test_idivb(0x12341678, 0x127e);
+ test_idivb(0x43210123, -5);
+ test_idivb(0x12340004, -1);
+
+ test_idivw(0, 0x12345678, 12347);
+ test_idivw(0, -23223, -45);
+ test_idivw(0, 0x12348000, -1);
+ test_idivw(0x12343, 0x12345678, 0x81238567);
+
+ test_idivl(0, 0x12345678, 12347);
+ test_idivl(0, -233223, -45);
+ test_idivl(0, 0x80000000, -1);
+ test_idivl(0x12343, 0x12345678, 0x81234567);
+
+ test_idivq(0, 0x12345678, 12347);
+ test_idivq(0, -233223, -45);
+ test_idivq(0, 0x80000000, -1);
+ test_idivq(0x12343, 0x12345678, 0x81234567);
+
+ test_divb(0x12341678, 0x127e);
+ test_divb(0x43210123, -5);
+ test_divb(0x12340004, -1);
+
+ test_divw(0, 0x12345678, 12347);
+ test_divw(0, -23223, -45);
+ test_divw(0, 0x12348000, -1);
+ test_divw(0x12343, 0x12345678, 0x81238567);
+
+ test_divl(0, 0x12345678, 12347);
+ test_divl(0, -233223, -45);
+ test_divl(0, 0x80000000, -1);
+ test_divl(0x12343, 0x12345678, 0x81234567);
+
+ test_divq(0, 0x12345678, 12347);
+ test_divq(0, -233223, -45);
+ test_divq(0, 0x80000000, -1);
+ test_divq(0x12343, 0x12345678, 0x81234567);
+}
+/*
+ * TEST_BSX(op, size, op0): run the bit-scan instruction `op` on op0 and log
+ * the input, the destination register (preloaded with 0x12345678) and the
+ * zero flag captured with setz. `size` is the operand-size modifier used in
+ * the asm template ("w" or "").
+ *
+ * NOTE(review): the asm writes both outputs (xorl on %1, movl into %0)
+ * before reading the input %2, but neither output is declared
+ * early-clobber, so the compiler may let them share a register with val.
+ * That may be why main() has test_bsx() disabled -- unconfirmed.
+ */
+#define TEST_BSX(op, size, op0)\
+{\
+ int res, val, resz;\
+ val = op0;\
+ asm("xorl %1, %1\n"\
+ "movl $0x12345678, %0\n"\
+ #op " %" size "2, %" size "0 ; setz %b1" \
+ : "=r" (res), "=q" (resz)\
+ : "r" (val));\
+ xxprintf("%-10s A=%08x R=%08x %d\n", #op, val, res, resz);\
+}
+/*
+ * Run bsr and bsf at 16- and 32-bit width, each once with a zero input and
+ * once with a non-zero input. The call in main() is currently commented
+ * out.
+ */
+void test_bsx(void)
+{
+ TEST_BSX(bsrw, "w", 0);
+ TEST_BSX(bsrw, "w", 0x12340128);
+ TEST_BSX(bsrl, "", 0);
+ TEST_BSX(bsrl, "", 0x00340128);
+ TEST_BSX(bsfw, "w", 0);
+ TEST_BSX(bsfw, "w", 0x12340128);
+ TEST_BSX(bsfl, "", 0);
+ TEST_BSX(bsfl, "", 0x00340128);
+}
+
+/**********************************************/
+/*
+ * Log the results of the four basic double operations on a and b, of a
+ * selection of libm functions (fmod, sqrt, sin, cos, tan, log, exp, atan2)
+ * and of three function/inverse-function pairs. Everything is formatted
+ * with "%f" and fed to the MD5 via xxprintf.
+ */
+void test_fops(double a, double b)
+{
+ xxprintf("a=%f b=%f a+b=%f\n", a, b, a + b);
+ xxprintf("a=%f b=%f a-b=%f\n", a, b, a - b);
+ xxprintf("a=%f b=%f a*b=%f\n", a, b, a * b);
+ xxprintf("a=%f b=%f a/b=%f\n", a, b, a / b);
+ xxprintf("a=%f b=%f fmod(a, b)=%f\n", a, b, fmod(a, b));
+ xxprintf("a=%f sqrt(a)=%f\n", a, sqrt(a));
+ xxprintf("a=%f sin(a)=%f\n", a, sin(a));
+ xxprintf("a=%f cos(a)=%f\n", a, cos(a));
+ xxprintf("a=%f tan(a)=%f\n", a, tan(a));
+ xxprintf("a=%f log(a)=%f\n", a, log(a));
+ xxprintf("a=%f exp(a)=%f\n", a, exp(a));
+ xxprintf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b));
+ /* just to test some op combining */
+ xxprintf("a=%f asin(sin(a))=%f\n", a, asin(sin(a)));
+ xxprintf("a=%f acos(cos(a))=%f\n", a, acos(cos(a)));
+ xxprintf("a=%f atan(tan(a))=%f\n", a, atan(tan(a)));
+}
+/*
+ * Log the outcome of the five C comparisons between a and b, then execute
+ * fcomi with a on top of the x87 stack and b below it and log the Z, P and
+ * C bits of the resulting flags word.
+ *
+ * NOTE(review): the fifth line is labelled "<=" although the expression it
+ * prints is a >= b. The label is part of the hashed output, so correcting
+ * it would change the expected MD5; it is deliberately left untouched.
+ */
+void test_fcmp(double a, double b)
+{
+ xxprintf("(%f<%f)=%d\n",
+ a, b, a < b);
+ xxprintf("(%f<=%f)=%d\n",
+ a, b, a <= b);
+ xxprintf("(%f==%f)=%d\n",
+ a, b, a == b);
+ xxprintf("(%f>%f)=%d\n",
+ a, b, a > b);
+ xxprintf("(%f<=%f)=%d\n",
+ a, b, a >= b);
+ {
+ unsigned long long int rflags;
+ /* test f(u)comi instruction */
+ asm("fcomi %2, %1\n"
+ "pushfq\n"
+ "popq %0\n"
+ : "=r" (rflags)
+ : "t" (a), "u" (b));
+ xxprintf("fcomi(%f %f)=%016llx\n", a, b, rflags & (CC_Z | CC_P | CC_C));
+ }
+}
+
+/*
+ * Log conversions of the double `a`:
+ *   - its value after conversion to float and to long double;
+ *   - the raw 64-bit pattern of a, and the low 64 bits plus the following
+ *     16 bits of the long double;
+ *   - for each of the four settings of bits 10-11 of the x87 control word:
+ *     a stored as a 16-, 32- and 64-bit integer (fists, fistl, fistpll)
+ *     and rounded to an integral double (frndint).
+ * The original control word is restored after every iteration.
+ *
+ * The integer conversions use the standard "ll" length modifier. The
+ * earlier "%Lx"/"%Ld" spelling relied on a glibc extension that treats L
+ * as a synonym for ll; the output on glibc is the same either way.
+ */
+void test_fcvt(double a)
+{
+ float fa;
+ long double la;
+ int16_t fpuc;
+ int i;
+ int64 lla;
+ int ia;
+ int16_t wa;
+ double ra;
+
+ fa = a;
+ la = a;
+ xxprintf("(float)%f = %f\n", a, fa);
+ xxprintf("(long double)%f = %Lf\n", a, la);
+ xxprintf("a=%016llx\n", *(long long *)&a);
+ xxprintf("la=%016llx %04x\n", *(long long *)&la,
+ *(unsigned short *)((char *)(&la) + 8));
+
+ /* test all roundings */
+ asm volatile ("fstcw %0" : "=m" (fpuc));
+ for(i=0;i<4;i++) {
+ short zz = (fpuc & ~0x0c00) | (i << 10); /* replace control-word bits 10-11 with i */
+ asm volatile ("fldcw %0" : : "m" (zz));
+ asm volatile ("fists %0" : "=m" (wa) : "t" (a));
+ asm volatile ("fistl %0" : "=m" (ia) : "t" (a));
+ asm volatile ("fistpll %0" : "=m" (lla) : "t" (a) : "st");
+ asm volatile ("frndint ; fstl %0" : "=m" (ra) : "t" (a));
+ asm volatile ("fldcw %0" : : "m" (fpuc)); /* back to the saved control word before calling xxprintf */
+ xxprintf("(short)a = %d\n", wa);
+ xxprintf("(int)a = %d\n", ia);
+ xxprintf("(int64_t)a = %lld\n", lla);
+ xxprintf("rint(a) = %f\n", ra);
+ }
+}
+/*
+ * TEST(N) executes the x87 instruction "fld<N>", takes the value left on
+ * top of the FPU stack into the variable `a` of the expansion site and logs
+ * it. test_fconst() does this for the seven suffixes listed below. Note
+ * that the macro name TEST is very generic and stays defined for the rest
+ * of the file.
+ */
+#define TEST(N) \
+ asm("fld" #N : "=t" (a)); \
+ xxprintf("fld" #N "= %f\n", a);
+
+void test_fconst(void)
+{
+ double a;
+ TEST(1);
+ TEST(l2t);
+ TEST(l2e);
+ TEST(pi);
+ TEST(lg2);
+ TEST(ln2);
+ TEST(z);
+}
+/*
+ * Store a with fbstp into the 10-byte array bcd, load it back with fbld
+ * into b, and log a, the five 16-bit words of bcd (highest first) and b.
+ * Both calls to this function in test_floats() are commented out.
+ */
+void test_fbcd(double a)
+{
+ unsigned short bcd[5];
+ double b;
+
+ asm("fbstp %0" : "=m" (bcd[0]) : "t" (a) : "st");
+ asm("fbld %1" : "=t" (b) : "m" (bcd[0]));
+ xxprintf("a=%f bcd=%04x%04x%04x%04x%04x b=%f\n",
+ a, bcd[4], bcd[3], bcd[2], bcd[1], bcd[0], b);
+}
+/*
+ * TEST_ENV(env, save, restore): fill *env with 0xaa bytes, push dtab[0..4]
+ * onto the x87 stack, execute the `save` instruction followed by the
+ * `restore` instruction on *env, pop five values into rtab[0..4], and log
+ * them together with env->fpuc, the upper byte of env->fpus and
+ * env->fptag. Expects `i`, `dtab` and `rtab` to exist at the expansion
+ * site. *env is passed to both asm statements as an input-only "m"
+ * operand, even though `save` writes to it.
+ */
+#define TEST_ENV(env, save, restore)\
+{\
+ memset((env), 0xaa, sizeof(*(env)));\
+ for(i=0;i<5;i++)\
+ asm volatile ("fldl %0" : : "m" (dtab[i]));\
+ asm(save " %0\n" : : "m" (*(env)));\
+ asm(restore " %0\n": : "m" (*(env)));\
+ for(i=0;i<5;i++)\
+ asm volatile ("fstpl %0" : "=m" (rtab[i]));\
+ for(i=0;i<5;i++)\
+ xxprintf("res[%d]=%f\n", i, rtab[i]);\
+ xxprintf("fpuc=%04x fpus=%04x fptag=%04x\n",\
+ (env)->fpuc,\
+ (env)->fpus & 0xff00,\
+ (env)->fptag);\
+}
+/*
+ * Test saving and restoring the x87 environment, first with
+ * fnstenv/fldenv and then with fnsave/frstor (see TEST_ENV), and finally
+ * check the tag word after "ffree %st(2)" with five values on the stack.
+ * The FPU is reset with fninit at the end. The packed struct describes the
+ * memory image the instructions read and write: three 16-bit fields each
+ * followed by a 16-bit filler, four ignored 32-bit words, then room for
+ * eight long doubles.
+ */
+void test_fenv(void)
+{
+ struct __attribute__((packed)) {
+ uint16_t fpuc;
+ uint16_t dummy1;
+ uint16_t fpus;
+ uint16_t dummy2;
+ uint16_t fptag;
+ uint16_t dummy3;
+ uint32_t ignored[4];
+ long double fpregs[8];
+ } float_env32;
+ double dtab[8];
+ double rtab[8];
+ int i;
+
+ for(i=0;i<8;i++)
+ dtab[i] = i + 1; /* 1.0 .. 8.0; only the first five are loaded below */
+
+ TEST_ENV(&float_env32, "fnstenv", "fldenv");
+ TEST_ENV(&float_env32, "fnsave", "frstor");
+
+ /* test for ffree */
+ for(i=0;i<5;i++)
+ asm volatile ("fldl %0" : : "m" (dtab[i]));
+ asm volatile("ffree %st(2)");
+ asm volatile ("fnstenv %0\n" : : "m" (float_env32));
+ asm volatile ("fninit");
+ xxprintf("fptag=%04x\n", float_env32.fptag);
+}
+
+/*
+ * TEST_FCMOV(a, b, rflags, CC): load `rflags` into the flags register
+ * (pushq/popfq), execute "fcmov<CC>" with a on top of the x87 stack and b
+ * below it, and log the condition name, the flags value used and the value
+ * left on top of the stack.
+ */
+#define TEST_FCMOV(a, b, rflags, CC)\
+{\
+ double res;\
+ asm("pushq %3\n"\
+ "popfq\n"\
+ "fcmov" CC " %2, %0\n"\
+ : "=t" (res)\
+ : "0" (a), "u" (b), "g" (rflags));\
+ xxprintf("fcmov%s rflags=0x%04llx-> %f\n", \
+ CC, rflags, res);\
+}
+/*
+ * Run fcmov with a = 1.0 and b = 2.0 for the conditions b, e, be, nb, ne
+ * and nbe under all four combinations of CC_C and CC_Z, then for u and nu
+ * with CC_P clear and set.
+ */
+void test_fcmov(void)
+{
+ double a, b;
+ int64 rflags, i;
+
+ a = 1.0;
+ b = 2.0;
+ for(i = 0; i < 4; i++) {
+ rflags = 0;
+ if (i & 1) /* bit 0 of i selects CC_C */
+ rflags |= CC_C;
+ if (i & 2) /* bit 1 of i selects CC_Z */
+ rflags |= CC_Z;
+ TEST_FCMOV(a, b, rflags, "b");
+ TEST_FCMOV(a, b, rflags, "e");
+ TEST_FCMOV(a, b, rflags, "be");
+ TEST_FCMOV(a, b, rflags, "nb");
+ TEST_FCMOV(a, b, rflags, "ne");
+ TEST_FCMOV(a, b, rflags, "nbe");
+ }
+ TEST_FCMOV(a, b, (int64)0, "u");
+ TEST_FCMOV(a, b, (int64)CC_P, "u");
+ TEST_FCMOV(a, b, (int64)0, "nu");
+ TEST_FCMOV(a, b, (int64)CC_P, "nu");
+}
+/*
+ * Driver for the floating-point tests that take arguments: arithmetic and
+ * libm (test_fops), comparisons (test_fcmp), conversions (test_fcvt), and
+ * the constant loads (test_fconst). The BCD tests are disabled. test_fenv
+ * and test_fcmov are commented out here, but main() calls both directly;
+ * main() also calls test_fconst, so that one runs twice.
+ */
+void test_floats(void)
+{
+ test_fops(2, 3);
+ test_fops(1.4, -5);
+ test_fcmp(2, -1);
+ test_fcmp(2, 2);
+ test_fcmp(2, 3);
+ test_fcvt(0.5);
+ test_fcvt(-0.5);
+ test_fcvt(1.0/7.0);
+ test_fcvt(-1.0/9.0);
+ test_fcvt(32768);
+ test_fcvt(-1e20);
+ test_fconst();
+ // REINSTATE (maybe): test_fbcd(1234567890123456);
+ // REINSTATE (maybe): test_fbcd(-123451234567890);
+ // REINSTATE: test_fenv();
+ // REINSTATE: test_fcmov();
+}
+
+/**********************************************/
+/*
+ * TEST_XCHG(op, size, opconst) runs the two-operand instruction `op` on
+ * two ints preset to 0x12345678 and 0xfbca7654 and logs both afterwards.
+ * `size` is the operand-size modifier used in the asm template and
+ * `opconst` the constraint string for the second operand.
+ * TEST_CMPXCHG is the same with an additional value `eax` passed in the
+ * "a" register and logged first.
+ * Neither macro is used in the part of the file visible here.
+ */
+#define TEST_XCHG(op, size, opconst)\
+{\
+ int op0, op1;\
+ op0 = 0x12345678;\
+ op1 = 0xfbca7654;\
+ asm(#op " %" size "0, %" size "1" \
+ : "=q" (op0), opconst (op1) \
+ : "0" (op0), "1" (op1));\
+ xxprintf("%-10s A=%08x B=%08x\n",\
+ #op, op0, op1);\
+}
+
+#define TEST_CMPXCHG(op, size, opconst, eax)\
+{\
+ int op0, op1;\
+ op0 = 0x12345678;\
+ op1 = 0xfbca7654;\
+ asm(#op " %" size "0, %" size "1" \
+ : "=q" (op0), opconst (op1) \
+ : "0" (op0), "1" (op1), "a" (eax));\
+ xxprintf("%-10s EAX=%08x A=%08x C=%08x\n",\
+ #op, eax, op0, op1);\
+}
+
+
+/**********************************************/
+/* segmentation tests
+ *
+ * No segmentation test remains in the part of the file visible here:
+ * func_lret32 and func_iret32 are declared but never referenced.
+ * str_buffer is the scratch area for the string-instruction tests below. */
+
+extern char func_lret32;
+extern char func_iret32;
+
+uint8_t str_buffer[4096];
+/*
+ * TEST_STRING1(OP, size, DF, REP): execute one string instruction and log
+ * the registers it may change.
+ *   OP   - mnemonic without size suffix (stringified);
+ *   size - size suffix as a string literal;
+ *   DF   - "" or "std", executed just before the instruction;
+ *   REP  - "" or a repeat prefix including its trailing space.
+ * rsi points at the middle of str_buffer, rdi 16 bytes further on,
+ * rax = 0x12345678 and rcx = 17. Flags are cleared beforehand; "cld" is
+ * executed before the flags are captured, and six condition bits of them
+ * are logged.
+ */
+#define TEST_STRING1(OP, size, DF, REP)\
+{\
+ int64 rsi, rdi, rax, rcx, rflags;\
+\
+ rsi = (long)(str_buffer + sizeof(str_buffer) / 2);\
+ rdi = (long)(str_buffer + sizeof(str_buffer) / 2) + 16;\
+ rax = 0x12345678;\
+ rcx = 17;\
+\
+ asm volatile ("pushq $0\n\t"\
+ "popfq\n\t"\
+ DF "\n\t"\
+ REP #OP size "\n\t"\
+ "cld\n\t"\
+ "pushfq\n\t"\
+ "popq %4\n\t"\
+ : "=S" (rsi), "=D" (rdi), "=a" (rax), "=c" (rcx), "=g" (rflags)\
+ : "0" (rsi), "1" (rdi), "2" (rax), "3" (rcx));\
+ xxprintf("%-10s ESI=%016llx EDI=%016llx EAX=%016llx ECX=%016llx EFL=%04llx\n",\
+ REP #OP size, rsi, rdi, rax, rcx,\
+ rflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\
+}
+/*
+ * TEST_STRING(OP, REP): run TEST_STRING1 for the b, w and l sizes, first
+ * with no direction-flag instruction and then with "std". The expansion
+ * is several statements and the last one has no trailing semicolon, so use
+ * it as "TEST_STRING(...);" at statement level only.
+ */
+#define TEST_STRING(OP, REP)\
+ TEST_STRING1(OP, "b", "", REP);\
+ TEST_STRING1(OP, "w", "", REP);\
+ TEST_STRING1(OP, "l", "", REP);\
+ TEST_STRING1(OP, "b", "std", REP);\
+ TEST_STRING1(OP, "w", "std", REP);\
+ TEST_STRING1(OP, "l", "std", REP)
+/*
+ * Fill str_buffer with the byte pattern (i + 0x56) and run the string
+ * instruction tests: stos, lods, movs, scas and cmps, with and without
+ * repeat prefixes. Three variants are commented out. The call in main()
+ * is itself currently commented out.
+ */
+void test_string(void)
+{
+ int64 i;
+ for(i = 0;i < sizeof(str_buffer); i++)
+ str_buffer[i] = i + 0x56; /* truncated to 8 bits on store */
+ TEST_STRING(stos, "");
+ TEST_STRING(stos, "rep ");
+ TEST_STRING(lods, ""); /* to verify stos */
+ // TEST_STRING(lods, "rep ");
+ TEST_STRING(movs, "");
+ TEST_STRING(movs, "rep ");
+ TEST_STRING(lods, ""); /* to verify stos (NOTE(review): follows the movs tests, so presumably "movs" was meant) */
+
+ /* XXX: better tests */
+ TEST_STRING(scas, "");
+ TEST_STRING(scas, "repz ");
+ TEST_STRING(scas, "repnz ");
+ // REINSTATE? TEST_STRING(cmps, "");
+ TEST_STRING(cmps, "repz ");
+ // REINSTATE? TEST_STRING(cmps, "repnz ");
+}
+/*
+ * Run every enabled test group between xxprintf_start() and
+ * xxprintf_done(). All test output goes into one running MD5, so the only
+ * thing printed is the final digest line; adding, removing or reordering
+ * calls changes that digest. argc and argv are unused. Always returns 0.
+ */
+int main(int argc, char **argv)
+{
+ // The three commented out test cases produce different results at different
+ // compiler optimisation levels. This suggests to me that their inline
+ // assembly is incorrect. I don't have time to investigate now, though. So
+ // they are disabled.
+ xxprintf_start();
+ test_adc();
+ test_adcx();
+ test_add();
+ test_adox();
+ test_and();
+ // test_bsx();
+ test_cmp();
+ test_dec();
+ test_fcmov();
+ test_fconst();
+ test_fenv();
+ test_floats();
+ test_inc();
+ // test_jcc();
+ test_lea();
+ test_mul();
+ test_neg();
+ test_not();
+ test_or();
+ test_rcl();
+ test_rcr();
+ test_rol();
+ test_ror();
+ test_sar();
+ test_sbb();
+ test_shl();
+ test_shr();
+ // test_string();
+ test_sub();
+ test_xor();
+ xxprintf_done();
+ // the expected MD5SUM is 66802c845574c7c69f30d29ef85f7ca3
+ return 0;
+}

Added: trunk/none/tests/amd64/fb_test_amd64.h
==============================================================================
--- trunk/none/tests/amd64/fb_test_amd64.h (added)
+++ trunk/none/tests/amd64/fb_test_amd64.h Sun May 14 08:56:41 2017
@@ -0,0 +1,229 @@
+
+#define exec_op glue(exec_, OP) /* per-OP driver name; glue() is defined elsewhere, presumably token-pasting to exec_<OP> */
+#define exec_opq glue(glue(exec_, OP), q) /* exec_<OP>q: the "q" (64-bit) variant */
+#define exec_opl glue(glue(exec_, OP), l) /* exec_<OP>l: the "l" (32-bit) variant */
+#define exec_opw glue(glue(exec_, OP), w) /* exec_<OP>w: the "w" (16-bit) variant */
+#define exec_opb glue(glue(exec_, OP), b) /* exec_<OP>b: the "b" (8-bit) variant */
+
+#define EXECOP2(size, mod, res, s1, flags) /* two-operand form: "OP<size> s1, res" run with RFLAGS taken from, and written back to, flags */ \
+ asm ("pushq %4\n\t" /* operand 4 is the incoming flags value */\
+ "popfq\n\t" /* load it into RFLAGS */\
+ stringify(OP) size " %" mod "2, %" mod "0\n\t" /* mod is the operand-width modifier applied to s1 (%2) and res (%0) */ \
+ "pushfq\n\t" /* capture the resulting RFLAGS */\
+ "popq %1\n\t" /* and store it in flags (operand 1) */\
+ : "=q" (res), "=g" (flags) /* outputs; no clobber list is given and the asm is not volatile */\
+ : "q" (s1), "0" (res), "1" (flags));
+
+#define EXECOP1(size, mod, res, flags) /* one-operand form: "OP<size> res" run with RFLAGS taken from, and written back to, flags */ \
+ asm ("pushq %3\n\t" /* operand 3 is the incoming flags value */\
+ "popfq\n\t" /* load it into RFLAGS */\
+ stringify(OP) size " %" mod "0\n\t" /* mod is the operand-width modifier applied to res (%0) */ \
+ "pushfq\n\t" /* capture the resulting RFLAGS */\
+ "popq %1\n\t" /* and store it in flags (operand 1) */\
+ : "=q" (res), "=g" (flags) /* outputs; no clobber list is given and the asm is not volatile */\
+ : "0" (res), "1" (flags));
+
+#ifdef OP1
+static inline void exec_opq(int64 s0, int64 s1, int64 iflags) /* one-operand OP, "q" form; s1 is accepted for a uniform signature but unused */
+{
+ int64 res, flags;
+ res = s0; /* the asm updates its operand in place, so work on copies */
+ flags = iflags; /* and keep s0/iflags intact for the report */
+ EXECOP1("q", "q", res, flags);
+ xxprintf("%-6s A=%016llx R=%016llx CCIN=%04llx CC=%04llx\n",
+ stringify(OP) "q", s0, res, iflags, flags & CC_MASK); /* only the CC_MASK bits of the resulting flags are reported */
+}
+static inline void exec_opl(int64 s0, int64 s1, int64 iflags) /* one-operand OP, "l" form; s1 is unused */
+{
+ int64 res, flags;
+ res = s0; /* working copy that the asm modifies */
+ flags = iflags; /* working copy of the input flags */
+ EXECOP1("l", "k", res, flags); /* "k" asks for the 32-bit name of the register holding res */
+ xxprintf("%-6s A=%016llx R=%016llx CCIN=%04llx CC=%04llx\n",
+ stringify(OP) "l", s0, res, iflags, flags & CC_MASK); /* res is printed at full 64-bit width */
+}
+static inline void exec_opw(int64 s0, int64 s1, int64 iflags) /* one-operand OP, "w" form; s1 is unused */
+{
+ int64 res, flags;
+ res = s0; /* working copy that the asm modifies */
+ flags = iflags; /* working copy of the input flags */
+ EXECOP1("w", "w", res, flags); /* "w" asks for the 16-bit name of the register holding res */
+ xxprintf("%-6s A=%016llx R=%016llx CCIN=%04llx CC=%04llx\n",
+ stringify(OP) "w", s0, res, iflags, flags & CC_MASK); /* res is printed at full 64-bit width */
+}
+static inline void exec_opb(int64 s0, int64 s1, int64 iflags) /* one-operand OP, "b" form; s1 is unused */
+{
+ int64 res, flags;
+ res = s0; /* working copy that the asm modifies */
+ flags = iflags; /* working copy of the input flags */
+ EXECOP1("b", "b", res, flags); /* "b" asks for the 8-bit name of the register holding res */
+ xxprintf("%-6s A=%016llx R=%016llx CCIN=%04llx CC=%04llx\n",
+ stringify(OP) "b", s0, res, iflags, flags & CC_MASK); /* res is printed at full 64-bit width */
+}
+#else
+static inline void exec_opq(int64 s0, int64 s1, int64 iflags) /* two-operand OP, "q" form: res = s0 OP s1 under input flags iflags */
+{
+ int64 res, flags;
+ res = s0; /* destination operand: starts as s0 and is overwritten by the asm */
+ flags = iflags; /* flags is both the input and the output of the asm */
+ EXECOP2("q", "q", res, s1, flags);
+ xxprintf("%-6s A=%016llx B=%016llx R=%016llx CCIN=%04llx CC=%04llx\n",
+ stringify(OP) "q", s0, s1, res, iflags, flags & CC_MASK); /* only the CC_MASK bits of the resulting flags are reported */
+}
+
+static inline void exec_opl(int64 s0, int64 s1, int64 iflags) /* two-operand OP, "l" form */
+{
+ int64 res, flags;
+ res = s0; /* destination operand, overwritten by the asm */
+ flags = iflags; /* input and output flags */
+ EXECOP2("l", "k", res, s1, flags); /* "k" asks for the 32-bit names of both operand registers */
+ xxprintf("%-6s A=%016llx B=%016llx R=%016llx CCIN=%04llx CC=%04llx\n",
+ stringify(OP) "l", s0, s1, res, iflags, flags & CC_MASK); /* operands and result are printed at full 64-bit width */
+}
+#ifndef NSH
+static inline void exec_opw(int64 s0, int64 s1, int64 iflags) /* two-operand OP, "w" form; compiled only when NSH is not defined */
+{
+ int64 res, flags;
+ res = s0; /* destination operand, overwritten by the asm */
+ flags = iflags; /* input and output flags */
+ EXECOP2("w", "w", res, s1, flags); /* "w" asks for the 16-bit names of both operand registers */
+ xxprintf("%-6s A=%016llx B=%016llx R=%016llx CCIN=%04llx CC=%04llx\n",
+ stringify(OP) "w", s0, s1, res, iflags, flags & CC_MASK); /* operands and result are printed at full 64-bit width */
+}
+
+static inline void exec_opb(int64 s0, int64 s1, int64 iflags) /* two-operand OP, "b" form; compiled only when NSH is not defined */
+{
+ int64 res, flags;
+ res = s0; /* destination operand, overwritten by the asm */
+ flags = iflags; /* input and output flags */
+ EXECOP2("b", "b", res, s1, flags); /* "b" asks for the 8-bit names of both operand registers */
+ xxprintf("%-6s A=%016llx B=%016llx R=%016llx CCIN=%04llx CC=%04llx\n",
+ stringify(OP) "b", s0, s1, res, iflags, flags & CC_MASK); /* operands and result are printed at full 64-bit width */
+}
+#endif
+#endif
+
+void exec_op(int64 s0, int64 s1) /* runs each enabled width of OP on (s0, s1) under all 64 combinations of the six input flags */
+{
+#if 1
+ int64 o,s,z,a,c,p,flags_in;
+ for (o = 0; o < 2; o++) { /* one 0/1 loop per flag, outermost to innermost: O, S, Z, A, C, P */
+ for (s = 0; s < 2; s++) {
+ for (z = 0; z < 2; z++) {
+ for (a = 0; a < 2; a++) {
+ for (c = 0; c < 2; c++) {
+ for (p = 0; p < 2; p++) {
+
+ flags_in = (o ? CC_O : 0) /* build the input flags word from the loop variables */
+ | (s ? CC_S : 0)
+ | (z ? CC_Z : 0)
+ | (a ? CC_A : 0)
+ | (c ? CC_C : 0)
+ | (p ? CC_P : 0);
+ exec_opq(s0, s1, flags_in); /* q and l variants always run */
+ exec_opl(s0, s1, flags_in);
+#ifndef NSH /* NSH drops the w and b variants */
+ exec_opw(s0, s1, flags_in);
+ exec_opb(s0, s1, flags_in);
+#endif
+ }}}}}}
+#else /* disabled by the "#if 1" above: a reduced run with input flags 0 and CC_C only, without the NSH guard */
+ exec_opq(s0, s1, 0);
+ exec_opl(s0, s1, 0);
+ exec_opw(s0, s1, 0);
+ exec_opb(s0, s1, 0);
+ exec_opq(s0, s1, CC_C);
+ exec_opl(s0, s1, CC_C);
+ exec_opw(s0, s1, CC_C);
+ exec_opb(s0, s1, CC_C);
+#endif
+}
+
+void glue(test_, OP)(void) /* per-OP test entry point: feeds a fixed list of operand pairs to exec_op */
+{
+#define NVALS 57 /* number of entries in val[]: three groups of 19 */
+ int64 i, j; /* used only by the TEST_INTEGER_VERBOSE sweep at the end */
+ static unsigned int val[NVALS] /* the same 19 low-byte patterns with upper bits 0, 0xFF and 0xFFFFFF */
+ = { 0x00, 0x01, 0x02, 0x03,
+ 0x3F, 0x40, 0x41,
+ 0x7E, 0x7F, 0x80, 0x81, 0x82,
+ 0xBF, 0xC0, 0xC1,
+ 0xFC, 0xFD, 0xFE, 0xFF,
+
+ 0xFF00, 0xFF01, 0xFF02, 0xFF03,
+ 0xFF3F, 0xFF40, 0xFF41,
+ 0xFF7E, 0xFF7F, 0xFF80, 0xFF81, 0xFF82,
+ 0xFFBF, 0xFFC0, 0xFFC1,
+ 0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF,
+
+ 0xFFFFFF00, 0xFFFFFF01, 0xFFFFFF02, 0xFFFFFF03,
+ 0xFFFFFF3F, 0xFFFFFF40, 0xFFFFFF41,
+ 0xFFFFFF7E, 0xFFFFFF7F, 0xFFFFFF80, 0xFFFFFF81, 0xFFFFFF82,
+ 0xFFFFFFBF, 0xFFFFFFC0, 0xFFFFFFC1,
+ 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0xFFFFFFFF
+ };
+
+ exec_op(0xabcd12345678, 0x4321812FADA); /* hand-picked pairs; this first group has first operands that fit in 48 bits or fewer */
+ exec_op(0x12345678, 0x812FADA);
+ exec_op(0xabcd00012341, 0xabcd00012341);
+ exec_op(0x12341, 0x12341);
+ exec_op(0x12341, -0x12341);
+ exec_op(0xffffffff, 0); /* all-ones in the low 32 bits */
+ exec_op(0xffffffff, -1);
+ exec_op(0xffffffff, 1);
+ exec_op(0xffffffff, 2);
+ exec_op(0x7fffffff, 0); /* either side of the 32-bit sign boundary */
+ exec_op(0x7fffffff, 1);
+ exec_op(0x7fffffff, -1);
+ exec_op(0x80000000, -1);
+ exec_op(0x80000000, 1);
+ exec_op(0x80000000, -2);
+ exec_op(0x12347fff, 0); /* either side of the 16-bit sign boundary */
+ exec_op(0x12347fff, 1);
+ exec_op(0x12347fff, -1);
+ exec_op(0x12348000, -1);
+ exec_op(0x12348000, 1);
+ exec_op(0x12348000, -2);
+ exec_op(0x12347f7f, 0); /* either side of the 8-bit sign boundary */
+ exec_op(0x12347f7f, 1);
+ exec_op(0x12347f7f, -1);
+ exec_op(0x12348080, -1);
+ exec_op(0x12348080, 1);
+ exec_op(0x12348080, -2);
+
+ exec_op(0xFFFFFFFFffffffff, 0); /* second group: first operands that use the upper 32 bits */
+ exec_op(0xFFFFFFFFffffffff, -1);
+ exec_op(0xFFFFFFFFffffffff, 1);
+ exec_op(0xFFFFFFFFffffffff, 2);
+ exec_op(0x7fffffffFFFFFFFF, 0); /* either side of the 64-bit sign boundary */
+ exec_op(0x7fffffffFFFFFFFF, 1);
+ exec_op(0x7fffffffFFFFFFFF, -1);
+ exec_op(0x8000000000000000, -1);
+ exec_op(0x8000000000000000, 1);
+ exec_op(0x8000000000000000, -2);
+ exec_op(0x123443217FFFFFFF, 0);
+ exec_op(0x123443217FFFFFFF, 1);
+ exec_op(0x123443217FFFFFFF, -1);
+ exec_op(0x1234432180000000, -1);
+ exec_op(0x1234432180000000, 1);
+ exec_op(0x1234432180000000, -2);
+ exec_op(0x123443217F7F7f7f, 0);
+ exec_op(0x123443217F7F7f7f, 1);
+ exec_op(0x123443217F7F7f7f, -1);
+ exec_op(0x1234432180808080, -1);
+ exec_op(0x1234432180808080, 1);
+ exec_op(0x1234432180808080, -2);
+
+#if TEST_INTEGER_VERBOSE /* optional sweep over every ordered pair of val[] entries */
+ if (1)
+ for (i = 0; i < NVALS; i++)
+ for (j = 0; j < NVALS; j++)
+ exec_op(val[i], val[j]); /* val[] is unsigned int, so entries are passed on by value without sign extension */
+#endif
+
+#undef NVALS
+}
+
+#undef OP /* clear the per-include parameters defined by the includer */
+#undef OP_CC
+#undef NSH /* NOTE(review): OP1 is tested above but not undefined here -- confirm the includer relies on that */

Added: trunk/none/tests/amd64/fb_test_amd64.stderr.exp
==============================================================================
--- trunk/none/tests/amd64/fb_test_amd64.stderr.exp (added)
+++ trunk/none/tests/amd64/fb_test_amd64.stderr.exp Sun May 14 08:56:41 2017
@@ -0,0 +1,2 @@
+
+

Added: trunk/none/tests/amd64/fb_test_amd64.stdout.exp
==============================================================================
--- trunk/none/tests/amd64/fb_test_amd64.stdout.exp (added)
+++ trunk/none/tests/amd64/fb_test_amd64.stdout.exp Sun May 14 08:56:41 2017
@@ -0,0 +1 @@
+final MD5 = 66802c845574c7c69f30d29ef85f7ca3

Added: trunk/none/tests/amd64/fb_test_amd64.vgtest
==============================================================================
--- trunk/none/tests/amd64/fb_test_amd64.vgtest (added)
+++ trunk/none/tests/amd64/fb_test_amd64.vgtest Sun May 14 08:56:41 2017
@@ -0,0 +1 @@
+prog: fb_test_amd64

Added: trunk/none/tests/amd64/fb_test_amd64_muldiv.h
==============================================================================
--- trunk/none/tests/amd64/fb_test_amd64_muldiv.h (added)
+++ trunk/none/tests/amd64/fb_test_amd64_muldiv.h Sun May 14 08:56:41 2017
@@ -0,0 +1,74 @@
+
+void glue(glue(test_, OP), b)(int64 op0, int64 op1) /* "b" form of a one-explicit-operand OP: op0 goes in via the a register, op1 is the explicit operand */
+{
+ int64 res, s1, s0, flags;
+ s0 = op0; /* kept unchanged for the report */
+ s1 = op1;
+ res = s0; /* res is tied to the a register for both input and output */
+ flags = 0; /* the instruction always starts from an all-clear flags value */
+ asm ("pushq %4\n\t" /* operand 4 is the incoming flags value */
+ "popfq\n\t"
+ stringify(OP)"b %b2\n\t" /* %b2 is the 8-bit name of the register holding s1 */
+ "pushfq\n\t"
+ "popq %1\n\t" /* resulting RFLAGS into flags */
+ : "=a" (res), "=g" (flags)
+ : "q" (s1), "0" (res), "1" (flags));
+ xxprintf("%-10s A=%016llx B=%016llx R=%016llx CC=%04llx\n",
+ stringify(OP) "b", s0, s1, res, flags & CC_MASK); /* only the CC_MASK bits of the flags are reported */
+}
+
+void glue(glue(test_, OP), w)(int64 op0h, int64 op0, int64 op1) /* "w" form: op0h:op0 go in via the d and a registers, op1 is the explicit operand */
+{
+ int64 res, s1, flags, resh;
+ s1 = op1;
+ resh = op0h; /* high half, tied to the d register for input and output */
+ res = op0; /* low half, tied to the a register for input and output */
+ flags = 0; /* the instruction always starts from an all-clear flags value */
+ asm ("pushq %5\n\t" /* operand 5 is the incoming flags value */
+ "popfq\n\t"
+ stringify(OP) "w %w3\n\t" /* %w3 is the 16-bit name of the register holding s1 */
+ "pushfq\n\t"
+ "popq %1\n\t" /* resulting RFLAGS into flags */
+ : "=a" (res), "=g" (flags), "=d" (resh)
+ : "q" (s1), "0" (res), "1" (flags), "2" (resh));
+ xxprintf("%-10s AH=%016llx AL=%016llx B=%016llx RH=%016llx RL=%016llx CC=%04llx\n",
+ stringify(OP) "w", op0h, op0, s1, resh, res, flags & CC_MASK); /* inputs first, then both result halves */
+}
+
+void glue(glue(test_, OP), l)(int64 op0h, int64 op0, int64 op1) /* "l" form: op0h:op0 go in via the d and a registers, op1 is the explicit operand */
+{
+ int64 res, s1, flags, resh;
+ s1 = op1;
+ resh = op0h; /* high half, tied to the d register for input and output */
+ res = op0; /* low half, tied to the a register for input and output */
+ flags = 0; /* the instruction always starts from an all-clear flags value */
+ asm ("pushq %5\n\t" /* operand 5 is the incoming flags value */
+ "popfq\n\t"
+ stringify(OP) "l %3\n\t" /* no width modifier: the operand below is cast to int instead */
+ "pushfq\n\t"
+ "popq %1\n\t" /* resulting RFLAGS into flags */
+ : "=a" (res), "=g" (flags), "=d" (resh)
+ : "q" ((int)s1), "0" (res), "1" (flags), "2" (resh)); /* the (int) cast is what makes %3 an int-sized operand */
+ xxprintf("%-10s AH=%016llx AL=%016llx B=%016llx RH=%016llx RL=%016llx CC=%04llx\n",
+ stringify(OP) "l", op0h, op0, s1, resh, res, flags & CC_MASK); /* B prints the untruncated s1 */
+}
+
+void glue(glue(test_, OP), q)(int64 op0h, int64 op0, int64 op1) /* "q" form: op0h:op0 go in via the d and a registers, op1 is the explicit operand */
+{
+ int64 res, s1, flags, resh;
+ s1 = op1;
+ resh = op0h; /* high half, tied to the d register for input and output */
+ res = op0; /* low half, tied to the a register for input and output */
+ flags = 0; /* the instruction always starts from an all-clear flags value */
+ asm ("pushq %5\n\t" /* operand 5 is the incoming flags value */
+ "popfq\n\t"
+ stringify(OP) "q %3\n\t" /* %3 is s1 at its natural (int64) width */
+ "pushfq\n\t"
+ "popq %1\n\t" /* resulting RFLAGS into flags */
+ : "=a" (res), "=g" (flags), "=d" (resh)
+ : "q" (s1), "0" (res), "1" (flags), "2" (resh));
+ xxprintf("%-10s AH=%016llx AL=%016llx B=%016llx RH=%016llx RL=%016llx CC=%04llx\n",
+ stringify(OP) "q", op0h, op0, s1, resh, res, flags & CC_MASK); /* inputs first, then both result halves */
+}
+
+#undef OP /* clear OP so the includer can define the next instruction before including this header again */

Added: trunk/none/tests/amd64/fb_test_amd64_shift.h
==============================================================================
--- trunk/none/tests/amd64/fb_test_amd64_shift.h (added)
+++ trunk/none/tests/amd64/fb_test_amd64_shift.h Sun May 14 08:56:41 2017
@@ -0,0 +1,176 @@
+
+#define exec_op glue(exec_, OP) /* per-OP driver name; glue() is defined elsewhere, presumably token-pasting to exec_<OP> */
+#define exec_opq glue(glue(exec_, OP), q) /* exec_<OP>q: the "q" variant */
+#define exec_opl glue(glue(exec_, OP), l) /* exec_<OP>l: the "l" variant */
+#define exec_opw glue(glue(exec_, OP), w) /* exec_<OP>w: the "w" variant */
+#define exec_opb glue(glue(exec_, OP), b) /* exec_<OP>b: the "b" variant */
+
+#ifndef OP_SHIFTD
+
+#ifdef OP_NOBYTE
+#define EXECSHIFT(size, res, s1, s2, flags) /* OP_NOBYTE variant: "OP<size> s1, res" with s1 in any register; s2 is not referenced */ \
+ asm ("pushq %4\n\t" /* operand 4 is the incoming flags value */\
+ "popfq\n\t"\
+ stringify(OP) size " %" size "2, %" size "0\n\t" /* size doubles as mnemonic suffix and operand-width modifier */ \
+ "pushfq\n\t"\
+ "popq %1\n\t" /* resulting RFLAGS into flags */\
+ : "=g" (res), "=g" (flags) /* NOTE(review): with size "" no width modifier is emitted, so int64 operands presumably select the 64-bit form -- confirm */\
+ : "r" (s1), "0" (res), "1" (flags));
+#else
+#define EXECSHIFT(size, res, s1, s2, flags) /* default variant: "OP<size> %cl, res" with the count s1 in the c register; s2 is not referenced */ \
+ asm ("pushq %4\n\t" /* operand 4 is the incoming flags value */\
+ "popfq\n\t"\
+ stringify(OP) size " %%cl, %" size "0\n\t" /* size doubles as mnemonic suffix and operand-width modifier */ \
+ "pushfq\n\t"\
+ "popq %1\n\t" /* resulting RFLAGS into flags */\
+ : "=q" (res), "=g" (flags) /* NOTE(review): with size "" no width modifier is emitted, so an int64 res presumably selects the 64-bit form -- confirm */\
+ : "c" (s1), "0" (res), "1" (flags));
+#endif
+
+void exec_opq(int64 s2, int64 s0, int64 s1, int64 iflags) /* "q" form: apply OP to s0 with count s1 under input flags iflags; s2 is only passed through to EXECSHIFT */
+{
+ int64 res, flags;
+ res = s0; /* working copy that the asm modifies */
+ flags = iflags; /* input and output flags */
+ EXECSHIFT("q", res, s1, s2, flags);
+ /* overflow is undefined if count != 1 */
+ if (s1 != 1)
+ flags &= ~CC_O; /* so mask O out to keep the printed flags deterministic */
+ xxprintf("%-10s A=%016llx B=%016llx R=%016llx CCIN=%04llx CC=%04llx\n",
+ stringify(OP) "q", s0, s1, res, iflags, flags & CC_MASK); /* B is the count s1 */
+}
+
+void exec_opl(int64 s2, int64 s0, int64 s1, int64 iflags) /* reported as the "l" form; s2 is only passed through to EXECSHIFT */
+{
+ int64 res, flags;
+ res = s0; /* working copy that the asm modifies */
+ flags = iflags; /* input and output flags */
+ EXECSHIFT("", res, s1, s2, flags); /* NOTE(review): size is "" rather than "l", so neither a suffix nor a width modifier is emitted -- confirm the executed width */
+ /* overflow is undefined if count != 1 */
+ if (s1 != 1)
+ flags &= ~CC_O; /* so mask O out to keep the printed flags deterministic */
+ xxprintf("%-10s A=%016llx B=%016llx R=%016llx CCIN=%04llx CC=%04llx\n",
+ stringify(OP) "l", s0, s1, res, iflags, flags & CC_MASK); /* B is the count s1 */
+}
+
+void exec_opw(int64 s2, int64 s0, int64 s1, int64 iflags) /* "w" form: apply OP to s0 with count s1 under input flags iflags; s2 is only passed through to EXECSHIFT */
+{
+ int64 res, flags;
+ res = s0; /* working copy that the asm modifies */
+ flags = iflags; /* input and output flags */
+ EXECSHIFT("w", res, s1, s2, flags);
+ /* overflow is undefined if count != 1 */
+ if (s1 != 1)
+ flags &= ~CC_O; /* so mask O out to keep the printed flags deterministic */
+ xxprintf("%-10s A=%016llx B=%016llx R=%016llx CCIN=%04llx CC=%04llx\n",
+ stringify(OP) "w", s0, s1, res, iflags, flags & CC_MASK); /* B is the count s1 */
+}
+
+#else
+#define EXECSHIFT(size, res, s1, s2, flags) /* OP_SHIFTD variant: "OP<size> %cl, s2, res" with the count s1 in the c register */ \
+ asm ("pushq %4\n\t" /* operand 4 is the incoming flags value */\
+ "popfq\n\t"\
+ stringify(OP) size " %%cl, %" size "5, %" size "0\n\t" /* operand 5 is s2, the second source register */ \
+ "pushfq\n\t"\
+ "popq %1\n\t" /* resulting RFLAGS into flags */\
+ : "=g" (res), "=g" (flags) /* NOTE(review): with size "" no width modifier is emitted, so int64 operands presumably select the 64-bit form -- confirm */\
+ : "c" (s1), "0" (res), "1" (flags), "r" (s2));
+
+void exec_opl(int64 s2, int64 s0, int64 s1, int64 iflags) /* OP_SHIFTD case, reported as the "l" form: destination s0, second source s2, count s1 */
+{
+ int64 res, flags;
+ res = s0; /* working copy that the asm modifies */
+ flags = iflags; /* input and output flags */
+ EXECSHIFT("", res, s1, s2, flags); /* NOTE(review): size is "" rather than "l", so neither a suffix nor a width modifier is emitted -- confirm the executed width */
+ /* overflow is undefined if count != 1 */
+ if (s1 != 1)
+ flags &= ~CC_O; /* so mask O out to keep the printed flags deterministic */
+ xxprintf("%-10s A=%016llx B=%016llx C=%016llx R=%016llx CCIN=%04llx CC=%04llx\n",
+ stringify(OP) "l", s0, s2, s1, res, iflags, flags & CC_MASK); /* B is s2 and C is the count s1 */
+}
+
+void exec_opw(int64 s2, int64 s0, int64 s1, int64 iflags) /* OP_SHIFTD case, "w" form: destination s0, second source s2, count s1 */
+{
+ int64 res, flags;
+ res = s0; /* working copy that the asm modifies */
+ flags = iflags; /* input and output flags */
+ EXECSHIFT("w", res, s1, s2, flags);
+ /* overflow is undefined if count != 1 */
+ if (s1 != 1)
+ flags &= ~CC_O; /* so mask O out to keep the printed flags deterministic */
+ xxprintf("%-10s A=%016llx B=%016llx C=%016llx R=%016llx CCIN=%04llx CC=%04llx\n",
+ stringify(OP) "w", s0, s2, s1, res, iflags, flags & CC_MASK); /* B is s2 and C is the count s1 */
+}
+
+#endif
+
+#ifndef OP_NOBYTE
+void exec_opb(int64 s0, int64 s1, int64 iflags) /* "b" form; unlike the q/l/w variants it takes no s2 parameter */
+{
+ int64 res, flags;
+ res = s0; /* working copy that the asm modifies */
+ flags = iflags; /* input and output flags */
+ EXECSHIFT("b", res, s1, 0, flags); /* a literal 0 stands in for the s2 argument */
+ /* overflow is undefined if count != 1 */
+ if (s1 != 1)
+ flags &= ~CC_O; /* so mask O out to keep the printed flags deterministic */
+ xxprintf("%-10s A=%016llx B=%016llx R=%016llx CCIN=%04llx CC=%04llx\n",
+ stringify(OP) "b", s0, s1, res, iflags, flags & CC_MASK); /* B is the count s1 */
+}
+#endif
+
+void exec_op(int64 s2, int64 s0, int64 s1) /* runs the enabled widths of OP on s0 with count s1 (and second source s2) under all 64 input-flag combinations */
+{
+ int64 o,s,z,a,c,p,flags_in;
+ for (o = 0; o < 2; o++) { /* one 0/1 loop per flag, outermost to innermost: O, S, Z, A, C, P */
+ for (s = 0; s < 2; s++) {
+ for (z = 0; z < 2; z++) {
+ for (a = 0; a < 2; a++) {
+ for (c = 0; c < 2; c++) {
+ for (p = 0; p < 2; p++) {
+
+ flags_in = (o ? CC_O : 0) /* build the input flags word from the loop variables */
+ | (s ? CC_S : 0)
+ | (z ? CC_Z : 0)
+ | (a ? CC_A : 0)
+ | (c ? CC_C : 0)
+ | (p ? CC_P : 0);
+
+ exec_opq(s2, s0, s1, flags_in); /* NOTE(review): called unconditionally, but the OP_SHIFTD branch above defines no exec_opq -- confirm OP_SHIFTD is never used */
+ if (s1 <= 31) /* the l variant is skipped for counts above 31 */
+ exec_opl(s2, s0, s1, flags_in);
+#ifdef OP_SHIFTD
+ if (s1 <= 15) /* with OP_SHIFTD the w variant is skipped for counts above 15 */
+ exec_opw(s2, s0, s1, flags_in);
+#else
+ exec_opw(s2, s0, s1, flags_in);
+#endif
+#ifndef OP_NOBYTE
+ exec_opb(s0, s1, flags_in);
+#endif
+#ifdef OP_CC /* with OP_CC all four variants run a second time, without the count or OP_NOBYTE guards used above */
+ exec_opq(s2, s0, s1, flags_in);
+ exec_opl(s2, s0, s1, flags_in);
+ exec_opw(s2, s0, s1, flags_in);
+ exec_opb(s0, s1, flags_in);
+#endif
+
+ }}}}}}
+
+}
+
+void glue(test_, OP)(void) /* per-OP test entry point: sweeps the count over 0..63 for two fixed operand pairs */
+{
+ int64 i;
+ for(i = 0; i < 64; i++) /* first pair: the arguments are s2, s0 and the count i */
+ exec_op(0x3141592721ad3d34, 0x2718284612345678, i);
+ for(i = 0; i < 64; i++) /* second pair: differs from the first in the low 32 bits of both operands */
+ exec_op(0x31415927813f3421, 0x2718284682345678, i);
+}
+
+#undef OP /* clear every per-include parameter so the header can be included again for the next instruction */
+#undef OP_CC
+#undef OP_SHIFTD
+#undef OP_NOBYTE
+#undef EXECSHIFT /* EXECSHIFT has three different bodies, so it must be undefined before the next inclusion */
+

Loading...