[Minios-devel] [UNIKRAFT PATCH 03/13] arch: Reorganize arch folder to reflect the CPU family schema
We have introduced UK_FAMILY for the build scripts. To reflect the CPU family schema, we change "arch/UK_ARCH" to "arch/UK_FAMILY/UK_ARCH". The "arch/UK_FAMILY" folder can be used to store common code and build scripts for architectures of the same family. For individual architectures, it makes sense to use forward declarations in Compiler.uk and Makefile.uk to include the corresponding file in a subfolder. For instance, arm/Makefile.uk includes arm/arm/Makefile.uk when an ARM32 architecture is selected. Signed-off-by: Wei Chen <Wei.Chen@xxxxxxx> --- Makefile | 13 +- arch/Config.uk | 4 +- arch/arm/Compiler.uk | 7 +- arch/arm/Config.uk | 18 -- arch/arm/Makefile.uk | 23 +-- arch/arm/arm/Compiler.uk | 6 + arch/arm/arm/Config.uk | 18 ++ arch/arm/arm/Makefile.uk | 19 ++ arch/arm/arm/divsi3.S | 403 +++++++++++++++++++++++++++++++++++++++++ arch/arm/arm/ldivmod.S | 68 +++++++ arch/arm/arm/ldivmod_helper.c | 67 +++++++ arch/arm/arm/qdivrem.c | 324 +++++++++++++++++++++++++++++++++ arch/arm/divsi3.S | 404 ------------------------------------------ arch/arm/ldivmod.S | 68 ------- arch/arm/ldivmod_helper.c | 67 ------- arch/arm/qdivrem.c | 324 --------------------------------- arch/x86/Compiler.uk | 4 + arch/x86/Makefile.uk | 4 + arch/x86/x86_64/Compiler.uk | 6 + arch/x86/x86_64/Config.uk | 89 ++++++++++ arch/x86/x86_64/Makefile.uk | 37 ++++ arch/x86_64/Compiler.uk | 6 - arch/x86_64/Config.uk | 89 ---------- arch/x86_64/Makefile.uk | 37 ---- 24 files changed, 1059 insertions(+), 1046 deletions(-) delete mode 100644 arch/arm/Config.uk create mode 100644 arch/arm/arm/Compiler.uk create mode 100644 arch/arm/arm/Config.uk create mode 100644 arch/arm/arm/Makefile.uk create mode 100644 arch/arm/arm/divsi3.S create mode 100644 arch/arm/arm/ldivmod.S create mode 100644 arch/arm/arm/ldivmod_helper.c create mode 100644 arch/arm/arm/qdivrem.c delete mode 100644 arch/arm/divsi3.S delete mode 100644 arch/arm/ldivmod.S delete mode 100644 arch/arm/ldivmod_helper.c delete mode 100644 arch/arm/qdivrem.c create mode 100644 arch/x86/Compiler.uk create mode 100644 arch/x86/Makefile.uk create mode 100644 arch/x86/x86_64/Compiler.uk create mode 100644 arch/x86/x86_64/Config.uk create mode 100644 arch/x86/x86_64/Makefile.uk delete mode 100644 arch/x86_64/Compiler.uk delete mode 100644 arch/x86_64/Config.uk delete mode 100644 arch/x86_64/Makefile.uk diff --git a/Makefile b/Makefile index 63495ca..ca9b566 100644 --- a/Makefile +++ b/Makefile @@ -297,7 +297,6 @@ export HOSTARCH := $(shell LC_ALL=C $(HOSTCC_NOCCACHE) -v 2>&1 | \ export HOSTAR HOSTAS HOSTCC HOSTCC_VERSION HOSTCXX HOSTLD HOSTARCH export HOSTCC_NOCCACHE HOSTCXX_NOCCACHE - ################################################################################ # Unikraft Architecture ################################################################################ @@ -349,12 +348,12 @@ export UK_FAMILY ?= $(shell echo "$(UK_ARCH)" | \ # Quick-check if architecture exists ifeq ($(filter $(null_targets) print-vars,$(MAKECMDGOALS)),) -ifeq ($(wildcard $(UK_BASE)/arch/$(ARCH)/Makefile.uk),) -$(error Target architecture ($(ARCH)) is currently not supported (could not find $(UK_BASE)/arch/$(ARCH)/Makefile.uk).) +ifeq ($(wildcard $(UK_BASE)/arch/$(UK_FAMILY)/$(ARCH)/Makefile.uk),) +$(error Target architecture ($(ARCH)) is currently not supported (could not find $(UK_BASE)/arch/$(UK_FAMILY)/$(ARCH)/Makefile.uk).) 
endif -ifeq ($(wildcard arch/$(ARCH)/Compiler.uk),) -$(error Target architecture ($(ARCH)) is currently not supported (could not find $(UK_BASE)/arch/$(ARCH)/Compiler.uk).) +ifeq ($(wildcard $(UK_BASE)/arch/$(UK_FAMILY)/$(ARCH)/Compiler.uk),) +$(error Target architecture ($(ARCH)) is currently not supported (could not find $(UK_BASE)/arch/$(UK_FAMILY)/$(ARCH)/Compiler.uk).) endif endif @@ -393,7 +392,7 @@ unexport MACHINE # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile.uk CROSS_COMPILE := $(CROSS_COMPILE:"%"=%) -include $(UK_BASE)/arch/$(UK_ARCH)/Compiler.uk +include $(UK_BASE)/arch/$(UK_FAMILY)/Compiler.uk # Make variables (CC, etc...) LD := $(CROSS_COMPILE)ld @@ -455,7 +454,7 @@ endif $(foreach E,$(ELIB_DIR), \ $(eval $(call _import_lib,$(E))); \ ) -$(eval $(call _import_lib,$(UK_BASE)/arch/$(UK_ARCH))) # architecture libraries +$(eval $(call _import_lib,$(UK_BASE)/arch/$(UK_FAMILY))) # architecture libraries include $(UK_BASE)/plat/Makefile.uk # platform libraries include $(UK_BASE)/lib/Makefile.uk # libraries include $(UK_BASE)/Makefile.uk # Unikraft base diff --git a/arch/Config.uk b/arch/Config.uk index 36df8d1..9236273 100644 --- a/arch/Config.uk +++ b/arch/Config.uk @@ -13,8 +13,8 @@ config ARCH_ARM_32 endchoice if (ARCH_X86_64) - source "arch/x86_64/Config.uk" + source "arch/x86/x86_64/Config.uk" endif if (ARCH_ARM_32) - source "arch/arm/Config.uk" + source "arch/arm/arm/Config.uk" endif diff --git a/arch/arm/Compiler.uk b/arch/arm/Compiler.uk index ee34916..02e8a02 100644 --- a/arch/arm/Compiler.uk +++ b/arch/arm/Compiler.uk @@ -1,6 +1,3 @@ -# set cross compile -ifeq ($(CROSS_COMPILE),) -ifneq ($(UK_ARCH),$(HOSTARCH)) - CROSS_COMPILE := arm-linux-gnueabihf- -endif +ifeq ($(UK_ARCH),arm) + include $(UK_BASE)/arch/arm/arm/Compiler.uk endif diff --git a/arch/arm/Config.uk b/arch/arm/Config.uk deleted file mode 100644 index 426c113..0000000 --- a/arch/arm/Config.uk +++ /dev/null @@ -1,18 +0,0 @@ -choice - prompt "Processor Optimization" - default MARCH_CORTEXA7 - help - Optimize the code for selected target processor - -config MARCH_CORTEXA7 - bool "Generic Cortex A7" - help - Compile for Cortex-A7 CPUs, no hardware FPU support - -config MARCH_A20NEON - bool "Cortex A7: AllWinner A20" - help - Compile for AllWinner A20 (Cortex-A7) CPUs - The NEON FPU is enabled. Please note that NEON is - not fully IEEE 754 compliant. 
-endchoice diff --git a/arch/arm/Makefile.uk b/arch/arm/Makefile.uk index 2567dbe..3ab4df9 100644 --- a/arch/arm/Makefile.uk +++ b/arch/arm/Makefile.uk @@ -1,19 +1,4 @@ -ASFLAGS += -D__ARM_32__ -ASFLAGS += -marm -CFLAGS += -D__ARM_32__ -CFLAGS += -marm -fms-extensions -CXXFLAGS += -D__ARM_32__ -CXXFLAGS += -marm -fms-extensions - -#-march=armv7-a - -CFLAGS-$(MARCH_A20NEON) += -mcpu=cortex-a7 -mtune=cortex-a7 -mfpu=vfpv4-d16 -mfpu=neon-vfpv4 -funsafe-math-optimizations -CXXFLAGS-$(MARCH_A20NEON) += -mcpu=cortex-a7 -mtune=cortex-a7 -mfpu=vfpv4-d16 -mfpu=neon-vfpv4 -funsafe-math-optimizations -CFLAGS-$(MARCH_CORTEXA7) += -mcpu=cortex-a7 -mtune=cortex-a7 -CXXFLAGS-$(MARCH_CORTEXA7) += -mcpu=cortex-a7 -mtune=cortex-a - -$(eval $(call addlib,libarmmath)) -LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/divsi3.S -LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/ldivmod.S -LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/ldivmod_helper.c -LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/qdivrem.c +# Include corresponding Makefiles for specified architechtures +ifeq ($(UK_ARCH),arm) + include $(UK_BASE)/arch/arm/arm/Makefile.uk +endif diff --git a/arch/arm/arm/Compiler.uk b/arch/arm/arm/Compiler.uk new file mode 100644 index 0000000..ee34916 --- /dev/null +++ b/arch/arm/arm/Compiler.uk @@ -0,0 +1,6 @@ +# set cross compile +ifeq ($(CROSS_COMPILE),) +ifneq ($(UK_ARCH),$(HOSTARCH)) + CROSS_COMPILE := arm-linux-gnueabihf- +endif +endif diff --git a/arch/arm/arm/Config.uk b/arch/arm/arm/Config.uk new file mode 100644 index 0000000..426c113 --- /dev/null +++ b/arch/arm/arm/Config.uk @@ -0,0 +1,18 @@ +choice + prompt "Processor Optimization" + default MARCH_CORTEXA7 + help + Optimize the code for selected target processor + +config MARCH_CORTEXA7 + bool "Generic Cortex A7" + help + Compile for Cortex-A7 CPUs, no hardware FPU support + +config MARCH_A20NEON + bool "Cortex A7: AllWinner A20" + help + Compile for AllWinner A20 (Cortex-A7) CPUs + The NEON FPU is enabled. Please note that NEON is + not fully IEEE 754 compliant. 
+endchoice diff --git a/arch/arm/arm/Makefile.uk b/arch/arm/arm/Makefile.uk new file mode 100644 index 0000000..2614425 --- /dev/null +++ b/arch/arm/arm/Makefile.uk @@ -0,0 +1,19 @@ +ASFLAGS += -D__ARM_32__ +ASFLAGS += -marm +CFLAGS += -D__ARM_32__ +CFLAGS += -marm -fms-extensions +CXXFLAGS += -D__ARM_32__ +CXXFLAGS += -marm -fms-extensions + +#-march=armv7-a + +CFLAGS-$(MARCH_A20NEON) += -mcpu=cortex-a7 -mtune=cortex-a7 -mfpu=vfpv4-d16 -mfpu=neon-vfpv4 -funsafe-math-optimizations +CXXFLAGS-$(MARCH_A20NEON) += -mcpu=cortex-a7 -mtune=cortex-a7 -mfpu=vfpv4-d16 -mfpu=neon-vfpv4 -funsafe-math-optimizations +CFLAGS-$(MARCH_CORTEXA7) += -mcpu=cortex-a7 -mtune=cortex-a7 +CXXFLAGS-$(MARCH_CORTEXA7) += -mcpu=cortex-a7 -mtune=cortex-a + +$(eval $(call addlib,libarmmath)) +LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/arm/divsi3.S +LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/arm/ldivmod.S +LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/arm/ldivmod_helper.c +LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/arm/qdivrem.c diff --git a/arch/arm/arm/divsi3.S b/arch/arm/arm/divsi3.S new file mode 100644 index 0000000..23edbf7 --- /dev/null +++ b/arch/arm/arm/divsi3.S @@ -0,0 +1,403 @@ +/* $NetBSD: divsi3.S,v 1.4 2003/04/05 23:27:15 bjh21 Exp $ */ + +/*- + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#define ENTRY_NP(symbol) \ + .globl symbol; \ + symbol: + +#define END(symbol) + +/* + * stack is aligned as there's a possibility of branching to L_overflow + * which makes a C call + */ + +ENTRY_NP(__umodsi3) + stmfd sp!, {lr} + sub sp, sp, #4 /* align stack */ + bl .L_udivide + add sp, sp, #4 /* unalign stack */ + mov r0, r1 + ldmfd sp!, {pc} +END(__umodsi3) + +ENTRY_NP(__modsi3) + stmfd sp!, {lr} + sub sp, sp, #4 /* align stack */ + bl .L_divide + add sp, sp, #4 /* unalign stack */ + mov r0, r1 + ldmfd sp!, {pc} + +.L_overflow: + /* XXX should cause a fatal error */ + mvn r0, #0 + mov pc, lr + +END(__modsi3) + +#ifdef __ARM_EABI__ +ENTRY_NP(__aeabi_uidiv) +ENTRY_NP(__aeabi_uidivmod) +#endif +ENTRY_NP(__udivsi3) +.L_udivide: /* r0 = r0 / r1; r1 = r0 % r1 */ + eor r0, r1, r0 + eor r1, r0, r1 + eor r0, r1, r0 + /* r0 = r1 / r0; r1 = r1 % r0 */ + cmp r0, #1 + bcc .L_overflow + beq .L_divide_l0 + mov ip, #0 + movs r1, r1 + bpl .L_divide_l1 + orr ip, ip, #0x20000000 /* ip bit 0x20000000 = -ve r1 */ + movs r1, r1, lsr #1 + orrcs ip, ip, #0x10000000 /* ip bit 0x10000000 = bit 0 of r1 */ + b .L_divide_l1 + +.L_divide_l0: /* r0 == 1 */ + mov r0, r1 + mov r1, #0 + mov pc, lr +#ifdef __ARM_EABI__ +END(__aeabi_uidiv) +END(__aeabi_uidivmod) +#endif +END(__udivsi3) + +#ifdef __ARM_EABI__ +ENTRY_NP(__aeabi_idiv) +ENTRY_NP(__aeabi_idivmod) +#endif +ENTRY_NP(__divsi3) +.L_divide: /* r0 = r0 / r1; r1 = r0 % r1 */ + eor r0, r1, r0 + eor r1, r0, r1 + eor r0, r1, r0 + /* r0 = r1 / r0; r1 = r1 % r0 */ + cmp r0, #1 + bcc .L_overflow + beq .L_divide_l0 + ands ip, r0, #0x80000000 + rsbmi r0, r0, #0 + ands r2, r1, #0x80000000 + eor ip, ip, r2 + rsbmi r1, r1, #0 + orr ip, r2, ip, lsr #1 /* ip bit 0x40000000 = -ve division */ + /* ip bit 0x80000000 = -ve remainder */ + +.L_divide_l1: + mov r2, #1 + mov r3, #0 + + /* + * If the highest bit of the dividend is set, we have to be + * careful when shifting the divisor. Test this. + */ + movs r1,r1 + bpl .L_old_code + + /* + * At this point, the highest bit of r1 is known to be set. + * We abuse this below in the tst instructions. 
+ */ + tst r1, r0 /*, lsl #0 */ + bmi .L_divide_b1 + tst r1, r0, lsl #1 + bmi .L_divide_b2 + tst r1, r0, lsl #2 + bmi .L_divide_b3 + tst r1, r0, lsl #3 + bmi .L_divide_b4 + tst r1, r0, lsl #4 + bmi .L_divide_b5 + tst r1, r0, lsl #5 + bmi .L_divide_b6 + tst r1, r0, lsl #6 + bmi .L_divide_b7 + tst r1, r0, lsl #7 + bmi .L_divide_b8 + tst r1, r0, lsl #8 + bmi .L_divide_b9 + tst r1, r0, lsl #9 + bmi .L_divide_b10 + tst r1, r0, lsl #10 + bmi .L_divide_b11 + tst r1, r0, lsl #11 + bmi .L_divide_b12 + tst r1, r0, lsl #12 + bmi .L_divide_b13 + tst r1, r0, lsl #13 + bmi .L_divide_b14 + tst r1, r0, lsl #14 + bmi .L_divide_b15 + tst r1, r0, lsl #15 + bmi .L_divide_b16 + tst r1, r0, lsl #16 + bmi .L_divide_b17 + tst r1, r0, lsl #17 + bmi .L_divide_b18 + tst r1, r0, lsl #18 + bmi .L_divide_b19 + tst r1, r0, lsl #19 + bmi .L_divide_b20 + tst r1, r0, lsl #20 + bmi .L_divide_b21 + tst r1, r0, lsl #21 + bmi .L_divide_b22 + tst r1, r0, lsl #22 + bmi .L_divide_b23 + tst r1, r0, lsl #23 + bmi .L_divide_b24 + tst r1, r0, lsl #24 + bmi .L_divide_b25 + tst r1, r0, lsl #25 + bmi .L_divide_b26 + tst r1, r0, lsl #26 + bmi .L_divide_b27 + tst r1, r0, lsl #27 + bmi .L_divide_b28 + tst r1, r0, lsl #28 + bmi .L_divide_b29 + tst r1, r0, lsl #29 + bmi .L_divide_b30 + tst r1, r0, lsl #30 + bmi .L_divide_b31 +/* + * instead of: + * tst r1, r0, lsl #31 + * bmi .L_divide_b32 + */ + b .L_divide_b32 + +.L_old_code: + cmp r1, r0 + bcc .L_divide_b0 + cmp r1, r0, lsl #1 + bcc .L_divide_b1 + cmp r1, r0, lsl #2 + bcc .L_divide_b2 + cmp r1, r0, lsl #3 + bcc .L_divide_b3 + cmp r1, r0, lsl #4 + bcc .L_divide_b4 + cmp r1, r0, lsl #5 + bcc .L_divide_b5 + cmp r1, r0, lsl #6 + bcc .L_divide_b6 + cmp r1, r0, lsl #7 + bcc .L_divide_b7 + cmp r1, r0, lsl #8 + bcc .L_divide_b8 + cmp r1, r0, lsl #9 + bcc .L_divide_b9 + cmp r1, r0, lsl #10 + bcc .L_divide_b10 + cmp r1, r0, lsl #11 + bcc .L_divide_b11 + cmp r1, r0, lsl #12 + bcc .L_divide_b12 + cmp r1, r0, lsl #13 + bcc .L_divide_b13 + cmp r1, r0, lsl #14 + bcc .L_divide_b14 + cmp r1, r0, lsl #15 + bcc .L_divide_b15 + cmp r1, r0, lsl #16 + bcc .L_divide_b16 + cmp r1, r0, lsl #17 + bcc .L_divide_b17 + cmp r1, r0, lsl #18 + bcc .L_divide_b18 + cmp r1, r0, lsl #19 + bcc .L_divide_b19 + cmp r1, r0, lsl #20 + bcc .L_divide_b20 + cmp r1, r0, lsl #21 + bcc .L_divide_b21 + cmp r1, r0, lsl #22 + bcc .L_divide_b22 + cmp r1, r0, lsl #23 + bcc .L_divide_b23 + cmp r1, r0, lsl #24 + bcc .L_divide_b24 + cmp r1, r0, lsl #25 + bcc .L_divide_b25 + cmp r1, r0, lsl #26 + bcc .L_divide_b26 + cmp r1, r0, lsl #27 + bcc .L_divide_b27 + cmp r1, r0, lsl #28 + bcc .L_divide_b28 + cmp r1, r0, lsl #29 + bcc .L_divide_b29 + cmp r1, r0, lsl #30 + bcc .L_divide_b30 +.L_divide_b32: + cmp r1, r0, lsl #31 + subhs r1, r1,r0, lsl #31 + addhs r3, r3,r2, lsl #31 +.L_divide_b31: + cmp r1, r0, lsl #30 + subhs r1, r1,r0, lsl #30 + addhs r3, r3,r2, lsl #30 +.L_divide_b30: + cmp r1, r0, lsl #29 + subhs r1, r1,r0, lsl #29 + addhs r3, r3,r2, lsl #29 +.L_divide_b29: + cmp r1, r0, lsl #28 + subhs r1, r1,r0, lsl #28 + addhs r3, r3,r2, lsl #28 +.L_divide_b28: + cmp r1, r0, lsl #27 + subhs r1, r1,r0, lsl #27 + addhs r3, r3,r2, lsl #27 +.L_divide_b27: + cmp r1, r0, lsl #26 + subhs r1, r1,r0, lsl #26 + addhs r3, r3,r2, lsl #26 +.L_divide_b26: + cmp r1, r0, lsl #25 + subhs r1, r1,r0, lsl #25 + addhs r3, r3,r2, lsl #25 +.L_divide_b25: + cmp r1, r0, lsl #24 + subhs r1, r1,r0, lsl #24 + addhs r3, r3,r2, lsl #24 +.L_divide_b24: + cmp r1, r0, lsl #23 + subhs r1, r1,r0, lsl #23 + addhs r3, r3,r2, lsl #23 +.L_divide_b23: + cmp r1, r0, lsl #22 + subhs r1, 
r1,r0, lsl #22 + addhs r3, r3,r2, lsl #22 +.L_divide_b22: + cmp r1, r0, lsl #21 + subhs r1, r1,r0, lsl #21 + addhs r3, r3,r2, lsl #21 +.L_divide_b21: + cmp r1, r0, lsl #20 + subhs r1, r1,r0, lsl #20 + addhs r3, r3,r2, lsl #20 +.L_divide_b20: + cmp r1, r0, lsl #19 + subhs r1, r1,r0, lsl #19 + addhs r3, r3,r2, lsl #19 +.L_divide_b19: + cmp r1, r0, lsl #18 + subhs r1, r1,r0, lsl #18 + addhs r3, r3,r2, lsl #18 +.L_divide_b18: + cmp r1, r0, lsl #17 + subhs r1, r1,r0, lsl #17 + addhs r3, r3,r2, lsl #17 +.L_divide_b17: + cmp r1, r0, lsl #16 + subhs r1, r1,r0, lsl #16 + addhs r3, r3,r2, lsl #16 +.L_divide_b16: + cmp r1, r0, lsl #15 + subhs r1, r1,r0, lsl #15 + addhs r3, r3,r2, lsl #15 +.L_divide_b15: + cmp r1, r0, lsl #14 + subhs r1, r1,r0, lsl #14 + addhs r3, r3,r2, lsl #14 +.L_divide_b14: + cmp r1, r0, lsl #13 + subhs r1, r1,r0, lsl #13 + addhs r3, r3,r2, lsl #13 +.L_divide_b13: + cmp r1, r0, lsl #12 + subhs r1, r1,r0, lsl #12 + addhs r3, r3,r2, lsl #12 +.L_divide_b12: + cmp r1, r0, lsl #11 + subhs r1, r1,r0, lsl #11 + addhs r3, r3,r2, lsl #11 +.L_divide_b11: + cmp r1, r0, lsl #10 + subhs r1, r1,r0, lsl #10 + addhs r3, r3,r2, lsl #10 +.L_divide_b10: + cmp r1, r0, lsl #9 + subhs r1, r1,r0, lsl #9 + addhs r3, r3,r2, lsl #9 +.L_divide_b9: + cmp r1, r0, lsl #8 + subhs r1, r1,r0, lsl #8 + addhs r3, r3,r2, lsl #8 +.L_divide_b8: + cmp r1, r0, lsl #7 + subhs r1, r1,r0, lsl #7 + addhs r3, r3,r2, lsl #7 +.L_divide_b7: + cmp r1, r0, lsl #6 + subhs r1, r1,r0, lsl #6 + addhs r3, r3,r2, lsl #6 +.L_divide_b6: + cmp r1, r0, lsl #5 + subhs r1, r1,r0, lsl #5 + addhs r3, r3,r2, lsl #5 +.L_divide_b5: + cmp r1, r0, lsl #4 + subhs r1, r1,r0, lsl #4 + addhs r3, r3,r2, lsl #4 +.L_divide_b4: + cmp r1, r0, lsl #3 + subhs r1, r1,r0, lsl #3 + addhs r3, r3,r2, lsl #3 +.L_divide_b3: + cmp r1, r0, lsl #2 + subhs r1, r1,r0, lsl #2 + addhs r3, r3,r2, lsl #2 +.L_divide_b2: + cmp r1, r0, lsl #1 + subhs r1, r1,r0, lsl #1 + addhs r3, r3,r2, lsl #1 +.L_divide_b1: + cmp r1, r0 + subhs r1, r1, r0 + addhs r3, r3, r2 +.L_divide_b0: + + tst ip, #0x20000000 + bne .L_udivide_l1 + mov r0, r3 + cmp ip, #0 + rsbmi r1, r1, #0 + movs ip, ip, lsl #1 + bicmi r0, r0, #0x80000000 /* Fix incase we divided 0x80000000 */ + rsbmi r0, r0, #0 + mov pc, lr + +.L_udivide_l1: + tst ip, #0x10000000 + mov r1, r1, lsl #1 + orrne r1, r1, #1 + mov r3, r3, lsl #1 + cmp r1, r0 + subhs r1, r1, r0 + addhs r3, r3, r2 + mov r0, r3 + mov pc, lr +END(__aeabi_idiv) +END(__aeabi_idivmod) +END(__divsi3) diff --git a/arch/arm/arm/ldivmod.S b/arch/arm/arm/ldivmod.S new file mode 100644 index 0000000..180227c --- /dev/null +++ b/arch/arm/arm/ldivmod.S @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * Copyright (C) 2012 Andrew Turner + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#define ENTRY_NP(symbol) \ + .globl symbol; \ + symbol: + +#define END(symbol) + +/* + * These calculate: + * q = n / m + * With a remainer r. + * + * They take n in {r0, r1} and m in {r2, r3} then pass them into the + * helper function. The hepler functions return q in {r0, r1} as + * required by the API spec however r is returned on the stack. The + * ABI required us to return r in {r2, r3}. + * + * We need to allocate 8 bytes on the stack to store r, the link + * register, and a pointer to the space where the helper function + * will write r to. After returning from the helper fuinction we load + * the old link register and r from the stack and return. + */ +ENTRY_NP(__aeabi_ldivmod) + sub sp, sp, #8 /* Space for the remainder */ + stmfd sp!, {sp, lr} /* Save a pointer to the above space and lr */ + bl __kern_ldivmod + ldr lr, [sp, #4] /* Restore lr */ + add sp, sp, #8 /* Move sp to the remainder value */ + ldmfd sp!, {r2, r3} /* Load the remainder */ + mov pc, lr +END(__aeabi_ldivmod) + +ENTRY_NP(__aeabi_uldivmod) + sub sp, sp, #8 /* Space for the remainder */ + stmfd sp!, {sp, lr} /* Save a pointer to the above space and lr */ + bl __qdivrem + ldr lr, [sp, #4] /* Restore lr */ + add sp, sp, #8 /* Move sp to the remainder value */ + ldmfd sp!, {r2, r3} /* Load the remainder */ + mov pc, lr +END(__aeabi_uldivmod) diff --git a/arch/arm/arm/ldivmod_helper.c b/arch/arm/arm/ldivmod_helper.c new file mode 100644 index 0000000..098523e --- /dev/null +++ b/arch/arm/arm/ldivmod_helper.c @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * Copyright (C) 2012 Andrew Turner + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include <uk/arch/types.h> + +__u64 __qdivrem(__u64 u, __u64 v, __u64 *rem); + +#ifndef HAVE_LIBC +__s64 __divdi3(__s64 a, __s64 b) +{ + __u64 ua, ub, uq; + int neg; + + if (a < 0) + ua = -(__u64)a, neg = 1; + else + ua = a, neg = 0; + if (b < 0) + ub = -(__u64)b, neg ^= 1; + else + ub = b; + uq = __qdivrem(ua, ub, (__u64 *)0); + return neg ? -uq : uq; +} +#endif + +/* + * Helper for __aeabi_ldivmod. + * TODO: __divdi3 calls __qdivrem. We should do the same and use the + * remainder value rather than re-calculating it. + */ +long long __kern_ldivmod(long long, long long, long long *); + +long long __kern_ldivmod(long long n, long long m, long long *rem) +{ + long long q; + + q = __divdi3(n, m); /* q = n / m */ + *rem = n - m * q; + + return q; +} diff --git a/arch/arm/arm/qdivrem.c b/arch/arm/arm/qdivrem.c new file mode 100644 index 0000000..e7d1471 --- /dev/null +++ b/arch/arm/arm/qdivrem.c @@ -0,0 +1,324 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <uk/arch/types.h> +#include <uk/arch/limits.h> + +/* + * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), + * section 4.3.1, pp. 257--259. + */ + +/* + * From + * @(#)quad.h 8.1 (Berkeley) 6/4/93 + */ + +#ifdef __BIG_ENDIAN +#define _QUAD_HIGHWORD 0 +#define _QUAD_LOWWORD 1 +#else /* __LITTLE_ENDIAN */ +#define _QUAD_HIGHWORD 1 +#define _QUAD_LOWWORD 0 +#endif + +/* + * Define high and low longwords. + */ +#define QUADH _QUAD_HIGHWORD +#define QUADL _QUAD_LOWWORD + +/* + * Total number of bits in a quad_t and in the pieces that make it up. + * These are used for shifting, and also below for halfword extraction + * and assembly. 
+ */ +#define CHAR_BIT 8 /* number of bits in a char */ +#define QUAD_BITS (sizeof(__s64) * CHAR_BIT) +#define LONG_BITS (sizeof(long) * CHAR_BIT) +#define HALF_BITS (sizeof(long) * CHAR_BIT / 2) + +#define DIGIT_BASE (1 << HALF_BITS) /* digit base */ +/* + * Extract high and low shortwords from longword, and move low shortword of + * longword to upper half of long, i.e., produce the upper longword of + * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.) + * + * These are used in the multiply code, to split a longword into upper + * and lower halves, and to reassemble a product as a quad_t, shifted left + * (sizeof(long)*CHAR_BIT/2). + */ +#define HHALF(x) ((x) >> HALF_BITS) +#define LHALF(x) ((x) & ((1 << HALF_BITS) - 1)) +#define LHUP(x) ((x) << HALF_BITS) + +#define COMBINE(a, b) (((unsigned long)(a) << HALF_BITS) | (b)) + +/* + * Depending on the desired operation, we view a `long long' (aka quad_t) in + * one or more of the following formats. + */ +union uu { + __s64 q; /* as a (signed) quad */ + __s64 uq; /* as an unsigned quad */ + long sl[2]; /* as two signed longs */ + unsigned long ul[2]; /* as two unsigned longs */ +}; + +#define B (1 << HALF_BITS) /* digit base */ + +/* select a type for digits in base B: use unsigned short if they fit */ +#if __UL_MAX == 0xffffffff && __US_MAX >= 0xffff +typedef unsigned short digit; +#else +typedef unsigned long digit; +#endif + +/* + * Shift p[0]..p[len] left `sh' bits, ignoring any bits that + * `fall out' the left (there never will be any such anyway). + * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. + */ +static void __shl(register digit *p, register int len, register int sh) +{ + register int i; + + for (i = 0; i < len; i++) + p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); + p[i] = LHALF(p[i] << sh); +} + +/* + * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. + * + * We do this in base 2-sup-HALF_BITS, so that all intermediate products + * fit within u_long. As a consequence, the maximum length dividend and + * divisor are 4 `digits' in this base (they are shorter if they have + * leading zeros). + */ +__u64 __qdivrem(__u64 uq, __u64 vq, __u64 *arq) +{ + union uu tmp; + digit *u, *v, *q; + register digit v1, v2; + unsigned long qhat, rhat, t; + int m, n, d, j, i; + digit uspace[5], vspace[5], qspace[5]; + + /* + * Take care of special cases: divide by zero, and u < v. + */ + if (vq == 0) { + /* divide by zero. */ + static volatile const unsigned int zero = 0; + + tmp.ul[QUADH] = tmp.ul[QUADL] = 1 / zero; + if (arq) + *arq = uq; + return tmp.q; + } + if (uq < vq) { + if (arq) + *arq = uq; + return 0; + } + u = &uspace[0]; + v = &vspace[0]; + q = &qspace[0]; + + /* + * Break dividend and divisor into digits in base B, then + * count leading zeros to determine m and n. 
When done, we + * will have: + * u = (u[1]u[2]...u[m+n]) sub B + * v = (v[1]v[2]...v[n]) sub B + * v[1] != 0 + * 1 < n <= 4 (if n = 1, we use a different division algorithm) + * m >= 0 (otherwise u < v, which we already checked) + * m + n = 4 + * and thus + * m = 4 - n <= 2 + */ + tmp.uq = uq; + u[0] = 0; + u[1] = HHALF(tmp.ul[QUADH]); + u[2] = LHALF(tmp.ul[QUADH]); + u[3] = HHALF(tmp.ul[QUADL]); + u[4] = LHALF(tmp.ul[QUADL]); + tmp.uq = vq; + v[1] = HHALF(tmp.ul[QUADH]); + v[2] = LHALF(tmp.ul[QUADH]); + v[3] = HHALF(tmp.ul[QUADL]); + v[4] = LHALF(tmp.ul[QUADL]); + for (n = 4; v[1] == 0; v++) { + if (--n == 1) { + unsigned long rbj; /* r*B+u[j] (not root boy jim) */ + digit q1, q2, q3, q4; + + /* + * Change of plan, per exercise 16. + * r = 0; + * for j = 1..4: + * q[j] = floor((r*B + u[j]) / v), + * r = (r*B + u[j]) % v; + * We unroll this completely here. + */ + t = v[2]; /* nonzero, by definition */ + q1 = u[1] / t; + rbj = COMBINE(u[1] % t, u[2]); + q2 = rbj / t; + rbj = COMBINE(rbj % t, u[3]); + q3 = rbj / t; + rbj = COMBINE(rbj % t, u[4]); + q4 = rbj / t; + if (arq) + *arq = rbj % t; + tmp.ul[QUADH] = COMBINE(q1, q2); + tmp.ul[QUADL] = COMBINE(q3, q4); + return tmp.q; + } + } + + /* + * By adjusting q once we determine m, we can guarantee that + * there is a complete four-digit quotient at &qspace[1] when + * we finally stop. + */ + for (m = 4 - n; u[1] == 0; u++) + m--; + for (i = 4 - m; --i >= 0;) + q[i] = 0; + q += 4 - m; + + /* + * Here we run Program D, translated from MIX to C and acquiring + * a few minor changes. + * + * D1: choose multiplier 1 << d to ensure v[1] >= B/2. + */ + d = 0; + for (t = v[1]; t < B / 2; t <<= 1) + d++; + if (d > 0) { + __shl(&u[0], m + n, d); /* u <<= d */ + __shl(&v[1], n - 1, d); /* v <<= d */ + } + /* + * D2: j = 0. + */ + j = 0; + v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ + v2 = v[2]; /* for D3 */ + do { + register digit uj0, uj1, uj2; + + /* + * D3: Calculate qhat (\^q, in TeX notation). + * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and + * let rhat = (u[j]*B + u[j+1]) mod v[1]. + * While rhat < B and v[2]*qhat > rhat*B+u[j+2], + * decrement qhat and increase rhat correspondingly. + * Note that if rhat >= B, v[2]*qhat < rhat*B. + */ + uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ + uj1 = u[j + 1]; /* for D3 only */ + uj2 = u[j + 2]; /* for D3 only */ + if (uj0 == v1) { + qhat = B; + rhat = uj1; + goto qhat_too_big; + } else { + unsigned long nn = COMBINE(uj0, uj1); + + qhat = nn / v1; + rhat = nn % v1; + } + while (v2 * qhat > COMBINE(rhat, uj2)) { +qhat_too_big: + qhat--; + if ((rhat += v1) >= B) + break; + } + /* + * D4: Multiply and subtract. + * The variable `t' holds any borrows across the loop. + * We split this up so that we do not require v[0] = 0, + * and to eliminate a final special case. + */ + for (t = 0, i = n; i > 0; i--) { + t = u[i + j] - v[i] * qhat - t; + u[i + j] = LHALF(t); + t = (B - HHALF(t)) & (B - 1); + } + t = u[j] - t; + u[j] = LHALF(t); + /* + * D5: test remainder. + * There is a borrow if and only if HHALF(t) is nonzero; + * in that (rare) case, qhat was too large (by exactly 1). + * Fix it by adding v[1..n] to u[j..j+n]. + */ + if (HHALF(t)) { + qhat--; + for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ + t += u[i + j] + v[i]; + u[i + j] = LHALF(t); + t = HHALF(t); + } + u[j] = LHALF(u[j] + t); + } + q[j] = qhat; + } while (++j <= m); /* D7: loop on j. 
*/ + + /* + * If caller wants the remainder, we have to calculate it as + * u[m..m+n] >> d (this is at most n digits and thus fits in + * u[m+1..m+n], but we may need more source digits). + */ + if (arq) { + if (d) { + for (i = m + n; i > m; --i) + u[i] = (u[i] >> d) | + LHALF(u[i - 1] << (HALF_BITS - d)); + u[i] = 0; + } + tmp.ul[QUADH] = COMBINE(uspace[1], uspace[2]); + tmp.ul[QUADL] = COMBINE(uspace[3], uspace[4]); + *arq = tmp.q; + } + + tmp.ul[QUADH] = COMBINE(qspace[1], qspace[2]); + tmp.ul[QUADL] = COMBINE(qspace[3], qspace[4]); + return tmp.q; +} diff --git a/arch/arm/divsi3.S b/arch/arm/divsi3.S deleted file mode 100644 index fa92233..0000000 --- a/arch/arm/divsi3.S +++ /dev/null @@ -1,404 +0,0 @@ -/* $NetBSD: divsi3.S,v 1.4 2003/04/05 23:27:15 bjh21 Exp $ */ - -/*- - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#define ENTRY_NP(symbol) \ - .globl symbol; \ - symbol: - -#define END(symbol) - -/* - * stack is aligned as there's a possibility of branching to L_overflow - * which makes a C call - */ - -ENTRY_NP(__umodsi3) - stmfd sp!, {lr} - sub sp, sp, #4 /* align stack */ - bl .L_udivide - add sp, sp, #4 /* unalign stack */ - mov r0, r1 - ldmfd sp!, {pc} -END(__umodsi3) - -ENTRY_NP(__modsi3) - stmfd sp!, {lr} - sub sp, sp, #4 /* align stack */ - bl .L_divide - add sp, sp, #4 /* unalign stack */ - mov r0, r1 - ldmfd sp!, {pc} - -.L_overflow: - /* XXX should cause a fatal error */ - mvn r0, #0 - mov pc, lr - -END(__modsi3) - -#ifdef __ARM_EABI__ -ENTRY_NP(__aeabi_uidiv) -ENTRY_NP(__aeabi_uidivmod) -#endif -ENTRY_NP(__udivsi3) -.L_udivide: /* r0 = r0 / r1; r1 = r0 % r1 */ - eor r0, r1, r0 - eor r1, r0, r1 - eor r0, r1, r0 - /* r0 = r1 / r0; r1 = r1 % r0 */ - cmp r0, #1 - bcc .L_overflow - beq .L_divide_l0 - mov ip, #0 - movs r1, r1 - bpl .L_divide_l1 - orr ip, ip, #0x20000000 /* ip bit 0x20000000 = -ve r1 */ - movs r1, r1, lsr #1 - orrcs ip, ip, #0x10000000 /* ip bit 0x10000000 = bit 0 of r1 */ - b .L_divide_l1 - -.L_divide_l0: /* r0 == 1 */ - mov r0, r1 - mov r1, #0 - mov pc, lr -#ifdef __ARM_EABI__ -END(__aeabi_uidiv) -END(__aeabi_uidivmod) -#endif -END(__udivsi3) - -#ifdef __ARM_EABI__ -ENTRY_NP(__aeabi_idiv) -ENTRY_NP(__aeabi_idivmod) -#endif -ENTRY_NP(__divsi3) -.L_divide: /* r0 = r0 / r1; r1 = r0 % r1 */ - eor r0, r1, r0 - eor r1, r0, r1 - eor r0, r1, r0 - /* r0 = r1 / r0; r1 = r1 % r0 */ - cmp r0, #1 - bcc .L_overflow - beq .L_divide_l0 - ands ip, r0, #0x80000000 - rsbmi r0, r0, #0 - ands r2, r1, #0x80000000 - eor ip, ip, r2 - rsbmi r1, r1, #0 - orr ip, r2, ip, lsr #1 /* ip bit 0x40000000 = -ve division */ - /* ip bit 0x80000000 = -ve remainder */ - -.L_divide_l1: - mov r2, #1 - mov r3, #0 - - /* - * If the highest bit of the dividend is set, we have to be - * careful when shifting the divisor. Test this. 
- */ - movs r1,r1 - bpl .L_old_code - - /* - * At this point, the highest bit of r1 is known to be set. - * We abuse this below in the tst instructions. - */ - tst r1, r0 /*, lsl #0 */ - bmi .L_divide_b1 - tst r1, r0, lsl #1 - bmi .L_divide_b2 - tst r1, r0, lsl #2 - bmi .L_divide_b3 - tst r1, r0, lsl #3 - bmi .L_divide_b4 - tst r1, r0, lsl #4 - bmi .L_divide_b5 - tst r1, r0, lsl #5 - bmi .L_divide_b6 - tst r1, r0, lsl #6 - bmi .L_divide_b7 - tst r1, r0, lsl #7 - bmi .L_divide_b8 - tst r1, r0, lsl #8 - bmi .L_divide_b9 - tst r1, r0, lsl #9 - bmi .L_divide_b10 - tst r1, r0, lsl #10 - bmi .L_divide_b11 - tst r1, r0, lsl #11 - bmi .L_divide_b12 - tst r1, r0, lsl #12 - bmi .L_divide_b13 - tst r1, r0, lsl #13 - bmi .L_divide_b14 - tst r1, r0, lsl #14 - bmi .L_divide_b15 - tst r1, r0, lsl #15 - bmi .L_divide_b16 - tst r1, r0, lsl #16 - bmi .L_divide_b17 - tst r1, r0, lsl #17 - bmi .L_divide_b18 - tst r1, r0, lsl #18 - bmi .L_divide_b19 - tst r1, r0, lsl #19 - bmi .L_divide_b20 - tst r1, r0, lsl #20 - bmi .L_divide_b21 - tst r1, r0, lsl #21 - bmi .L_divide_b22 - tst r1, r0, lsl #22 - bmi .L_divide_b23 - tst r1, r0, lsl #23 - bmi .L_divide_b24 - tst r1, r0, lsl #24 - bmi .L_divide_b25 - tst r1, r0, lsl #25 - bmi .L_divide_b26 - tst r1, r0, lsl #26 - bmi .L_divide_b27 - tst r1, r0, lsl #27 - bmi .L_divide_b28 - tst r1, r0, lsl #28 - bmi .L_divide_b29 - tst r1, r0, lsl #29 - bmi .L_divide_b30 - tst r1, r0, lsl #30 - bmi .L_divide_b31 -/* - * instead of: - * tst r1, r0, lsl #31 - * bmi .L_divide_b32 - */ - b .L_divide_b32 - -.L_old_code: - cmp r1, r0 - bcc .L_divide_b0 - cmp r1, r0, lsl #1 - bcc .L_divide_b1 - cmp r1, r0, lsl #2 - bcc .L_divide_b2 - cmp r1, r0, lsl #3 - bcc .L_divide_b3 - cmp r1, r0, lsl #4 - bcc .L_divide_b4 - cmp r1, r0, lsl #5 - bcc .L_divide_b5 - cmp r1, r0, lsl #6 - bcc .L_divide_b6 - cmp r1, r0, lsl #7 - bcc .L_divide_b7 - cmp r1, r0, lsl #8 - bcc .L_divide_b8 - cmp r1, r0, lsl #9 - bcc .L_divide_b9 - cmp r1, r0, lsl #10 - bcc .L_divide_b10 - cmp r1, r0, lsl #11 - bcc .L_divide_b11 - cmp r1, r0, lsl #12 - bcc .L_divide_b12 - cmp r1, r0, lsl #13 - bcc .L_divide_b13 - cmp r1, r0, lsl #14 - bcc .L_divide_b14 - cmp r1, r0, lsl #15 - bcc .L_divide_b15 - cmp r1, r0, lsl #16 - bcc .L_divide_b16 - cmp r1, r0, lsl #17 - bcc .L_divide_b17 - cmp r1, r0, lsl #18 - bcc .L_divide_b18 - cmp r1, r0, lsl #19 - bcc .L_divide_b19 - cmp r1, r0, lsl #20 - bcc .L_divide_b20 - cmp r1, r0, lsl #21 - bcc .L_divide_b21 - cmp r1, r0, lsl #22 - bcc .L_divide_b22 - cmp r1, r0, lsl #23 - bcc .L_divide_b23 - cmp r1, r0, lsl #24 - bcc .L_divide_b24 - cmp r1, r0, lsl #25 - bcc .L_divide_b25 - cmp r1, r0, lsl #26 - bcc .L_divide_b26 - cmp r1, r0, lsl #27 - bcc .L_divide_b27 - cmp r1, r0, lsl #28 - bcc .L_divide_b28 - cmp r1, r0, lsl #29 - bcc .L_divide_b29 - cmp r1, r0, lsl #30 - bcc .L_divide_b30 -.L_divide_b32: - cmp r1, r0, lsl #31 - subhs r1, r1,r0, lsl #31 - addhs r3, r3,r2, lsl #31 -.L_divide_b31: - cmp r1, r0, lsl #30 - subhs r1, r1,r0, lsl #30 - addhs r3, r3,r2, lsl #30 -.L_divide_b30: - cmp r1, r0, lsl #29 - subhs r1, r1,r0, lsl #29 - addhs r3, r3,r2, lsl #29 -.L_divide_b29: - cmp r1, r0, lsl #28 - subhs r1, r1,r0, lsl #28 - addhs r3, r3,r2, lsl #28 -.L_divide_b28: - cmp r1, r0, lsl #27 - subhs r1, r1,r0, lsl #27 - addhs r3, r3,r2, lsl #27 -.L_divide_b27: - cmp r1, r0, lsl #26 - subhs r1, r1,r0, lsl #26 - addhs r3, r3,r2, lsl #26 -.L_divide_b26: - cmp r1, r0, lsl #25 - subhs r1, r1,r0, lsl #25 - addhs r3, r3,r2, lsl #25 -.L_divide_b25: - cmp r1, r0, lsl #24 - subhs r1, r1,r0, lsl #24 - addhs r3, r3,r2, 
lsl #24 -.L_divide_b24: - cmp r1, r0, lsl #23 - subhs r1, r1,r0, lsl #23 - addhs r3, r3,r2, lsl #23 -.L_divide_b23: - cmp r1, r0, lsl #22 - subhs r1, r1,r0, lsl #22 - addhs r3, r3,r2, lsl #22 -.L_divide_b22: - cmp r1, r0, lsl #21 - subhs r1, r1,r0, lsl #21 - addhs r3, r3,r2, lsl #21 -.L_divide_b21: - cmp r1, r0, lsl #20 - subhs r1, r1,r0, lsl #20 - addhs r3, r3,r2, lsl #20 -.L_divide_b20: - cmp r1, r0, lsl #19 - subhs r1, r1,r0, lsl #19 - addhs r3, r3,r2, lsl #19 -.L_divide_b19: - cmp r1, r0, lsl #18 - subhs r1, r1,r0, lsl #18 - addhs r3, r3,r2, lsl #18 -.L_divide_b18: - cmp r1, r0, lsl #17 - subhs r1, r1,r0, lsl #17 - addhs r3, r3,r2, lsl #17 -.L_divide_b17: - cmp r1, r0, lsl #16 - subhs r1, r1,r0, lsl #16 - addhs r3, r3,r2, lsl #16 -.L_divide_b16: - cmp r1, r0, lsl #15 - subhs r1, r1,r0, lsl #15 - addhs r3, r3,r2, lsl #15 -.L_divide_b15: - cmp r1, r0, lsl #14 - subhs r1, r1,r0, lsl #14 - addhs r3, r3,r2, lsl #14 -.L_divide_b14: - cmp r1, r0, lsl #13 - subhs r1, r1,r0, lsl #13 - addhs r3, r3,r2, lsl #13 -.L_divide_b13: - cmp r1, r0, lsl #12 - subhs r1, r1,r0, lsl #12 - addhs r3, r3,r2, lsl #12 -.L_divide_b12: - cmp r1, r0, lsl #11 - subhs r1, r1,r0, lsl #11 - addhs r3, r3,r2, lsl #11 -.L_divide_b11: - cmp r1, r0, lsl #10 - subhs r1, r1,r0, lsl #10 - addhs r3, r3,r2, lsl #10 -.L_divide_b10: - cmp r1, r0, lsl #9 - subhs r1, r1,r0, lsl #9 - addhs r3, r3,r2, lsl #9 -.L_divide_b9: - cmp r1, r0, lsl #8 - subhs r1, r1,r0, lsl #8 - addhs r3, r3,r2, lsl #8 -.L_divide_b8: - cmp r1, r0, lsl #7 - subhs r1, r1,r0, lsl #7 - addhs r3, r3,r2, lsl #7 -.L_divide_b7: - cmp r1, r0, lsl #6 - subhs r1, r1,r0, lsl #6 - addhs r3, r3,r2, lsl #6 -.L_divide_b6: - cmp r1, r0, lsl #5 - subhs r1, r1,r0, lsl #5 - addhs r3, r3,r2, lsl #5 -.L_divide_b5: - cmp r1, r0, lsl #4 - subhs r1, r1,r0, lsl #4 - addhs r3, r3,r2, lsl #4 -.L_divide_b4: - cmp r1, r0, lsl #3 - subhs r1, r1,r0, lsl #3 - addhs r3, r3,r2, lsl #3 -.L_divide_b3: - cmp r1, r0, lsl #2 - subhs r1, r1,r0, lsl #2 - addhs r3, r3,r2, lsl #2 -.L_divide_b2: - cmp r1, r0, lsl #1 - subhs r1, r1,r0, lsl #1 - addhs r3, r3,r2, lsl #1 -.L_divide_b1: - cmp r1, r0 - subhs r1, r1, r0 - addhs r3, r3, r2 -.L_divide_b0: - - tst ip, #0x20000000 - bne .L_udivide_l1 - mov r0, r3 - cmp ip, #0 - rsbmi r1, r1, #0 - movs ip, ip, lsl #1 - bicmi r0, r0, #0x80000000 /* Fix incase we divided 0x80000000 */ - rsbmi r0, r0, #0 - mov pc, lr - -.L_udivide_l1: - tst ip, #0x10000000 - mov r1, r1, lsl #1 - orrne r1, r1, #1 - mov r3, r3, lsl #1 - cmp r1, r0 - subhs r1, r1, r0 - addhs r3, r3, r2 - mov r0, r3 - mov pc, lr -END(__aeabi_idiv) -END(__aeabi_idivmod) -END(__divsi3) - diff --git a/arch/arm/ldivmod.S b/arch/arm/ldivmod.S deleted file mode 100644 index 3c3083b..0000000 --- a/arch/arm/ldivmod.S +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: BSD-2-Clause */ -/* - * Copyright (C) 2012 Andrew Turner - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - */ - -#define ENTRY_NP(symbol) \ - .globl symbol; \ - symbol: - -#define END(symbol) - -/* - * These calculate: - * q = n / m - * With a remainer r. - * - * They take n in {r0, r1} and m in {r2, r3} then pass them into the - * helper function. The hepler functions return q in {r0, r1} as - * required by the API spec however r is returned on the stack. The - * ABI required us to return r in {r2, r3}. - * - * We need to allocate 8 bytes on the stack to store r, the link - * register, and a pointer to the space where the helper function - * will write r to. After returning from the helper fuinction we load - * the old link register and r from the stack and return. - */ -ENTRY_NP(__aeabi_ldivmod) - sub sp, sp, #8 /* Space for the remainder */ - stmfd sp!, {sp, lr} /* Save a pointer to the above space and lr */ - bl __kern_ldivmod - ldr lr, [sp, #4] /* Restore lr */ - add sp, sp, #8 /* Move sp to the remainder value */ - ldmfd sp!, {r2, r3} /* Load the remainder */ - mov pc, lr -END(__aeabi_ldivmod) - -ENTRY_NP(__aeabi_uldivmod) - sub sp, sp, #8 /* Space for the remainder */ - stmfd sp!, {sp, lr} /* Save a pointer to the above space and lr */ - bl __qdivrem - ldr lr, [sp, #4] /* Restore lr */ - add sp, sp, #8 /* Move sp to the remainder value */ - ldmfd sp!, {r2, r3} /* Load the remainder */ - mov pc, lr -END(__aeabi_uldivmod) diff --git a/arch/arm/ldivmod_helper.c b/arch/arm/ldivmod_helper.c deleted file mode 100644 index 098523e..0000000 --- a/arch/arm/ldivmod_helper.c +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: BSD-2-Clause */ -/* - * Copyright (C) 2012 Andrew Turner - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - */ - -#include <uk/arch/types.h> - -__u64 __qdivrem(__u64 u, __u64 v, __u64 *rem); - -#ifndef HAVE_LIBC -__s64 __divdi3(__s64 a, __s64 b) -{ - __u64 ua, ub, uq; - int neg; - - if (a < 0) - ua = -(__u64)a, neg = 1; - else - ua = a, neg = 0; - if (b < 0) - ub = -(__u64)b, neg ^= 1; - else - ub = b; - uq = __qdivrem(ua, ub, (__u64 *)0); - return neg ? -uq : uq; -} -#endif - -/* - * Helper for __aeabi_ldivmod. - * TODO: __divdi3 calls __qdivrem. We should do the same and use the - * remainder value rather than re-calculating it. - */ -long long __kern_ldivmod(long long, long long, long long *); - -long long __kern_ldivmod(long long n, long long m, long long *rem) -{ - long long q; - - q = __divdi3(n, m); /* q = n / m */ - *rem = n - m * q; - - return q; -} diff --git a/arch/arm/qdivrem.c b/arch/arm/qdivrem.c deleted file mode 100644 index e7d1471..0000000 --- a/arch/arm/qdivrem.c +++ /dev/null @@ -1,324 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause */ -/*- - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <uk/arch/types.h> -#include <uk/arch/limits.h> - -/* - * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), - * section 4.3.1, pp. 257--259. 
- */ - -/* - * From - * @(#)quad.h 8.1 (Berkeley) 6/4/93 - */ - -#ifdef __BIG_ENDIAN -#define _QUAD_HIGHWORD 0 -#define _QUAD_LOWWORD 1 -#else /* __LITTLE_ENDIAN */ -#define _QUAD_HIGHWORD 1 -#define _QUAD_LOWWORD 0 -#endif - -/* - * Define high and low longwords. - */ -#define QUADH _QUAD_HIGHWORD -#define QUADL _QUAD_LOWWORD - -/* - * Total number of bits in a quad_t and in the pieces that make it up. - * These are used for shifting, and also below for halfword extraction - * and assembly. - */ -#define CHAR_BIT 8 /* number of bits in a char */ -#define QUAD_BITS (sizeof(__s64) * CHAR_BIT) -#define LONG_BITS (sizeof(long) * CHAR_BIT) -#define HALF_BITS (sizeof(long) * CHAR_BIT / 2) - -#define DIGIT_BASE (1 << HALF_BITS) /* digit base */ -/* - * Extract high and low shortwords from longword, and move low shortword of - * longword to upper half of long, i.e., produce the upper longword of - * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.) - * - * These are used in the multiply code, to split a longword into upper - * and lower halves, and to reassemble a product as a quad_t, shifted left - * (sizeof(long)*CHAR_BIT/2). - */ -#define HHALF(x) ((x) >> HALF_BITS) -#define LHALF(x) ((x) & ((1 << HALF_BITS) - 1)) -#define LHUP(x) ((x) << HALF_BITS) - -#define COMBINE(a, b) (((unsigned long)(a) << HALF_BITS) | (b)) - -/* - * Depending on the desired operation, we view a `long long' (aka quad_t) in - * one or more of the following formats. - */ -union uu { - __s64 q; /* as a (signed) quad */ - __s64 uq; /* as an unsigned quad */ - long sl[2]; /* as two signed longs */ - unsigned long ul[2]; /* as two unsigned longs */ -}; - -#define B (1 << HALF_BITS) /* digit base */ - -/* select a type for digits in base B: use unsigned short if they fit */ -#if __UL_MAX == 0xffffffff && __US_MAX >= 0xffff -typedef unsigned short digit; -#else -typedef unsigned long digit; -#endif - -/* - * Shift p[0]..p[len] left `sh' bits, ignoring any bits that - * `fall out' the left (there never will be any such anyway). - * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. - */ -static void __shl(register digit *p, register int len, register int sh) -{ - register int i; - - for (i = 0; i < len; i++) - p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); - p[i] = LHALF(p[i] << sh); -} - -/* - * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. - * - * We do this in base 2-sup-HALF_BITS, so that all intermediate products - * fit within u_long. As a consequence, the maximum length dividend and - * divisor are 4 `digits' in this base (they are shorter if they have - * leading zeros). - */ -__u64 __qdivrem(__u64 uq, __u64 vq, __u64 *arq) -{ - union uu tmp; - digit *u, *v, *q; - register digit v1, v2; - unsigned long qhat, rhat, t; - int m, n, d, j, i; - digit uspace[5], vspace[5], qspace[5]; - - /* - * Take care of special cases: divide by zero, and u < v. - */ - if (vq == 0) { - /* divide by zero. */ - static volatile const unsigned int zero = 0; - - tmp.ul[QUADH] = tmp.ul[QUADL] = 1 / zero; - if (arq) - *arq = uq; - return tmp.q; - } - if (uq < vq) { - if (arq) - *arq = uq; - return 0; - } - u = &uspace[0]; - v = &vspace[0]; - q = &qspace[0]; - - /* - * Break dividend and divisor into digits in base B, then - * count leading zeros to determine m and n. 
When done, we - * will have: - * u = (u[1]u[2]...u[m+n]) sub B - * v = (v[1]v[2]...v[n]) sub B - * v[1] != 0 - * 1 < n <= 4 (if n = 1, we use a different division algorithm) - * m >= 0 (otherwise u < v, which we already checked) - * m + n = 4 - * and thus - * m = 4 - n <= 2 - */ - tmp.uq = uq; - u[0] = 0; - u[1] = HHALF(tmp.ul[QUADH]); - u[2] = LHALF(tmp.ul[QUADH]); - u[3] = HHALF(tmp.ul[QUADL]); - u[4] = LHALF(tmp.ul[QUADL]); - tmp.uq = vq; - v[1] = HHALF(tmp.ul[QUADH]); - v[2] = LHALF(tmp.ul[QUADH]); - v[3] = HHALF(tmp.ul[QUADL]); - v[4] = LHALF(tmp.ul[QUADL]); - for (n = 4; v[1] == 0; v++) { - if (--n == 1) { - unsigned long rbj; /* r*B+u[j] (not root boy jim) */ - digit q1, q2, q3, q4; - - /* - * Change of plan, per exercise 16. - * r = 0; - * for j = 1..4: - * q[j] = floor((r*B + u[j]) / v), - * r = (r*B + u[j]) % v; - * We unroll this completely here. - */ - t = v[2]; /* nonzero, by definition */ - q1 = u[1] / t; - rbj = COMBINE(u[1] % t, u[2]); - q2 = rbj / t; - rbj = COMBINE(rbj % t, u[3]); - q3 = rbj / t; - rbj = COMBINE(rbj % t, u[4]); - q4 = rbj / t; - if (arq) - *arq = rbj % t; - tmp.ul[QUADH] = COMBINE(q1, q2); - tmp.ul[QUADL] = COMBINE(q3, q4); - return tmp.q; - } - } - - /* - * By adjusting q once we determine m, we can guarantee that - * there is a complete four-digit quotient at &qspace[1] when - * we finally stop. - */ - for (m = 4 - n; u[1] == 0; u++) - m--; - for (i = 4 - m; --i >= 0;) - q[i] = 0; - q += 4 - m; - - /* - * Here we run Program D, translated from MIX to C and acquiring - * a few minor changes. - * - * D1: choose multiplier 1 << d to ensure v[1] >= B/2. - */ - d = 0; - for (t = v[1]; t < B / 2; t <<= 1) - d++; - if (d > 0) { - __shl(&u[0], m + n, d); /* u <<= d */ - __shl(&v[1], n - 1, d); /* v <<= d */ - } - /* - * D2: j = 0. - */ - j = 0; - v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ - v2 = v[2]; /* for D3 */ - do { - register digit uj0, uj1, uj2; - - /* - * D3: Calculate qhat (\^q, in TeX notation). - * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and - * let rhat = (u[j]*B + u[j+1]) mod v[1]. - * While rhat < B and v[2]*qhat > rhat*B+u[j+2], - * decrement qhat and increase rhat correspondingly. - * Note that if rhat >= B, v[2]*qhat < rhat*B. - */ - uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ - uj1 = u[j + 1]; /* for D3 only */ - uj2 = u[j + 2]; /* for D3 only */ - if (uj0 == v1) { - qhat = B; - rhat = uj1; - goto qhat_too_big; - } else { - unsigned long nn = COMBINE(uj0, uj1); - - qhat = nn / v1; - rhat = nn % v1; - } - while (v2 * qhat > COMBINE(rhat, uj2)) { -qhat_too_big: - qhat--; - if ((rhat += v1) >= B) - break; - } - /* - * D4: Multiply and subtract. - * The variable `t' holds any borrows across the loop. - * We split this up so that we do not require v[0] = 0, - * and to eliminate a final special case. - */ - for (t = 0, i = n; i > 0; i--) { - t = u[i + j] - v[i] * qhat - t; - u[i + j] = LHALF(t); - t = (B - HHALF(t)) & (B - 1); - } - t = u[j] - t; - u[j] = LHALF(t); - /* - * D5: test remainder. - * There is a borrow if and only if HHALF(t) is nonzero; - * in that (rare) case, qhat was too large (by exactly 1). - * Fix it by adding v[1..n] to u[j..j+n]. - */ - if (HHALF(t)) { - qhat--; - for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ - t += u[i + j] + v[i]; - u[i + j] = LHALF(t); - t = HHALF(t); - } - u[j] = LHALF(u[j] + t); - } - q[j] = qhat; - } while (++j <= m); /* D7: loop on j. 
*/ - - /* - * If caller wants the remainder, we have to calculate it as - * u[m..m+n] >> d (this is at most n digits and thus fits in - * u[m+1..m+n], but we may need more source digits). - */ - if (arq) { - if (d) { - for (i = m + n; i > m; --i) - u[i] = (u[i] >> d) | - LHALF(u[i - 1] << (HALF_BITS - d)); - u[i] = 0; - } - tmp.ul[QUADH] = COMBINE(uspace[1], uspace[2]); - tmp.ul[QUADL] = COMBINE(uspace[3], uspace[4]); - *arq = tmp.q; - } - - tmp.ul[QUADH] = COMBINE(qspace[1], qspace[2]); - tmp.ul[QUADL] = COMBINE(qspace[3], qspace[4]); - return tmp.q; -} diff --git a/arch/x86/Compiler.uk b/arch/x86/Compiler.uk new file mode 100644 index 0000000..eee000d --- /dev/null +++ b/arch/x86/Compiler.uk @@ -0,0 +1,4 @@ +# Include forward declarations by corresponding architecutes +ifeq ($(ARCH_X86_64),y) + include $(UK_BASE)/arch/x86/x86_64/Compiler.uk +endif diff --git a/arch/x86/Makefile.uk b/arch/x86/Makefile.uk new file mode 100644 index 0000000..168e1b5 --- /dev/null +++ b/arch/x86/Makefile.uk @@ -0,0 +1,4 @@ +# Include forward declarations by corresponding architecutes +ifeq ($(ARCH_X86_64),y) + include $(UK_BASE)/arch/x86/x86_64/Makefile.uk +endif diff --git a/arch/x86/x86_64/Compiler.uk b/arch/x86/x86_64/Compiler.uk new file mode 100644 index 0000000..a5470ed --- /dev/null +++ b/arch/x86/x86_64/Compiler.uk @@ -0,0 +1,6 @@ +# set cross compile +ifeq ($(CROSS_COMPILE),) +ifneq ($(UK_ARCH),$(HOSTARCH)) + CROSS_COMPILE := x86_64-linux-gnu- +endif +endif diff --git a/arch/x86/x86_64/Config.uk b/arch/x86/x86_64/Config.uk new file mode 100644 index 0000000..ca49eed --- /dev/null +++ b/arch/x86/x86_64/Config.uk @@ -0,0 +1,89 @@ +choice + prompt "Processor Optimization" + default MARCH_GENERIC + help + Optimize the code for selected target processor + Generic x86_64 does not enable any additional CPU features + (e.g., SSE, 3DNow!) and produces highly protable code for x86. + +config MARCH_NATIVE + bool "Auto-detect host CPU" + help + Optimize compilation to host CPU + Please note that this option will fail in case of cross-compilation + +config MARCH_GENERIC + bool "Generic 64-bit" + help + Compile for 64-bit compatible x86 CPUs + +config MARCH_NOCONA + bool "Intel Pentium 4/Xeon (Nocona)" + help + Improved version of Intel Pentium 4 CPU with 64-bit extensions, MMX, SSE, SSE2 and SSE3 instruction set support. + +config MARCH_CORE2 + bool "Intel Core 2/Xeon" + help + Intel Core 2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 instruction set support. + +config MARCH_COREI7 + bool "Intel Core i7/Xeon" + help + Intel Core i7 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 instruction set support. + +config MARCH_COREI7AVX + bool "Intel Core i7/Xeon +AVX" + help + Intel Core i7 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AES and PCLMUL instruction set support. + +config MARCH_COREI7AVXI + bool "Intel Core/Xeon +AVX +RDRND" + help + Intel Core CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AES, PCLMUL, FSGSBASE, RDRND and F16C instruction set support. + +config MARCH_ATOM + bool "Intel Atom 64-bit" + help + Intel Atom CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 instruction set support. + +config MARCH_K8 + bool "AMD Athlon64/Opteron" + help + Processors based on the AMD K8 core with x86-64 instruction set support, including the AMD Opteron, Athlon 64, and Athlon 64 FX processors. (This supersets MMX, SSE, SSE2, 3DNow!, enhanced 3DNow! and 64-bit instruction set extensions.) 
+
+config MARCH_K8SSE3
+	bool "AMD Athlon64/Opteron +SSE3"
+	help
+		Improved versions of AMD K8 cores with SSE3 instruction set support.
+
+config MARCH_AMDFAM10
+	bool "AMD Family 10h cores"
+	help
+		CPUs based on AMD Family 10h cores with x86-64 instruction set support. (This supersets MMX, SSE, SSE2, SSE3, SSE4A, 3DNow!, enhanced 3DNow!, ABM and 64-bit instruction set extensions.)
+
+config MARCH_BTVER1
+	bool "AMD Family 14h cores"
+	help
+		CPUs based on AMD Family 14h cores with x86-64 instruction set support. (This supersets MMX, SSE, SSE2, SSE3, SSSE3, SSE4A, CX16, ABM and 64-bit instruction set extensions.)
+
+config MARCH_BDVER1
+	bool "AMD Family 15h cores (bdver1)"
+	help
+		CPUs based on AMD Family 15h cores with x86-64 instruction set support. (This supersets FMA4, AVX, XOP, LWP, AES, PCL_MUL, CX16, MMX, SSE, SSE2, SSE3, SSE4A, SSSE3, SSE4.1, SSE4.2, ABM and 64-bit instruction set extensions.)
+
+config MARCH_BDVER2
+	bool "AMD Family 15h cores (bdver2)"
+	help
+		AMD Family 15h core based CPUs with x86-64 instruction set support. (This supersets BMI, TBM, F16C, FMA, AVX, XOP, LWP, AES, PCL_MUL, CX16, MMX, SSE, SSE2, SSE3, SSE4A, SSSE3, SSE4.1, SSE4.2, ABM and 64-bit instruction set extensions.)
+
+config MARCH_BDVER3
+	bool "AMD Family 15h cores (bdver3)"
+	help
+		AMD Family 15h core based CPUs with x86-64 instruction set support. (This supersets BMI, TBM, F16C, FMA, AVX, XOP, LWP, AES, PCL_MUL, CX16, MMX, SSE, SSE2, SSE3, SSE4A, SSSE3, SSE4.1, SSE4.2, ABM and 64-bit instruction set extensions.)
+
+config MARCH_BTVER2
+	bool "AMD Family 16h cores"
+	help
+		CPUs based on AMD Family 16h cores with x86-64 instruction set support. This includes MOVBE, F16C, BMI, AVX, PCL_MUL, AES, SSE4.2, SSE4.1, CX16, ABM, SSE4A, SSSE3, SSE3, SSE2, SSE, MMX and 64-bit instruction set extensions.
+endchoice
diff --git a/arch/x86/x86_64/Makefile.uk b/arch/x86/x86_64/Makefile.uk
new file mode 100644
index 0000000..8c9bfe3
--- /dev/null
+++ b/arch/x86/x86_64/Makefile.uk
@@ -0,0 +1,37 @@
+ASFLAGS += -D__X86_64__
+ASFLAGS += -m64
+CFLAGS += -D__X86_64__
+CFLAGS += -m64 -mno-red-zone -fno-reorder-blocks -fno-asynchronous-unwind-tables
+CXXFLAGS += -D__X86_64__
+CXXFLAGS += -m64 -mno-red-zone -fno-reorder-blocks -fno-asynchronous-unwind-tables
+
+CFLAGS-$(MARCH_GENERIC) += -mtune=generic
+CXXFLAGS-$(MARCH_GENERIC) += -mtune=generic
+CFLAGS-$(MARCH_NOCONA) += -march=nocona
+CXXFLAGS-$(MARCH_NOCONA) += -march=nocona
+CFLAGS-$(MARCH_CORE2) += -march=core2
+CXXFLAGS-$(MARCH_CORE2) += -march=core2
+CFLAGS-$(MARCH_COREI7) += -march=corei7
+CXXFLAGS-$(MARCH_COREI7) += -march=corei7
+CFLAGS-$(MARCH_COREI7AVX) += -march=corei7-avx
+CXXFLAGS-$(MARCH_COREI7AVX) += -march=corei7-avx
+CFLAGS-$(MARCH_COREI7AVXI) += -march=core-avx-i
+CXXFLAGS-$(MARCH_COREI7AVXI) += -march=core-avx-i
+CFLAGS-$(MARCH_ATOM) += -march=atom
+CXXFLAGS-$(MARCH_ATOM) += -march=atom
+CFLAGS-$(MARCH_K8) += -march=k8
+CXXFLAGS-$(MARCH_K8) += -march=k8
+CFLAGS-$(MARCH_K8SSE3) += -march=k8-sse3
+CXXFLAGS-$(MARCH_K8SSE3) += -march=k8-sse3
+CFLAGS-$(MARCH_AMDFAM10) += -march=amdfam10
+CXXFLAGS-$(MARCH_AMDFAM10) += -march=amdfam10
+CFLAGS-$(MARCH_BDVER1) += -march=bdver1
+CXXFLAGS-$(MARCH_BDVER1) += -march=bdver1
+CFLAGS-$(MARCH_BDVER2) += -march=bdver2
+CXXFLAGS-$(MARCH_BDVER2) += -march=bdver2
+CFLAGS-$(MARCH_BDVER3) += -march=bdver3
+CXXFLAGS-$(MARCH_BDVER3) += -march=bdver3
+CFLAGS-$(MARCH_BTVER1) += -march=btver1
+CXXFLAGS-$(MARCH_BTVER1) += -march=btver1
+CFLAGS-$(MARCH_BTVER2) += -march=btver2
+CXXFLAGS-$(MARCH_BTVER2) += -march=btver2
diff --git a/arch/x86_64/Compiler.uk b/arch/x86_64/Compiler.uk
deleted file mode 100644
index a5470ed..0000000
--- a/arch/x86_64/Compiler.uk
+++ /dev/null
@@ -1,6 +0,0 @@
-# set cross compile
-ifeq ($(CROSS_COMPILE),)
-ifneq ($(UK_ARCH),$(HOSTARCH))
-	CROSS_COMPILE := x86_64-linux-gnu-
-endif
-endif
diff --git a/arch/x86_64/Config.uk b/arch/x86_64/Config.uk
deleted file mode 100644
index ca49eed..0000000
--- a/arch/x86_64/Config.uk
+++ /dev/null
@@ -1,89 +0,0 @@
-choice
-	prompt "Processor Optimization"
-	default MARCH_GENERIC
-	help
-		Optimize the code for selected target processor
-		Generic x86_64 does not enable any additional CPU features
-		(e.g., SSE, 3DNow!) and produces highly protable code for x86.
-
-config MARCH_NATIVE
-	bool "Auto-detect host CPU"
-	help
-		Optimize compilation to host CPU
-		Please note that this option will fail in case of cross-compilation
-
-config MARCH_GENERIC
-	bool "Generic 64-bit"
-	help
-		Compile for 64-bit compatible x86 CPUs
-
-config MARCH_NOCONA
-	bool "Intel Pentium 4/Xeon (Nocona)"
-	help
-		Improved version of Intel Pentium 4 CPU with 64-bit extensions, MMX, SSE, SSE2 and SSE3 instruction set support.
-
-config MARCH_CORE2
-	bool "Intel Core 2/Xeon"
-	help
-		Intel Core 2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 instruction set support.
-
-config MARCH_COREI7
-	bool "Intel Core i7/Xeon"
-	help
-		Intel Core i7 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 instruction set support.
-
-config MARCH_COREI7AVX
-	bool "Intel Core i7/Xeon +AVX"
-	help
-		Intel Core i7 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AES and PCLMUL instruction set support.
-
-config MARCH_COREI7AVXI
-	bool "Intel Core/Xeon +AVX +RDRND"
-	help
-		Intel Core CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AES, PCLMUL, FSGSBASE, RDRND and F16C instruction set support.
-
-config MARCH_ATOM
-	bool "Intel Atom 64-bit"
-	help
-		Intel Atom CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 instruction set support.
-
-config MARCH_K8
-	bool "AMD Athlon64/Opteron"
-	help
-		Processors based on the AMD K8 core with x86-64 instruction set support, including the AMD Opteron, Athlon 64, and Athlon 64 FX processors. (This supersets MMX, SSE, SSE2, 3DNow!, enhanced 3DNow! and 64-bit instruction set extensions.)
-
-config MARCH_K8SSE3
-	bool "AMD Athlon64/Opteron +SSE3"
-	help
-		Improved versions of AMD K8 cores with SSE3 instruction set support.
-
-config MARCH_AMDFAM10
-	bool "AMD Family 10h cores"
-	help
-		CPUs based on AMD Family 10h cores with x86-64 instruction set support. (This supersets MMX, SSE, SSE2, SSE3, SSE4A, 3DNow!, enhanced 3DNow!, ABM and 64-bit instruction set extensions.)
-
-config MARCH_BTVER1
-	bool "AMD Family 14h cores"
-	help
-		CPUs based on AMD Family 14h cores with x86-64 instruction set support. (This supersets MMX, SSE, SSE2, SSE3, SSSE3, SSE4A, CX16, ABM and 64-bit instruction set extensions.)
-
-config MARCH_BDVER1
-	bool "AMD Family 15h cores (bdver1)"
-	help
-		CPUs based on AMD Family 15h cores with x86-64 instruction set support. (This supersets FMA4, AVX, XOP, LWP, AES, PCL_MUL, CX16, MMX, SSE, SSE2, SSE3, SSE4A, SSSE3, SSE4.1, SSE4.2, ABM and 64-bit instruction set extensions.)
-
-config MARCH_BDVER2
-	bool "AMD Family 15h cores (bdver2)"
-	help
-		AMD Family 15h core based CPUs with x86-64 instruction set support. (This supersets BMI, TBM, F16C, FMA, AVX, XOP, LWP, AES, PCL_MUL, CX16, MMX, SSE, SSE2, SSE3, SSE4A, SSSE3, SSE4.1, SSE4.2, ABM and 64-bit instruction set extensions.)
-
-config MARCH_BDVER3
-	bool "AMD Family 15h cores (bdver3)"
-	help
-		AMD Family 15h core based CPUs with x86-64 instruction set support. (This supersets BMI, TBM, F16C, FMA, AVX, XOP, LWP, AES, PCL_MUL, CX16, MMX, SSE, SSE2, SSE3, SSE4A, SSSE3, SSE4.1, SSE4.2, ABM and 64-bit instruction set extensions.
-
-config MARCH_BTVER2
-	bool "AMD Family 16h cores"
-	help
-		CPUs based on AMD Family 16h cores with x86-64 instruction set support. This includes MOVBE, F16C, BMI, AVX, PCL_MUL, AES, SSE4.2, SSE4.1, CX16, ABM, SSE4A, SSSE3, SSE3, SSE2, SSE, MMX and 64-bit instruction set extensions.
-endchoice
diff --git a/arch/x86_64/Makefile.uk b/arch/x86_64/Makefile.uk
deleted file mode 100644
index 8c9bfe3..0000000
--- a/arch/x86_64/Makefile.uk
+++ /dev/null
@@ -1,37 +0,0 @@
-ASFLAGS += -D__X86_64__
-ASFLAGS += -m64
-CFLAGS += -D__X86_64__
-CFLAGS += -m64 -mno-red-zone -fno-reorder-blocks -fno-asynchronous-unwind-tables
-CXXFLAGS += -D__X86_64__
-CXXFLAGS += -m64 -mno-red-zone -fno-reorder-blocks -fno-asynchronous-unwind-tables
-
-CFLAGS-$(MARCH_GENERIC) += -mtune=generic
-CXXFLAGS-$(MARCH_GENERIC) += -mtune=generic
-CFLAGS-$(MARCH_NOCONA) += -march=nocona
-CXXFLAGS-$(MARCH_NOCONA) += -march=nocona
-CFLAGS-$(MARCH_CORE2) += -march=core2
-CXXFLAGS-$(MARCH_CORE2) += -march=core2
-CFLAGS-$(MARCH_COREI7) += -march=corei7
-CXXFLAGS-$(MARCH_COREI7) += -march=corei7
-CFLAGS-$(MARCH_COREI7AVX) += -march=corei7-avx
-CXXFLAGS-$(MARCH_COREI7AVX) += -march=corei7-avx
-CFLAGS-$(MARCH_COREI7AVXI) += -march=core-avx-i
-CXXFLAGS-$(MARCH_COREI7AVXI) += -march=core-avx-i
-CFLAGS-$(MARCH_ATOM) += -march=atom
-CXXFLAGS-$(MARCH_ATOM) += -march=atom
-CFLAGS-$(MARCH_K8) += -march=k8
-CXXFLAGS-$(MARCH_K8) += -march=k8
-CFLAGS-$(MARCH_K8SSE3) += -march=k8-sse3
-CXXFLAGS-$(MARCH_K8SSE3) += -march=k8-sse3
-CFLAGS-$(MARCH_AMDFAM10) += -march=amdfam10
-CXXFLAGS-$(MARCH_AMDFAM10) += -march=amdfam10
-CFLAGS-$(MARCH_BDVER1) += -march=bdver1
-CXXFLAGS-$(MARCH_BDVER1) += -march=bdver1
-CFLAGS-$(MARCH_BDVER2) += -march=bdver2
-CXXFLAGS-$(MARCH_BDVER2) += -march=bdver2
-CFLAGS-$(MARCH_BDVER3) += -march=bdver3
-CXXFLAGS-$(MARCH_BDVER3) += -march=bdver3
-CFLAGS-$(MARCH_BTVER1) += -march=btver1
-CXXFLAGS-$(MARCH_BTVER1) += -march=btver1
-CFLAGS-$(MARCH_BTVER2) += -march=btver2
-CXXFLAGS-$(MARCH_BTVER2) += -march=btver2
--
2.7.4
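For readers unfamiliar with the CFLAGS-$(OPTION) idiom used throughout the Makefile.uk files above, here is a minimal, standalone sketch of how it resolves. The MARCH_CORE2 assignment stands in for a symbol that Kconfig would set, and the final merge line (CFLAGS += $(CFLAGS-y)) is an assumption about what the core build system does with these per-option variables; neither is part of the patch.

# Illustration only -- not part of the patch. A config symbol set to
# "y" turns the assignment target into CFLAGS-y; an unset symbol
# expands to the throwaway variable "CFLAGS-", so its flags are dropped.

# As if MARCH_CORE2 had been selected in Config.uk:
MARCH_CORE2 := y

# Expands to "CFLAGS-y += -march=core2" (collected):
CFLAGS-$(MARCH_CORE2) += -march=core2
# Expands to "CFLAGS- += -march=nocona" (never read):
CFLAGS-$(MARCH_NOCONA) += -march=nocona

# Assumed merge step, done by the core build system in practice:
CFLAGS += $(CFLAGS-y)

all:
	@echo $(CFLAGS)

Saved as a standalone Makefile, running make prints "-march=core2"; setting MARCH_NOCONA := y instead would select -march=nocona the same way.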