linux/arch/m68k/fpsp040/x_unfl.S

|
|	x_unfl.sa 3.4 7/1/91
|
|	fpsp_unfl --- FPSP handler for underflow exception
|
| Trap disabled results
|	For 881/2 compatibility, sw must denormalize the intermediate
| result, then store the result.  Denormalization is accomplished
| by taking the intermediate result (which is always normalized) and
| shifting the mantissa right while incrementing the exponent until
| it is equal to the denormalized exponent for the destination
| format.  After denormalization, the result is rounded to the
| destination format.
|
| Trap enabled results
|	All trap disabled code applies.	In addition the exceptional
| operand needs to be made available to the user with a bias of $6000
| added to the exponent.
|

|		Copyright (C) Motorola, Inc. 1990
|			All Rights Reserved
|
|       For details on the license for this file, please see the
|       file, README, in this same directory.

X_UNFL:	|idnt    2,1 | Motorola 040 Floating Point Software Package

	|section	8

#include "fpsp.h"

	|xref	denorm
	|xref	round
	|xref	store
	|xref	g_rndpr
	|xref	g_opcls
	|xref	g_dfmtou
	|xref	real_unfl
	|xref	real_inex
	|xref	fpsp_done
	|xref	b1238_fix

	.global	fpsp_unfl
|
|	fpsp_unfl --- entry point of the underflow exception handler
|
| Builds a work area off a6, pushes the FPU state frame with fsave and
| saves d0-d1/a0-a1, fp0-fp3 and fpcr/fpsr/fpiar into the USER_* slots.
| It then calls unf_res to denormalize, round and store the result,
| and leaves through one of three routes:
|	real_unfl	the underflow trap is enabled in FPCR_ENABLE
|	real_inex	the underflow trap is disabled, but an enabled
|			inex2/inex1 is flagged in FPSR_EXCEPT
|	fpsp_done	neither of the above
|
| On every route where E3 is set in E_BYTE, the dirty bit of the
| destination register is cleared in the frame, b1238_fix is called,
| USER_FPSR is copied to FPSR_SHADOW and sx_mask is or-ed into the
| long at E_BYTE before the saved state is restored.
|
fpsp_unfl:
	link		%a6,#-LOCAL_SIZE	|allocate the local work area
	fsave		-(%a7)		|push the FPU state frame
	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)	|save scratch regs
	fmovemx	%fp0-%fp3,USER_FP0(%a6)	|save fp0-fp3
	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)	|save FPU control regs

|
	bsrl		unf_res	|denormalize, round & store interm op
|
| If underflow exceptions are not enabled, check for inexact
| exception
|
	btstb		#unfl_bit,FPCR_ENABLE(%a6)
	beqs		ck_inex		|unfl trap disabled

	btstb		#E3,E_BYTE(%a6)
	beqs		no_e3_1		|E3 clear, skip the E3 fix-up
|
| Clear dirty bit on dest register in the frame before branching
| to b1238_fix.
|
	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
	bsrl		b1238_fix		|test for bug1238 case
	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	|copy saved FPSR to shadow
	orl		#sx_mask,E_BYTE(%a6)	|or sx_mask into long at E_BYTE
|
| Restore the saved registers and FPU state, release the work area
| and continue in the real underflow handler.
|
no_e3_1:
	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
	fmovemx	USER_FP0(%a6),%fp0-%fp3
	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
	frestore	(%a7)+
	unlk		%a6
	bral		real_unfl
|
| It is possible to have either inex2 or inex1 exceptions with the
| unfl.  If the inex enable bit is set in the FPCR, and either
| inex2 or inex1 occurred, we must clean up and branch to the
| real inex handler.
|
ck_inex:
	moveb		FPCR_ENABLE(%a6),%d0	|enabled exceptions
	andb		FPSR_EXCEPT(%a6),%d0	|... that were also reported
	andib		#0x3,%d0		|keep only inex2/inex1
	beqs		unfl_done		|none, plain exit

|
| Inexact enabled and reported, and we must take an inexact exception
|
take_inex:
	btstb		#E3,E_BYTE(%a6)
	beqs		no_e3_2		|E3 clear, skip the E3 fix-up
|
| Clear dirty bit on dest register in the frame before branching
| to b1238_fix.
|
	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
	bsrl		b1238_fix		|test for bug1238 case
	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	|copy saved FPSR to shadow
	orl		#sx_mask,E_BYTE(%a6)	|or sx_mask into long at E_BYTE
|
| Write INEX_VEC into the byte at EXC_VEC+1 of the frame, then restore
| everything and continue in the real inexact handler.
| NOTE(review): presumably this makes the frame look like an inexact
| exception to real_inex -- confirm against the EXC_VEC definition.
|
no_e3_2:
	moveb		#INEX_VEC,EXC_VEC+1(%a6)
	moveml         USER_DA(%a6),%d0-%d1/%a0-%a1
	fmovemx        USER_FP0(%a6),%fp0-%fp3
	fmoveml        USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
	frestore        (%a7)+
	unlk            %a6
	bral		real_inex

|
| No trap is to be taken.  bclr tests the bit before clearing it, so
| the branch below is taken when E3 was already clear; otherwise E3
| has now been cleared in E_BYTE and the E3 fix-up follows.
|
unfl_done:
	bclrb		#E3,E_BYTE(%a6)
	beqs		e1_set		|E3 was clear, take the e1_set exit
|
| Clear dirty bit on dest register in the frame before branching
| to b1238_fix.
|
	bfextu		CMDREG3B(%a6){#6:#3},%d0		|get dest reg no
	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
	bsrl		b1238_fix		|test for bug1238 case
	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	|copy saved FPSR to shadow
	orl		#sx_mask,E_BYTE(%a6)	|or sx_mask into long at E_BYTE
	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
	fmovemx	USER_FP0(%a6),%fp0-%fp3
	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
	frestore	(%a7)+
	unlk		%a6
	bral		fpsp_done
|
| Exit taken when E3 was clear.  There is no frestore here: unlk
| reloads a7 from a6, so the state frame pushed by fsave at entry is
| dropped instead of being handed back to the FPU.
| NOTE(review): presumably intentional for the E1 case -- confirm
| against fpsp_done.
|
e1_set:
	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
	fmovemx	USER_FP0(%a6),%fp0-%fp3
	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
	unlk		%a6
	bral		fpsp_done
|
|	unf_res --- underflow result calculation
|
| Called from fpsp_unfl with a6 pointing at the handler frame.
| Steps, in order:
|	1. fetch the rounding precision with g_rndpr and park it on
|	   the stack as the upper word of a long
|	2. point a0 at the exceptional operand (WBTEMP if E3 is set
|	   in E_BYTE, FPTEMP otherwise)
|	3. move the sign out of LOCAL_EX into LOCAL_SGN, then call
|	   denorm and round
|	4. call store; for a register destination also set the z and
|	   neg condition codes in FPSR_CC from the result
|	5. set aunfl in FPSR_AEXCEPT if inex2 is set in FPSR_EXCEPT
| The four bytes pushed in step 1 are popped again before round is
| called, so the stack is balanced at the rts.
|
unf_res:
	bsrl		g_rndpr		|returns RND_PREC in d0 0=ext,
|					;1=sgl, 2=dbl
|					;we need the RND_PREC in the
|					;upper word for round
	movew		#0,-(%a7)	|low word of the stacked long = 0
	movew		%d0,-(%a7)	|copy RND_PREC to stack
|
|
| If the exception bit set is E3, the exceptional operand from the
| fpu is in WBTEMP; else it is in FPTEMP.
|
	btstb		#E3,E_BYTE(%a6)
	beqs		unf_E1
unf_E3:
	lea		WBTEMP(%a6),%a0	|a0 now points to operand
|
| Test for fsgldiv and fsglmul (low 7 bits of the CMDREG3B word equal
| to 0x30 or 0x33).  If the inst was one of these, then d0 is cleared
| (0=ext) before the denorm routine is called and the stacked
| precision is overwritten with 1 (sgl) for the round routine.  For
| any other instruction the g_rndpr value is left in d0 and on the
| stack.
|
	movew		CMDREG3B(%a6),%d1	|check for fsgldiv or fsglmul
	andiw		#0x7f,%d1
	cmpiw		#0x30,%d1		|check for sgldiv
	beqs		unf_sgl
	cmpiw		#0x33,%d1		|check for sglmul
	bnes		unf_cont	|if not, use fpcr prec in round
unf_sgl:
	clrl		%d0		|d0 = 0 (ext) going into denorm
	movew		#0x1,(%a7)	|override g_rndpr precision
|					;force single
	bras		unf_cont
unf_E1:
	lea		FPTEMP(%a6),%a0	|a0 now points to operand
|
| bclr reports the old state of the bit in Z, so the sne below writes
| all ones to LOCAL_SGN when the sign bit was set and zero otherwise.
|
unf_cont:
	bclrb		#sign_bit,LOCAL_EX(%a0)	|clear sign bit
	sne		LOCAL_SGN(%a0)		|store sign

	bsrl		denorm		|returns denorm, a0 points to it
|
| WARNING:
|				;d0 has guard,round sticky bit
|				;make sure that it is not corrupted
|				;before it reaches the round subroutine
|				;also ensure that a0 isn't corrupted

|
| Set up d1 for round subroutine d1 contains the PREC/MODE
| information respectively on upper/lower register halves.
| bfextu leaves the 2-bit mode zero-extended in d1 and the low word
| of the stacked long is zero, so the addl simply merges the two
| fields.  The addl also pops the long pushed at the top of unf_res.
|
	bfextu		FPCR_MODE(%a6){#2:#2},%d1	|get mode from FPCR
|						;mode in lower d1
	addl		(%a7)+,%d1		|merge PREC/MODE
|
| WARNING: a0 and d0 are assumed to be intact between the denorm and
| round subroutines. All code between these two subroutines
| must not corrupt a0 and d0.
|
|
| Perform Round
|	Input:		a0 points to input operand
|			d0{31:29} has guard, round, sticky
|			d1{01:00} has rounding mode
|			d1{17:16} has rounding precision
|	Output:		a0 points to rounded operand
|

	bsrl		round		|returns rounded denorm at (a0)
|
| Differentiate between store to memory vs. store to register
|
unf_store:
	bsrl		g_opcls		|returns opclass in d0{2:0}
	cmpib		#0x3,%d0	|opclass 3 is the store to memory case
	bnes		not_opc011
|
| At this point, a store to memory is pending
|
opc011:
	bsrl		g_dfmtou
	tstb		%d0
	beqs		ext_opc011	|If extended, do not subtract
|				;If destination format is sgl/dbl,
	tstb		LOCAL_HI(%a0)	|If rounded result is normal,don't
|					;subtract
	bmis		ext_opc011	|top mantissa bit set, skip
	subqw		#1,LOCAL_EX(%a0)	|account for denorm bias vs.
|				;normalized bias
|				;          normalized   denormalized
|				;single       $7f           $7e
|				;double       $3ff          $3fe
|
ext_opc011:
	bsrl		store		|stores to memory
	bras		unf_done	|finish up

|
| At this point, a store to a float register is pending
|
not_opc011:
	bsrl		store	|stores to float register
|				;a0 is not corrupted on a store to a
|				;float register.
|
| Set the condition codes according to result.  This is only done on
| the register path; the memory path above branches straight to
| unf_done.
|
	tstl		LOCAL_HI(%a0)	|check upper mantissa
	bnes		ck_sgn
	tstl		LOCAL_LO(%a0)	|check lower mantissa
	bnes		ck_sgn
	bsetb		#z_bit,FPSR_CC(%a6) |set condition codes if zero
ck_sgn:
	btstb		#sign_bit,LOCAL_EX(%a0)	|check the sign bit
	beqs		unf_done
	bsetb		#neg_bit,FPSR_CC(%a6)	|result is negative

|
| Finish.  If inex2 is set in FPSR_EXCEPT, also set aunfl in
| FPSR_AEXCEPT.
|
unf_done:
	btstb		#inex2_bit,FPSR_EXCEPT(%a6)
	beqs		no_aunfl
	bsetb		#aunfl_bit,FPSR_AEXCEPT(%a6)
no_aunfl:
	rts

	|end
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`\|`
			`\| x_unfl.sa 3.4 7/1/91`
			`\|`
			`\| fpsp_unfl --- FPSP handler for underflow exception`
			`\|`
			`\| Trap disabled results`
			`\| For 881/2 compatibility, sw must denormalize the intermediate`
			`\| result, then store the result. Denormalization is accomplished`
			`\| by taking the intermediate result (which is always normalized) and`
			`\| shifting the mantissa right while incrementing the exponent until`
			`\| it is equal to the denormalized exponent for the destination`
			`\| format. After denormalization, the result is rounded to the`
			`\| destination format.`
			`\|`
			`\| Trap enabled results`
			`\| All trap disabled code applies. In addition the exceptional`
			`\| operand needs to made available to the user with a bias of $6000`
			`\| added to the exponent.`
			`\|`

			`\| Copyright (C) Motorola, Inc. 1990`
			`\| All Rights Reserved`
			`\|`
[PATCH] Add wording to m68k .S files to help clarify license info Acked-by: Alan Cox <alan@redhat.com> Signed-off-by: Matt Waddel <Matt.Waddel@freescale.com> Cc: Roman Zippel <zippel@linux-m68k.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-02-12 09:55:48 +08:00			`\| For details on the license for this file, please see the`
			`\| file, README, in this same directory.`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00
			`X_UNFL: \|idnt 2,1 \| Motorola 040 Floating Point Software Package`

			`\|section 8`

			`#include "fpsp.h"`

			`\|xref denorm`
			`\|xref round`
			`\|xref store`
			`\|xref g_rndpr`
			`\|xref g_opcls`
			`\|xref g_dfmtou`
			`\|xref real_unfl`
			`\|xref real_inex`
			`\|xref fpsp_done`
			`\|xref b1238_fix`

			`.global fpsp_unfl`
			`fpsp_unfl:`
			`link %a6,#-LOCAL_SIZE`
			`fsave -(%a7)`
			`moveml %d0-%d1/%a0-%a1,USER_DA(%a6)`
			`fmovemx %fp0-%fp3,USER_FP0(%a6)`
			`fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)`

			`\|`
			`bsrl unf_res \|denormalize, round & store interm op`
			`\|`
			`\| If underflow exceptions are not enabled, check for inexact`
			`\| exception`
			`\|`
			`btstb #unfl_bit,FPCR_ENABLE(%a6)`
			`beqs ck_inex`

			`btstb #E3,E_BYTE(%a6)`
			`beqs no_e3_1`
			`\|`
			`\| Clear dirty bit on dest resister in the frame before branching`
			`\| to b1238_fix.`
			`\|`
			`bfextu CMDREG3B(%a6){#6:#3},%d0 \|get dest reg no`
			`bclrb %d0,FPR_DIRTY_BITS(%a6) \|clr dest dirty bit`
			`bsrl b1238_fix \|test for bug1238 case`
			`movel USER_FPSR(%a6),FPSR_SHADOW(%a6)`
			`orl #sx_mask,E_BYTE(%a6)`
			`no_e3_1:`
			`moveml USER_DA(%a6),%d0-%d1/%a0-%a1`
			`fmovemx USER_FP0(%a6),%fp0-%fp3`
			`fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar`
			`frestore (%a7)+`
			`unlk %a6`
			`bral real_unfl`
			`\|`
			`\| It is possible to have either inex2 or inex1 exceptions with the`
			`\| unfl. If the inex enable bit is set in the FPCR, and either`
			`\| inex2 or inex1 occurred, we must clean up and branch to the`
			`\| real inex handler.`
			`\|`
			`ck_inex:`
			`moveb FPCR_ENABLE(%a6),%d0`
			`andb FPSR_EXCEPT(%a6),%d0`
			`andib #0x3,%d0`
			`beqs unfl_done`

			`\|`
			`\| Inexact enabled and reported, and we must take an inexact exception`
			`\|`
			`take_inex:`
			`btstb #E3,E_BYTE(%a6)`
			`beqs no_e3_2`
			`\|`
			`\| Clear dirty bit on dest resister in the frame before branching`
			`\| to b1238_fix.`
			`\|`
			`bfextu CMDREG3B(%a6){#6:#3},%d0 \|get dest reg no`
			`bclrb %d0,FPR_DIRTY_BITS(%a6) \|clr dest dirty bit`
			`bsrl b1238_fix \|test for bug1238 case`
			`movel USER_FPSR(%a6),FPSR_SHADOW(%a6)`
			`orl #sx_mask,E_BYTE(%a6)`
			`no_e3_2:`
			`moveb #INEX_VEC,EXC_VEC+1(%a6)`
			`moveml USER_DA(%a6),%d0-%d1/%a0-%a1`
			`fmovemx USER_FP0(%a6),%fp0-%fp3`
			`fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar`
			`frestore (%a7)+`
			`unlk %a6`
			`bral real_inex`

			`unfl_done:`
			`bclrb #E3,E_BYTE(%a6)`
			`beqs e1_set \|if set then branch`
			`\|`
			`\| Clear dirty bit on dest resister in the frame before branching`
			`\| to b1238_fix.`
			`\|`
			`bfextu CMDREG3B(%a6){#6:#3},%d0 \|get dest reg no`
			`bclrb %d0,FPR_DIRTY_BITS(%a6) \|clr dest dirty bit`
			`bsrl b1238_fix \|test for bug1238 case`
			`movel USER_FPSR(%a6),FPSR_SHADOW(%a6)`
			`orl #sx_mask,E_BYTE(%a6)`
			`moveml USER_DA(%a6),%d0-%d1/%a0-%a1`
			`fmovemx USER_FP0(%a6),%fp0-%fp3`
			`fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar`
			`frestore (%a7)+`
			`unlk %a6`
			`bral fpsp_done`
			`e1_set:`
			`moveml USER_DA(%a6),%d0-%d1/%a0-%a1`
			`fmovemx USER_FP0(%a6),%fp0-%fp3`
			`fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar`
			`unlk %a6`
			`bral fpsp_done`
			`\|`
			`\| unf_res --- underflow result calculation`
			`\|`
			`unf_res:`
			`bsrl g_rndpr \|returns RND_PREC in d0 0=ext,`
			`\| ;1=sgl, 2=dbl`
			`\| ;we need the RND_PREC in the`
			`\| ;upper word for round`
			`movew #0,-(%a7)`
			`movew %d0,-(%a7) \|copy RND_PREC to stack`
			`\|`
			`\|`
			`\| If the exception bit set is E3, the exceptional operand from the`
			`\| fpu is in WBTEMP; else it is in FPTEMP.`
			`\|`
			`btstb #E3,E_BYTE(%a6)`
			`beqs unf_E1`
			`unf_E3:`
			`lea WBTEMP(%a6),%a0 \|a0 now points to operand`
			`\|`
			`\| Test for fsgldiv and fsglmul. If the inst was one of these, then`
			`\| force the precision to extended for the denorm routine. Use`
			`\| the user's precision for the round routine.`
			`\|`
			`movew CMDREG3B(%a6),%d1 \|check for fsgldiv or fsglmul`
			`andiw #0x7f,%d1`
			`cmpiw #0x30,%d1 \|check for sgldiv`
			`beqs unf_sgl`
			`cmpiw #0x33,%d1 \|check for sglmul`
			`bnes unf_cont \|if not, use fpcr prec in round`
			`unf_sgl:`
			`clrl %d0`
			`movew #0x1,(%a7) \|override g_rndpr precision`
			`\| ;force single`
			`bras unf_cont`
			`unf_E1:`
			`lea FPTEMP(%a6),%a0 \|a0 now points to operand`
			`unf_cont:`
			`bclrb #sign_bit,LOCAL_EX(%a0) \|clear sign bit`
			`sne LOCAL_SGN(%a0) \|store sign`

			`bsrl denorm \|returns denorm, a0 points to it`
			`\|`
			`\| WARNING:`
			`\| ;d0 has guard,round sticky bit`
			`\| ;make sure that it is not corrupted`
			`\| ;before it reaches the round subroutine`
			`\| ;also ensure that a0 isn't corrupted`

			`\|`
			`\| Set up d1 for round subroutine d1 contains the PREC/MODE`
			`\| information respectively on upper/lower register halves.`
			`\|`
			`bfextu FPCR_MODE(%a6){#2:#2},%d1 \|get mode from FPCR`
			`\| ;mode in lower d1`
			`addl (%a7)+,%d1 \|merge PREC/MODE`
			`\|`
			`\| WARNING: a0 and d0 are assumed to be intact between the denorm and`
			`\| round subroutines. All code between these two subroutines`
			`\| must not corrupt a0 and d0.`
			`\|`
			`\|`
			`\| Perform Round`
			`\| Input: a0 points to input operand`
			`\| d0{31:29} has guard, round, sticky`
			`\| d1{01:00} has rounding mode`
			`\| d1{17:16} has rounding precision`
			`\| Output: a0 points to rounded operand`
			`\|`

			`bsrl round \|returns rounded denorm at (a0)`
			`\|`
			`\| Differentiate between store to memory vs. store to register`
			`\|`
			`unf_store:`
			`bsrl g_opcls \|returns opclass in d0{2:0}`
			`cmpib #0x3,%d0`
			`bnes not_opc011`
			`\|`
			`\| At this point, a store to memory is pending`
			`\|`
			`opc011:`
			`bsrl g_dfmtou`
			`tstb %d0`
			`beqs ext_opc011 \|If extended, do not subtract`
			`\| ;If destination format is sgl/dbl,`
			`tstb LOCAL_HI(%a0) \|If rounded result is normal,don't`
			`\| ;subtract`
			`bmis ext_opc011`
			`subqw #1,LOCAL_EX(%a0) \|account for denorm bias vs.`
			`\| ;normalized bias`
			`\| ; normalized denormalized`
			`\| ;single $7f $7e`
			`\| ;double $3ff $3fe`
			`\|`
			`ext_opc011:`
			`bsrl store \|stores to memory`
			`bras unf_done \|finish up`

			`\|`
			`\| At this point, a store to a float register is pending`
			`\|`
			`not_opc011:`
			`bsrl store \|stores to float register`
			`\| ;a0 is not corrupted on a store to a`
			`\| ;float register.`
			`\|`
			`\| Set the condition codes according to result`
			`\|`
			`tstl LOCAL_HI(%a0) \|check upper mantissa`
			`bnes ck_sgn`
			`tstl LOCAL_LO(%a0) \|check lower mantissa`
			`bnes ck_sgn`
			`bsetb #z_bit,FPSR_CC(%a6) \|set condition codes if zero`
			`ck_sgn:`
			`btstb #sign_bit,LOCAL_EX(%a0) \|check the sign bit`
			`beqs unf_done`
			`bsetb #neg_bit,FPSR_CC(%a6)`

			`\|`
			`\| Finish.`
			`\|`
			`unf_done:`
			`btstb #inex2_bit,FPSR_EXCEPT(%a6)`
			`beqs no_aunfl`
			`bsetb #aunfl_bit,FPSR_AEXCEPT(%a6)`
			`no_aunfl:`
			`rts`

			`\|end`