/////////////////////////////////////////////////////////////////////
// double precision floating-point arithmetic
// author: Chun Hok Ho (cho -at- doc.ic.ac.uk)
// Department of Computing,
// Imperial College, London

////                                                             ////
////  FPU                                                        ////
////  Floating Point Unit (double precision)                     ////
////                                                             ////
////  Author: Rudolf Usselmann                                   ////
////          rudi@asics.ws                                      ////
////          HO Chun Hok                                        ////
////          cho@doc.ic.ac.uk                                   ////
////                                                             ////
/////////////////////////////////////////////////////////////////////
////                                                             ////
//// Copyright (C) 2000 Rudolf Usselmann                         ////
////                    rudi@asics.ws                            ////
////                                                             ////
//// This source file may be used and distributed without        ////
//// restriction provided that this copyright statement is not   ////
//// removed from the file and that any derivative work contains ////
//// the original copyright notice and the associated disclaimer.////
////                                                             ////
////     THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY     ////
//// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED   ////
//// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS   ////
//// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR      ////
//// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,         ////
//// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES    ////
//// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE   ////
//// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR        ////
//// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF  ////
//// LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT  ////
//// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT  ////
//// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE         ////
//// POSSIBILITY OF SUCH DAMAGE.                                 ////
////                                                             ////
/////////////////////////////////////////////////////////////////////

`timescale 1ns / 100ps

/*

FPU Operations (fpu_op):
========================

0 = add
1 = sub
2 = mul
3 = div
4 =
5 =
6 =
7 =

Rounding Modes (rmode):
=======================

0 = round_nearest_even
1 = round_to_zero
2 = round_up
3 = round_down

*/


////////////////////////////////////////////////////////////////////////
//
// fpu2_add - pipelined floating-point add datapath
//
// Parameters:
//   esize - exponent width in bits (default 11)
//   fsize - fraction width in bits (default 52)
//   NOTE(review): the sub-blocks instantiated here (except2, pre_norm2,
//   add_sub56, post_norm2) are defined elsewhere; whether they honour
//   widths other than the defaults cannot be seen from this module.
//
// Ports:
//   clk          - clock; every register in this module is posedge clk
//   rmode        - rounding mode (see the table at the top of the file)
//   opa, opb     - operands, {sign, exponent[esize], fraction[fsize]}
//   outr         - registered result, same layout as the operands
//   inf, snan, qnan, ine, overflow, underflow, zero
//                - registered status flags accompanying outr
//   div_by_zero  - always driven with 0 by this module
//
// Structure (in the order it appears below):
//   input registers -> except2 (u0) -> pre_norm2 (u1) -> add_sub56 (u3)
//   -> post_norm2 (u4) -> first output stage (*_o1) -> output register.
//
// The operation code is tied to 3'b000 (add); the former fpu_op input
// port is no longer part of the interface.
//
module fpu2_add( clk, rmode, opa, opb, outr, inf, snan, qnan, ine, overflow, underflow, zero, div_by_zero);

parameter
		esize = 11,
		fsize = 52;

input		clk;
input	[1:0]	rmode;
input	[esize+fsize:0]	opa, opb;
output	[esize+fsize:0]	outr;
output		inf, snan, qnan;
output		ine;
output		overflow, underflow;
output		zero;
output		div_by_zero;

////////////////////////////////////////////////////////////////////////
//
// Constants
//
// These are continuous assignments so that they are valid from time 0
// and do not depend on an initial block. The bit patterns are built
// with replication, so they follow esize/fsize; with the defaults they
// equal the previous literals (exponent 11'h7FF, fractions 52'h0,
// 52'h8000000000001 and 52'h0000000000001).
//
wire	[2:0]	fpu_op;			// operation code, fixed to 0 (add)
wire	[esize+fsize:0]	INF;		// +infinity
wire	[esize+fsize:0]	QNAN;		// quiet NAN
wire	[esize+fsize:0]	SNAN;		// signal NAN (not read in this module)

assign fpu_op = 3'b000;
assign INF    = {1'b0, {esize{1'b1}}, {fsize{1'b0}}};
assign QNAN   = {1'b0, {esize{1'b1}}, 1'b1, {(fsize-2){1'b0}}, 1'b1};
assign SNAN   = {1'b0, {esize{1'b1}}, {(fsize-1){1'b0}}, 1'b1};

////////////////////////////////////////////////////////////////////////
//
// Local Wires
//
reg		zero;
reg	[fsize+esize:0]	opa_r, opb_r;		// Input operand registers
reg	[fsize+esize:0]	outr;			// Output register
reg		div_by_zero;		// Divide by zero output register
wire		sign_fasu;		// sign output
wire	[fsize+3:0]	fracta, fractb;		// Fraction Outputs from EQU block
wire	[esize-1:0]	exp_fasu;		// Exponent output from EQU block
reg	[esize-1:0]	exp_r;			// Exponent output (registered)
wire	[fsize+3:0]	fract_out_d;		// fraction output
wire		co_d;			// carry output of the adder
wire		fasu_op;		// actual add/sub operation chosen by pre_norm2
reg	[fsize+4:0]	fract_out_q;		// {carry, fraction} output (registered)
wire	[esize+fsize-1:0]	out_d;			// Intermediate final result output
wire		overflow_d, underflow_d;// Overflow/Underflow Indicators
reg		overflow, underflow;	// Output registers for Overflow & Underflow
reg		inf, snan, qnan;	// Output Registers for INF, SNAN and QNAN
reg		ine;			// Output Registers for INE
reg	[1:0]	rmode_r1, rmode_r2, 	// Pipeline registers for rounding mode
		rmode_r3;
reg	[2:0]	fpu_op_r1, fpu_op_r2,	// Pipeline registers for fp operation
		fpu_op_r3;

// First output stage; copied into the module outputs one clock later
reg	[fsize+esize:0]	out_o1;
reg		inf_o1, snan_o1, qnan_o1;
reg		ine_o1;
reg		overflow_o1, underflow_o1;
reg		zero_o1;
reg		div_by_zero_o1;

////////////////////////////////////////////////////////////////////////
//
// Input Registers
//

always @(posedge clk)
	opa_r <= #1 opa;

always @(posedge clk)
	opb_r <= #1 opb;

always @(posedge clk)
	rmode_r1 <= #1 rmode;

always @(posedge clk)
	rmode_r2 <= #1 rmode_r1;

always @(posedge clk)
	rmode_r3 <= #1 rmode_r2;

always @(posedge clk)
	fpu_op_r1 <= #1 fpu_op;

always @(posedge clk)
	fpu_op_r2 <= #1 fpu_op_r1;

always @(posedge clk)
	fpu_op_r3 <= #1 fpu_op_r2;

////////////////////////////////////////////////////////////////////////
//
// Exceptions block
// - fed with the registered operands; its outputs steer the special
//   case handling in the output section below
//
wire		inf_d, ind_d, qnan_d, snan_d, opa_nan, opb_nan;
wire		opa_00, opb_00;
wire		opa_inf, opb_inf;
wire		opa_dn, opb_dn;

except2 u0(	.clk(clk),
		.opa(opa_r), .opb(opb_r),
		.inf(inf_d), .ind(ind_d),
		.qnan(qnan_d), .snan(snan_d),
		.opa_nan(opa_nan), .opb_nan(opb_nan),
		.opa_00(opa_00), .opb_00(opb_00),
		.opa_inf(opa_inf), .opb_inf(opb_inf),
		.opa_dn(opa_dn), .opb_dn(opb_dn)
		);

////////////////////////////////////////////////////////////////////////
//
// Pre-Normalize block
// - Adjusts the numbers to equal exponents and sorts them
// - determine result sign
// - determine actual operation to perform (add or sub)
//

wire		nan_sign_d, result_zero_sign_d;
reg		sign_fasu_r;

pre_norm2 u1(.clk(clk),				// System Clock
	.rmode(rmode_r2),			// Rounding Mode
	.add(!fpu_op_r1[0]),			// Add/Sub Input
	.opa(opa_r),  .opb(opb_r),		// Registered OP Inputs
	.opa_nan(opa_nan),			// OpA is a NAN indicator
	.opb_nan(opb_nan),			// OpB is a NAN indicator
	.fracta_out(fracta),			// Equalized and sorted fraction
	.fractb_out(fractb),			// outputs (Registered)
	.exp_dn_out(exp_fasu),			// Selected exponent output (registered);
	.sign(sign_fasu),			// Encoded output Sign (registered)
	.nan_sign(nan_sign_d),			// Output Sign for NANs (registered)
	.result_zero_sign(result_zero_sign_d),	// Output Sign for zero result (registered)
	.fasu_op(fasu_op)			// Actual fasu operation output (registered)
	);

always @(posedge clk)
	sign_fasu_r <= #1 sign_fasu;

////////////////////////////////////////////////////////////////////////
//
// Add/Sub
//

add_sub56 u3(
	.add(fasu_op),			// Add/Sub
	.opa(fracta),			// Fraction A input
	.opb(fractb),			// Fraction B Input
	.sum(fract_out_d),		// SUM output
	.co(co_d) );			// Carry Output

// Keep the carry as the top bit of the registered sum
always @(posedge clk)
	fract_out_q <= #1 {co_d, fract_out_d};

////////////////////////////////////////////////////////////////////////
//
// Normalize Result
//
wire		ine_d;
wire	[fsize*2+1:0]	fract_denorm;
wire		sign_d;
reg		sign;
reg	[esize+fsize-1:0]	opa_r1;		// not read inside this module
reg		opas_r1, opas_r2;
wire		f2i_out_sign;

always @(posedge clk)			// Exponent must be one cycle delayed
	exp_r <= #1 exp_fasu;

always @(posedge clk)
	opa_r1 <= #1 opa_r[esize+fsize-1:0];

// Left-align the registered sum in the post_norm2 fraction input.
// The pad is (fsize*2+2) - (fsize+5) = fsize-3 zero bits, i.e. 49 bits
// with the default fsize.
assign fract_denorm = {fract_out_q, {(fsize-3){1'b0}}};

// Sign of operand A, delayed by two clocks for post_norm2
always @(posedge clk)
	opas_r1 <= #1 opa_r[esize+fsize];

always @(posedge clk)
	opas_r2 <= #1 opas_r1;

assign sign_d = sign_fasu;

// The sign handed to post_norm2 is inverted when the rounding mode is
// 3 (round_down).
always @(posedge clk)
	sign <= #1 (rmode_r2==2'h3) ? !sign_d : sign_d;

post_norm2 u4(.clk(clk),			// System Clock
	.fpu_op(fpu_op_r3),		// Floating Point Operation
	.opas(opas_r2),			// OPA Sign
	.sign(sign),			// Sign of the result
	.rmode(rmode_r3),		// Rounding mode
	.fract_in(fract_denorm),	// Fraction Input
	.exp_ovf(2'b00),		// Exponent Overflow
	.exp_in(exp_r),			// Exponent Input
	.opa_dn(opa_dn),		// Operand A Denormalized
	.opb_dn(opb_dn),		// Operand B Denormalized
	.output_zero(1'b0),		// Force output to Zero
	.out(out_d),			// Normalized output (un-registered)
	.ine(ine_d),			// Result Inexact output (un-registered)
	.overflow(overflow_d),		// Overflow output (un-registered)
	.underflow(underflow_d),	// Underflow output (un-registered)
	.f2i_out_sign(f2i_out_sign)	// F2I Output Sign
	);

////////////////////////////////////////////////////////////////////////
//
// FPU Outputs
//
reg		fasu_op_r1, fasu_op_r2;	// fasu_op_r2 is not read inside this module
wire	[esize+fsize-1:0]	out_fixed;
wire		output_zero_fasu;
wire		overflow_fasu;
wire		out_d_00;
wire		ine_fasu;
wire		underflow_fasu;
reg		opa_nan_r;		// not read inside this module

always @(posedge clk)
	fasu_op_r1 <= #1 fasu_op;

always @(posedge clk)
	fasu_op_r2 <= #1 fasu_op_r1;

// Force pre-set values for non numerical output.
// out_fixed has no sign bit, so the (zero) sign bit of QNAN/INF is
// dropped by the assignment; the sign is produced separately below.
assign out_fixed = (	(qnan_d | snan_d) |
			(ind_d & !fasu_op_r1) )  ? QNAN : INF;

// Exponent and fraction of the result
always @(posedge clk)
	out_o1[esize+fsize-1:0] <= #1 (inf_d | snan_d | qnan_d) ? out_fixed : out_d;

assign out_d_00 = !(|out_d);

// Sign of the result
always @(posedge clk)
	out_o1[esize+fsize] <= #1	(snan_d | qnan_d | ind_d) ?	nan_sign_d :
					output_zero_fasu ?		result_zero_sign_d :
									sign_fasu_r;

// The numeric flags are suppressed whenever the special-case path is taken
assign ine_fasu = (ine_d | overflow_d | underflow_d) & !(snan_d | qnan_d | inf_d);

always @(posedge clk)
	ine_o1 <= #1 ine_fasu;

assign overflow_fasu = overflow_d & !(snan_d | qnan_d | inf_d);

always @(posedge clk)
	overflow_o1 <= #1 overflow_fasu;

assign underflow_fasu = underflow_d & !(inf_d | snan_d | qnan_d);

always @(posedge clk)
	underflow_o1 <= #1 underflow_fasu;

always @(posedge clk)
	snan_o1 <= #1 snan_d;

// Status Outputs
always @(posedge clk)
	qnan_o1 <= #1	( snan_d | qnan_d | (ind_d & !fasu_op_r1) );

// inf is flagged either when out_d itself has an all-ones exponent with
// a zero fraction, or when except2 reports inf_d (unless that case is
// turned into a QNAN above); never together with a NAN.
always @(posedge clk)
	inf_o1 <= #1 (!(qnan_d | snan_d) & (( (&out_d[esize+fsize-1:fsize]) & !(|out_d[fsize-1:0] ) ) |  (inf_d & !(ind_d & !fasu_op_r1) & !fpu_op_r3[1]) ));

assign output_zero_fasu = out_d_00 & !(inf_d | snan_d | qnan_d);

always @(posedge clk)
	zero_o1 <= #1	output_zero_fasu ;

always @(posedge clk)
	opa_nan_r <= #1 !opa_nan & fpu_op_r2==3'b011;

// This module performs no division, so the flag is constant 0
always @(posedge clk)
	div_by_zero_o1 <= #1 1'b0;

// Output register: one more clock of delay between the *_o1 stage and
// the module ports
always @(posedge clk)
begin
	outr <= #1 out_o1;
	inf <= #1 inf_o1;
	snan <= #1 snan_o1;
	qnan <= #1 qnan_o1;
	ine <= #1 ine_o1;
	overflow <= #1 overflow_o1;
	underflow <= #1 underflow_o1;
	zero <= #1 zero_o1;
	div_by_zero <= #1 div_by_zero_o1;
end

endmodule
