来源:网络素材
采用线性逼近法结合32段线性查找表的方式来实现1/z的计算。
首先将1/32~1/64的定点化数据存放到ROM中,ROM中存放的是将数值扩大2^n倍后四舍五入得到的整数部分。n值越大,精度越高,误差越小。这里取n=20;
ROM中存储的数据是1/(32+i)*2^20的四舍五入的整数部分。
32-64间的数据可以通过查表来实现,其他的数据则采用的是线性逼近的方法。
线性逼近的步骤为:
1.确定z的最高非零比特位的位置,记为m
2.对z进行左移(m<5时左移5-m位)或者右移(m>5时右移m-5位),得到落在查找表范围内的zp
3.zp查找ROM,得到1/zp,以及1/(zp+1),
4.求得1/zp-1/(zp+1),记为误差A
5.N=z-zp*2^(m-5)
6.B=A/2^(m-5)*N
7.将扩大的部分缩小回去,或者缩小了的放大回去,那么1/z=(1/zp-B)*(1/2^(m-5))
代码插入:
// top_inv
// -------
// Pipelined fixed-point reciprocal. `ampout` is produced by the complex_abs
// instance from (dataa, datab); this module outputs an approximation of
// 1/ampout scaled by 2^20 (per the accompanying notes; the ROM contents are
// not visible here -- TODO confirm scaling against the rom module).
//
// Method:
//   * 32 <= ampout < 64 : direct ROM lookup at address (ampout - 32).
//   * otherwise         : normalise ampout by shifting it by |m-5| bits
//                         (m comes from the not_0 instance, presumably the
//                         index of the highest non-zero bit), look up 1/zp and
//                         1/(zp+1), linearly interpolate with the remainder N,
//                         then undo the normalisation shift.
//
// Reset: `syn_rst` is used as an ACTIVE-LOW ASYNCHRONOUS reset in every always
// block of this module, despite its name.
//
// NOTE(review): several pipeline alignments below depend on the latency of the
// rom / not_0 submodules, which are defined elsewhere. They are kept exactly as
// in the original design and flagged where they look suspicious.
module top_inv(
    input             clk,
    input             syn_rst,
    input      [20:0] dataa,
    input      [20:0] datab,
    output reg [19:0] inv
);

    reg  [4:0]  address1;          // ROM address for 1/zp
    reg  [4:0]  address2;          // ROM address for 1/(zp+1)
    wire [4:0]  m;                 // output of not_0
    reg  [19:0] invr;              // result before the final output register
    reg  [20:0] ampout_r;          // normalised operand zp
    reg  [20:0] ampout_r1;         // zp delayed by one cycle
    wire [20:0] ampout;            // output of complex_abs
    reg  [20:0] ampoutr1, ampoutr2, ampoutr3, ampoutr4; // ampout delay line
    wire [19:0] inv_r1;            // ROM data for address1
    wire [19:0] inv_r2;            // ROM data for address2
    reg  [20:0] diff_r;            // interpolation correction B
    reg  [19:0] diffr;             // diff delayed by one cycle
    reg  [19:0] diff;              // A = 1/zp - 1/(zp+1)
    reg  [19:0] N;                 // remainder z - zp*2^(m-5)
    reg  [19:0] N1;                // zp shifted back to the scale of z
    reg         en;                // enable passed to not_0
    reg  [19:0] inv_r1t1, inv_r1t2, inv_r1t3;   // inv_r1 delay line
    reg  [4:0]  mt1, mt2, mt3, mt4, mt5;        // m delay line
    reg         sel;               // 1 = interpolated result, 0 = direct lookup
    reg         selr1, selr2;      // earlier stages of the sel delay line

    // Full-width product A*N. Computing `diffr*N` directly inside the
    // assignment to the 21-bit diff_r would size the multiplication to 21 bits
    // and drop the upper product bits BEFORE the right shift.
    wire [39:0] diff_prod = diffr * N;

    // ------------------------------------------------------------------
    // Delay line for ampout.
    // The else branch needs begin/end: without it only the first assignment
    // is guarded and the remaining ones also run during reset, overriding
    // the reset values of ampoutr2..ampoutr4.
    // ------------------------------------------------------------------
    always @(posedge clk or negedge syn_rst)
    begin
        if(~syn_rst)
        begin
            ampoutr1 <= 21'd0;
            ampoutr2 <= 21'd0;
            ampoutr3 <= 21'd0;
            ampoutr4 <= 21'd0;
        end
        else
        begin
            ampoutr1 <= ampout;
            ampoutr2 <= ampoutr1;
            ampoutr3 <= ampoutr2;
            ampoutr4 <= ampoutr3;
        end
    end

    // Delay line for the ROM output inv_r1.
    always @(posedge clk or negedge syn_rst)
    begin
        if(~syn_rst)
        begin
            inv_r1t1 <= 0;
            inv_r1t2 <= 0;
            inv_r1t3 <= 0;
        end
        else
        begin
            inv_r1t1 <= inv_r1;
            inv_r1t2 <= inv_r1t1;
            inv_r1t3 <= inv_r1t2;
        end
    end

    // Delay line for m.
    always @(posedge clk or negedge syn_rst)
    begin
        if(~syn_rst)
        begin
            mt1 <= 0;
            mt2 <= 0;
            mt3 <= 0;
            mt4 <= 0;
            mt5 <= 0;
        end
        else
        begin
            mt1 <= m;
            mt2 <= mt1;
            mt3 <= mt2;
            mt4 <= mt3;
            mt5 <= mt4;
        end
    end

    // ------------------------------------------------------------------
    // Normalisation, ROM addressing, and computation of A (diff) and N.
    // ------------------------------------------------------------------
    always @(posedge clk or negedge syn_rst)
    begin
        if(~syn_rst)
        begin
            diff      <= 0;
            diffr     <= 0;
            ampout_r  <= 'b0;
            ampout_r1 <= 0;
            address1  <= 'b0;
            address2  <= 'b0;
            // N, N1, selr1 and selr2 were not reset originally and started as X.
            N         <= 0;
            N1        <= 0;
            en        <= 0;
            sel       <= 0;
            selr1     <= 0;
            selr2     <= 0;
        end
        else
        begin
            // Direct lookup range. The upper bound is exclusive: for 64 the
            // address (64-32 = 32) does not fit in the 5-bit ROM address and
            // would wrap to entry 0. 64 is handled by the interpolation path.
            // NOTE(review): the range test uses the undelayed `ampout` while
            // the address uses `ampoutr3`; kept as in the original design.
            if((ampout>=32)&&(ampout<64))
            begin
                ampout_r  <= 0;
                ampout_r1 <= 0;
                address1  <= ampoutr3-32;
                address2  <= 0;
                diff      <= 0;
                diffr     <= 0;
                N         <= 0;
                N1        <= 0;
                en        <= 0;   // m is not needed for a direct lookup
                sel       <= 0;
                selr1     <= 0;
                selr2     <= 0;
            end
            else
            begin
                en <= 1;          // m is needed for normalisation
                if(m>5)
                begin
                    ampout_r  <= ampoutr1>>(m-5);   // zp
                    ampout_r1 <= ampout_r;
                    address1  <= ampout_r-32;       // -> inv_r1 = 1/zp
                    // NOTE(review): for zp = 63 this is 32 and wraps to 0 in
                    // the 5-bit address, so inv_r2 would not be 1/64 unless
                    // the rom module handles it -- confirm against rom.
                    address2  <= ampout_r-31;       // -> inv_r2 = 1/(zp+1)
                    diff      <= inv_r1-inv_r2;     // A
                    diffr     <= diff;
                    N1        <= ampout_r1<<(mt2-5);
                    N         <= ampoutr4-N1;       // z - zp*2^(m-5)
                    selr1     <= 1;
                    selr2     <= selr1;
                    sel       <= selr2;
                end
                if(m<5)
                begin
                    ampout_r  <= ampoutr1<<(5-m);   // zp
                    ampout_r1 <= ampout_r;
                    address1  <= ampout_r-32;       // -> inv_r1 = 1/zp
                    address2  <= ampout_r-31;       // -> inv_r2 = 1/(zp+1)
                    diff      <= inv_r1-inv_r2;     // A
                    // This update was swallowed by a trailing comment in the
                    // original, leaving diffr stale in this branch.
                    diffr     <= diff;
                    N1        <= ampout_r1>>(5-mt2);
                    N         <= ampoutr4-N1;
                    selr1     <= 1;
                    selr2     <= selr1;
                    sel       <= selr2;
                end
            end
        end
    end

    // ------------------------------------------------------------------
    // Interpolation and de-normalisation.
    // NOTE(review): the branch is chosen with the undelayed `m` while the
    // shift amounts use mt4/mt5; kept as in the original design, confirm the
    // intended alignment when the input changes every cycle.
    // ------------------------------------------------------------------
    always @(posedge clk or negedge syn_rst)
    begin
        if(~syn_rst)
        begin
            invr   <= 0;
            diff_r <= 0;
        end
        else
        begin
            if(sel)
            begin
                if(m>5)
                begin
                    diff_r <= diff_prod>>(mt4-5);             // B = A*N / 2^(m-5)
                    invr   <= (inv_r1t3-diff_r)>>(mt5-5);     // (1/zp - B) / 2^(m-5)
                end
                else
                begin
                    diff_r <= diff_prod<<(5-mt4);
                    invr   <= (inv_r1t3-diff_r)<<(5-mt5);
                end
            end
            else
            begin
                diff_r <= 0;
                invr   <= inv_r1t3;   // direct lookup result
            end
        end
    end

    // Output register: holds the previous value while invr is zero.
    always @(posedge clk or negedge syn_rst)
    begin
        if(~syn_rst)
        begin
            inv <= 0;
        end
        else
        begin
            if(invr)
                inv <= invr;
            else
                inv <= inv;
        end
    end

    // ROM core instantiation (two read ports: 1/zp and 1/(zp+1)).
    rom u_rom(
        .clk(clk),
        .address1(address1),
        .address2(address2),
        .inv_r1(inv_r1),
        .inv_r2(inv_r2)
    );

    // Highest-non-zero-bit finder.
    not_0 u_not_0 (
        .ampout(ampout),
        .clk(clk),
        .m(m),
        .en(en),
        .syn_rst(syn_rst)
    );

    // NOTE(review): complex_abs receives the INVERTED reset, unlike not_0;
    // presumably it uses an active-high reset -- confirm against that module.
    complex_abs u_comlex_abs(
        .clk(clk),
        .syn_rst(~syn_rst),
        .dataa(dataa),
        .datab(datab),
        .ampout(ampout)
    );

endmodule
那么最终的仿真结果:如果直接查表,结果输出延时一个时钟周期;如果采用线性逼近的方法得到,则延时3-5个时钟周期。这里时钟周期设定为20ns;
占用资源报告:
增加一个求平方根的模块以后的仿真结果(数据输入后,一共需要约10个时钟周期才可以计算出一个平方根的倒数值)。有一个小疑问就是怎么添加一个标志信号,让我们知道哪里输出的inv信号是有效的