[Cache] fix bug & add PLRU

[testbench] make verilator linter happy
This commit is contained in:
Paul Pan 2021-07-07 23:28:18 +08:00
parent 171371fc4f
commit 9e751c3f61
9 changed files with 301 additions and 114 deletions

View File

@ -20,10 +20,12 @@ module ICache (
ICacheStatus_t status, nextStatus; ICacheStatus_t status, nextStatus;
logic [31:0] PC; logic [31:0] PC;
wire [4:0] cacheAddress; // PC[9:4] wire [5:0] cacheAddress; // PC[9:4]
logic [`IC_DATA_LENGTH-1:0] cacheLine; // Cache Line logic [`IC_DATA_LENGTH-1:0] cacheLine; // Cache Line
logic [31:0] cacheLineData[4]; // Map Cache Line into 4 Cache Data logic [31:0] cacheLineData[4]; // Map Cache Line into 4 Cache Data
logic [3:0] cacheLRU[64]; // Recent Access bitmap
logic [3:0] nextCacheLRU[64];
logic hit; // Cache hit or not logic hit; // Cache hit or not
logic hitWay[4]; // Cache Line hit or not logic hitWay[4]; // Cache Line hit or not
@ -40,30 +42,44 @@ module ICache (
DataRAM1.wen = 0; DataRAM1.wen = 0;
DataRAM2.wen = 0; DataRAM2.wen = 0;
DataRAM3.wen = 0; DataRAM3.wen = 0;
for (integer i = 0; i < 64; i++) begin
cacheLRU[i][0] = 0;
cacheLRU[i][1] = 0;
cacheLRU[i][2] = 0;
cacheLRU[i][3] = 0;
end
end end
// Main: /*
* ICIdle: update sram.addr_ok and **PC**
* ICLookup: update sram.data_ok and **PC**
* ICMiss:
* ICReplace:
* ICRefill:
*/
// Main-FF:
always_ff @(posedge clk) begin always_ff @(posedge clk) begin
sram.data_ok = 0; sram.data_ok <= 0;
sram.addr_ok = 0; sram.addr_ok <= 0;
PC <= 0;
cacheLRU <= nextCacheLRU;
case (status) case (status)
ICIdle: begin ICIdle: begin
if (sram.req == 1) begin if (sram.req == 1) begin
PC <= sram.addr; PC <= sram.addr;
sram.addr_ok = 1; sram.addr_ok <= 1;
end end
end end
ICLookup: begin ICLookup: begin
if (hit) begin if (hit) begin
// HandShake with core
sram.data_ok <= 1; sram.data_ok <= 1;
if (sram.req == 1) begin if (sram.req == 1) begin
PC <= sram.addr; PC <= sram.addr;
sram.addr_ok = 1; sram.addr_ok <= 1;
end end
end else begin
// TBD
end end
end end
@ -82,29 +98,79 @@ module ICache (
end end
// Next Status Generator: // Main-COMB:
always_comb begin always_comb begin
case (status) case (status)
ICIdle: begin // IDLE or LOOKUP ICIdle: begin
end
ICLookup: begin
// Update LRU
if (hitWay[0]) begin
casez (cacheLRU[cacheAddress])
4'b111?: nextCacheLRU[cacheAddress] = 4'b0001;
default: nextCacheLRU[cacheAddress][0] = 1'b1;
endcase
end
if (hitWay[1]) begin
casez (cacheLRU[cacheAddress])
4'b11?1: nextCacheLRU[cacheAddress] = 4'b0010;
default: nextCacheLRU[cacheAddress][1] = 1'b1;
endcase
end
if (hitWay[2]) begin
casez (cacheLRU[cacheAddress])
4'b1?11: nextCacheLRU[cacheAddress] = 4'b0100;
default: nextCacheLRU[cacheAddress][2] = 1'b1;
endcase
end
if (hitWay[3]) begin
casez (cacheLRU[cacheAddress])
4'b?111: nextCacheLRU[cacheAddress] = 4'b1000;
default: nextCacheLRU[cacheAddress][3] = 1'b1;
endcase
end
end
ICMiss: begin
end
ICReplace: begin
end
ICRefill: begin
end
default: begin
end
endcase
end
// Next Status Generator:
always_comb begin
nextStatus = ICIdle; // avoid latch
case (status)
ICIdle: begin // -> IDLE or LOOKUP
if (sram.req) nextStatus = ICLookup; if (sram.req) nextStatus = ICLookup;
else nextStatus = ICIdle; else nextStatus = ICIdle;
end end
ICLookup: begin // IDLE or LOOKUP or MISS ICLookup: begin // -> IDLE or LOOKUP or MISS
if (hit) begin if (hit) begin
if (sram.req) nextStatus = ICLookup; if (sram.req) nextStatus = ICLookup;
else nextStatus = ICIdle; else nextStatus = ICIdle;
end else nextStatus = ICMiss; end else nextStatus = ICMiss;
end end
ICMiss: begin // REPLACE ICMiss: begin // -> REPLACE
end end
ICReplace: begin // REFILL ICReplace: begin // -> REFILL
end end
ICRefill: begin // IDLE or LOOKUP ICRefill: begin // -> IDLE or LOOKUP
end end
default: nextStatus = ICIdle; default: nextStatus = ICIdle;
endcase endcase
end end
// Next Status Updater:
always_ff @(posedge clk) begin always_ff @(posedge clk) begin
status <= nextStatus; status <= nextStatus;
end end
@ -113,7 +179,6 @@ module ICache (
assign cacheAddress = PC[9:4]; assign cacheAddress = PC[9:4];
assign tag = PC[31:10]; assign tag = PC[31:10];
// Hit Check: // Hit Check:
assign hitWay[0] = TagRAM0.rdata[0] & TagRAM0.rdata[`IC_TAG_LENGTH-1:1] == tag; assign hitWay[0] = TagRAM0.rdata[0] & TagRAM0.rdata[`IC_TAG_LENGTH-1:1] == tag;
assign hitWay[1] = TagRAM1.rdata[0] & TagRAM1.rdata[`IC_TAG_LENGTH-1:1] == tag; assign hitWay[1] = TagRAM1.rdata[0] & TagRAM1.rdata[`IC_TAG_LENGTH-1:1] == tag;
@ -121,30 +186,30 @@ module ICache (
assign hitWay[3] = TagRAM3.rdata[0] & TagRAM3.rdata[`IC_TAG_LENGTH-1:1] == tag; assign hitWay[3] = TagRAM3.rdata[0] & TagRAM3.rdata[`IC_TAG_LENGTH-1:1] == tag;
assign hit = hitWay[0] | hitWay[1] | hitWay[2] | hitWay[3]; assign hit = hitWay[0] | hitWay[1] | hitWay[2] | hitWay[3];
assign cacheLine = (hitWay[0] ? DataRAM0.rdata : 32'b0) | assign cacheLine = (hitWay[0] ? DataRAM0.rdata : 128'b0) |
(hitWay[1] ? DataRAM1.rdata : 32'b0) | (hitWay[1] ? DataRAM1.rdata : 128'b0) |
(hitWay[2] ? DataRAM2.rdata : 32'b0) | (hitWay[2] ? DataRAM2.rdata : 128'b0) |
(hitWay[3] ? DataRAM3.rdata : 32'b0); (hitWay[3] ? DataRAM3.rdata : 128'b0);
assign cacheLineData[0] = cacheLine[31:0]; assign cacheLineData[0] = cacheLine[31:0];
assign cacheLineData[1] = cacheLine[63:32]; assign cacheLineData[1] = cacheLine[63:32];
assign cacheLineData[2] = cacheLine[95:64]; assign cacheLineData[2] = cacheLine[95:64];
assign cacheLineData[3] = cacheLine[127:96]; assign cacheLineData[3] = cacheLine[127:96];
assign sram.rdata0 = cacheLineData[PC[3]]; assign sram.rdata0 = cacheLineData[PC[3:2]];
assign sram.rdata1 = cacheLineData[PC[3]+1]; assign sram.rdata1 = cacheLineData[{PC[3], 1'b1}];
// Block RAM: // Block RAM:
assign TagRam0.addr = cacheAddress; assign TagRAM0.addr = cacheAddress;
assign TagRam1.addr = cacheAddress; assign TagRAM1.addr = cacheAddress;
assign TagRam2.addr = cacheAddress; assign TagRAM2.addr = cacheAddress;
assign TagRam3.addr = cacheAddress; assign TagRAM3.addr = cacheAddress;
assign DataRam0.addr = cacheAddress; assign DataRAM0.addr = cacheAddress;
assign DataRam1.addr = cacheAddress; assign DataRAM1.addr = cacheAddress;
assign DataRam2.addr = cacheAddress; assign DataRAM2.addr = cacheAddress;
assign DataRam3.addr = cacheAddress; assign DataRAM3.addr = cacheAddress;
cache_tag_bram tag_ram0 ( cache_tag_bram tag_ram0 (
.addra(TagRAM0.addr), .addra(TagRAM0.addr),

View File

@ -1,7 +0,0 @@
// Pass-through clock module: clk_out1 is a direct copy of clk_in1.
// The reset input is accepted on the port list but is not used.
module clk_wiz (
    input  wire clk_in1,
    input  wire reset,
    output wire clk_out1
);

    assign clk_out1 = clk_in1;

endmodule

View File

@ -0,0 +1,23 @@
// Make Linter Happy
`include "ICache.svh"
`include "sram.svh"
// Stub for the cache data RAM (presumably standing in for the real block RAM
// while linting -- confirm against the build flow).
//
// It does not store anything: on every rising edge of clka the 6-bit address
// is zero-extended to `IC_DATA_LENGTH bits and registered, and that register
// drives douta. dina and wea are present on the port list but are not read.
module cache_data_bram (
    input  wire [                 5:0] addra,
    input  wire                        clka,
    input  wire [`IC_DATA_LENGTH-1:0]  dina,
    output wire [`IC_DATA_LENGTH-1:0]  douta,
    input  wire                        wea
);

    // Registered, zero-extended copy of the last sampled address.
    logic [`IC_DATA_LENGTH-1:0] addr_echo;

    // Start from a known value so douta is not X before the first clock edge.
    initial addr_echo = 0;

    always_ff @(posedge clka)
        addr_echo <= {{(`IC_DATA_LENGTH - 6) {1'b0}}, addra};

    assign douta = addr_echo;

endmodule

View File

@ -0,0 +1,26 @@
// Make Linter Happy
`include "ICache.svh"
`include "sram.svh"
// Stub for the cache tag RAM (presumably standing in for the real block RAM
// while linting -- confirm against the build flow).
//
// It does not store anything: on every rising edge of clka the 6-bit address
// is zero-extended to `IC_TAG_LENGTH bits and registered, and that register
// drives douta. dina and wea are present on the port list but are not read.
module cache_tag_bram (
    input  wire [                5:0] addra,
    input  wire                       clka,
    input  wire [`IC_TAG_LENGTH-1:0]  dina,
    output wire [`IC_TAG_LENGTH-1:0]  douta,
    input  wire                       wea
);

    // Registered, zero-extended copy of the last sampled address.
    logic [`IC_TAG_LENGTH-1:0] addr_echo;

    // Start from a known value so douta is not X before the first clock edge.
    initial addr_echo = 0;

    always_ff @(posedge clka)
        addr_echo <= {{(`IC_TAG_LENGTH - 6) {1'b0}}, addra};

    assign douta = addr_echo;

endmodule

View File

@ -0,0 +1,52 @@
// Make Linter Happy
`include "ICache.svh"
`include "sram.svh"
// Lint-only top level: instantiates ICache with a dummy sramro_i interface so
// the cache can be elaborated on its own.
//
// There are no delays in this module; the clock is toggled from an
// always_latch block, which is why UNOPTFLAT is disabled around `counter`.
module happy ();
    logic clk, rst;
    /* verilator lint_off UNOPTFLAT */
    integer counter = 0;
    /* verilator lint_on UNOPTFLAT */

    // Request interface driven by this module and consumed by the cache.
    sramro_i fake ();

    ICache ICache (
        .clk (clk),
        .rst (rst),
        .sram(fake.slave)
    );

    initial begin
        $dumpfile("test.vcd");
        // The dump scope must name this module; `testbench` does not exist
        // in a hierarchy whose top is `happy`.
        $dumpvars(0, happy);
        $dumpvars(1, fake.req);
        $dumpvars(1, fake.addr);
        $dumpvars(1, fake.addr_ok);
        $dumpvars(1, fake.data_ok);
        $dumpvars(1, fake.rdata0);
        $dumpvars(1, fake.rdata1);

        // Initial stimulus: reset low, one pending request at address 0x20.
        rst       = 0;
        clk       = 1;
        fake.req  = 1;
        fake.addr = 32'b0100000;
    end

    // Toggle the clock and bump the request address on every evaluation;
    // stop after 1024 evaluations in which clk ended up high.
    always_latch begin
        clk       = ~clk;
        fake.addr = fake.addr + 1;
        if (clk == 1'b1) begin
            counter = counter + 1;
            if (counter >= 1024) $finish;
        end
    end
endmodule

View File

@ -0,0 +1,19 @@
# Build and run the ICache testbench with Icarus Verilog.
#
#   make / make run   build test.out, simulate to test.vcd, then open the dump
#   make clean        remove the generated simulator image and waveform
#
# The repository root is deliberately NOT stored in a variable called HOME:
# HOME normally comes from the environment, and make exports the makefile's
# overriding value of any environment variable to every recipe, so iverilog,
# vvp and `open` would all run with HOME=../.. .
ROOT = ../..
INC = ${ROOT}/include

sources += testbench.sv
sources += ${ROOT}/Cache/ICache.sv

# Default target: produce the waveform and open it with the system viewer.
run: test.vcd
	open test.vcd

clean:
	rm -f test.vcd test.out

# Running the compiled simulation writes test.vcd (see $dumpfile in testbench.sv).
test.vcd: test.out
	vvp test.out

# Compile all sources with `testbench` as the top-level module.
test.out: ${sources}
	iverilog -I ${INC} -g2005-sv -Wall -s testbench -o $@ $^

.PHONY: run clean

View File

@ -0,0 +1,87 @@
`include "ICache.svh"
`include "sram.svh"
// Simulation stand-in for the cache tag RAM.
//
// Nothing is stored: each rising edge of clka registers the 6-bit address,
// zero-extended to `IC_TAG_LENGTH bits, and that register is what douta
// shows. dina and wea are on the port list but are never read.
module cache_tag_bram (
    input  wire [                5:0] addra,
    input  wire                       clka,
    input  wire [`IC_TAG_LENGTH-1:0]  dina,
    output wire [`IC_TAG_LENGTH-1:0]  douta,
    input  wire                       wea
);

    logic [`IC_TAG_LENGTH-1:0] tmp;

    // Known value before the first clock edge.
    initial tmp = 0;

    always_ff @(posedge clka)
        tmp <= {{(`IC_TAG_LENGTH - 6) {1'b0}}, addra};

    assign douta = tmp;

endmodule
// Simulation stand-in for the cache data RAM.
//
// Nothing is stored: each rising edge of clka registers the 6-bit address,
// zero-extended to `IC_DATA_LENGTH bits, and that register is what douta
// shows. dina and wea are on the port list but are never read.
module cache_data_bram (
    input  wire [                 5:0] addra,
    input  wire                        clka,
    input  wire [`IC_DATA_LENGTH-1:0]  dina,
    output wire [`IC_DATA_LENGTH-1:0]  douta,
    input  wire                        wea
);

    logic [`IC_DATA_LENGTH-1:0] tmp;

    // Known value before the first clock edge.
    initial tmp = 0;

    always_ff @(posedge clka)
        tmp <= {{(`IC_DATA_LENGTH - 6) {1'b0}}, addra};

    assign douta = tmp;

endmodule
// ICache testbench: drives a permanently asserted request with an address
// that increments on every clock toggle, dumps waveforms to test.vcd, and
// stops after 1024 rising clock edges.
module testbench ();
    logic clk, rst;
    integer counter = 0;
    integer i;

    // Request interface between this testbench and the cache.
    sramro_i fake ();

    ICache ic (
        .clk (clk),
        .rst (rst),
        .sram(fake.slave)
    );

    initial begin
        // Waveform setup: whole hierarchy plus the interface signals.
        $dumpfile("test.vcd");
        $dumpvars(0, testbench);
        $dumpvars(1, fake.req);
        $dumpvars(1, fake.addr);
        $dumpvars(1, fake.addr_ok);
        $dumpvars(1, fake.data_ok);
        $dumpvars(1, fake.rdata0);
        $dumpvars(1, fake.rdata1);

        // Initial stimulus: reset is held low for the whole run, the request
        // line is held high, and the first address is 0x20.
        rst       = 0;
        clk       = 1;
        fake.req  = 1;
        fake.addr = 32'h0000_0020;
    end

    // Half period of 5 time units. The address advances on BOTH edges.
    // NOTE(review): fake.addr changes with a blocking assignment in the same
    // time step as the rising edge of clk -- confirm the DUT sampling order
    // is what the test intends.
    always begin
        #5;
        clk       = ~clk;
        fake.addr = fake.addr + 1;
        if (clk == 1'b1) begin
            counter = counter + 1;
            if (counter >= 1024) begin
                $finish;
            end
        end
    end
endmodule

View File

@ -1,29 +0,0 @@
// Instruction memory: 128 words of 32 bits with an asynchronous read port.
// spo always reflects the word selected by address a.
module imem (
    input  wire [ 6:0] a,
    output wire [31:0] spo
);

    reg [31:0] RAM [127:0];

    // Intentionally empty: no memory image is loaded in this block.
    initial begin
    end

    assign spo = RAM[a];

endmodule
// Data memory: 128 bytes, asynchronous read, synchronous write.
//   spo - byte currently stored at address a (combinational read)
//   we  - when high at a rising edge of clk, d is written to address a
module dmem (
    input  wire       clk,
    input  wire [6:0] a,
    input  wire [7:0] d,
    input  wire       we,
    output wire [7:0] spo
);

    reg [7:0] RAM [127:0];

    // Write port: only updates storage when we is asserted.
    always @(posedge clk) begin
        if (we) begin
            RAM[a] <= d;
        end
    end

    // Read port: no clock involved.
    assign spo = RAM[a];

endmodule

View File

@ -1,49 +0,0 @@
// Top-level testbench for fpga_top.
//
// Dumps the whole hierarchy plus every word of the instruction memory, the
// four data memories and the register file, releases rstn after 5 time
// units, and ends the run when either fpga_top.instr has been all-X on 10
// consecutive rising clock edges or 1024 rising edges have elapsed.
module testbench();
    reg clk, rstn;
    wire [15:0] SW = 0;
    wire [7:0] SEG, AN;

    fpga_top fpga_top (
        clk, rstn,
        SW,
        SEG, AN
    );

    integer counter = 0;  // rising clock edges seen so far
    integer finish  = 0;  // consecutive rising edges with an all-X instruction
    integer i;

    initial begin
        $dumpfile("test.vcd");
        $dumpvars(0, testbench);
        // Memory words are dumped one element at a time.
        for (i = 0; i < 128; i = i + 1) begin
            $dumpvars(1, fpga_top.imem.RAM[i]);
        end
        for (i = 0; i < 128; i = i + 1) begin
            $dumpvars(1, fpga_top.dmem0.RAM[i]);
        end
        for (i = 0; i < 128; i = i + 1) begin
            $dumpvars(1, fpga_top.dmem1.RAM[i]);
        end
        for (i = 0; i < 128; i = i + 1) begin
            $dumpvars(1, fpga_top.dmem2.RAM[i]);
        end
        for (i = 0; i < 128; i = i + 1) begin
            $dumpvars(1, fpga_top.dmem3.RAM[i]);
        end
        for (i = 0; i < 32; i = i + 1) begin
            $dumpvars(1, fpga_top.riscv.rf.rf[i]);
        end

        // Active-low reset: asserted at time 0, released after 5 time units.
        rstn = 0;
        clk  = 1;
        #5;
        rstn = 1;
    end

    // Clock with a half period of 50 time units; bookkeeping runs on the
    // rising edge only.
    always begin
        #50;
        clk = ~clk;
        if (clk == 1'b1) begin
            counter = counter + 1;
            finish  = (fpga_top.instr === 32'bx) ? finish + 1 : 0;
            if (finish >= 10 || counter >= 1024) begin
                $finish;
            end
        end
    end
endmodule