From 9df689ed7a81dbeefd96b0b43ec2137dc18d5125 Mon Sep 17 00:00:00 2001 From: Paul Pan Date: Sun, 5 Sep 2021 23:59:23 +0800 Subject: [PATCH] refactor CACHE inst on D-CACHE --- resources/2021/soft/func/inst/inst_test.h | 8 +- .../2021/soft/func/inst/n98_cache_dcache.S | 76 +++++++++++ src/Cache/DCache.sv | 45 +++---- src/MMU/MMU.sv | 121 ++++++++++++++---- src/include/DCache.svh | 6 +- 5 files changed, 199 insertions(+), 57 deletions(-) diff --git a/resources/2021/soft/func/inst/inst_test.h b/resources/2021/soft/func/inst/inst_test.h index 7f704ba..f15cef0 100644 --- a/resources/2021/soft/func/inst/inst_test.h +++ b/resources/2021/soft/func/inst/inst_test.h @@ -1462,8 +1462,10 @@ LI(t2, data1); \ LI(t3, data2); \ /* prepare -> hit writeback invalidate */ \ - sw t2, offset(t0); \ + sw t2, offset(t1); \ cache 21, offset(t1); \ + lw a0, offset(t0); \ + bne t2, a0, inst_error; \ lw a0, offset(t1); \ bne t2, a0, inst_error; \ nop; \ @@ -1472,12 +1474,16 @@ cache 17, offset(t1); \ lw a0, offset(t0); \ bne a0, t2, inst_error; \ + lw a0, offset(t1); \ + bne a0, t2, inst_error; \ nop; \ /* test hit writeback invalidate */ \ sw t3, offset(t1); \ cache 21, offset(t1); \ lw a0, offset(t0); \ bne a0, t3, inst_error; \ + lw a0, offset(t1); \ + bne a0, t3, inst_error; \ nop; \ /* test multiple*/ \ addi a1, t1, 4; \ diff --git a/resources/2021/soft/func/inst/n98_cache_dcache.S b/resources/2021/soft/func/inst/n98_cache_dcache.S index bf602fa..f77071a 100644 --- a/resources/2021/soft/func/inst/n98_cache_dcache.S +++ b/resources/2021/soft/func/inst/n98_cache_dcache.S @@ -8,6 +8,9 @@ LEAF(n98_cache_dcache_test) li s2, 0x0 ###test inst +## CACHE 17 D-Cache Hit Invalid +## CACHE 21 D-Cache Hit Writeback Invalid + TEST_CACHE_DCACHE_HIT(0xa00d0000, 0x800d0000, 0, 0xA5A5A5A5, 0x12345678) TEST_CACHE_DCACHE_HIT(0xa00d0004, 0x800d0004, 0, 0xABCDEFAB, 0xF0F0F0F0) TEST_CACHE_DCACHE_HIT(0xa00d0008, 0x800d0008, 4, 0x87654321, 0xAABBCCDD) @@ -125,7 +128,80 @@ TEST_CACHE_DCACHE_HIT(0xa00d0028, 
0x800d0028, 4, 0xF0F0F0F0, 0xABCDEFAB) nop +/* TEST D-CACHE Index OP */ +## CACHE 1 D-Cache Index Writeback Invalid +## CACHE 9 D-Cache Index Store Tag +.n98_c1_prepare: + li a0, 0x12345678 + li v0, 0x800d0000 + li v1, 0x800d0200 +.n98_c1_loop: + beq v0, v1, .n98_c1_check + sw a0, 0(v0) + sw a0, 4(v0) + sw a0, 8(v0) + sw a0, 12(v0) + addi a1, v0, 0 + GET_DCACHE_INDEX + cache 1, 0(v0) /* op 1 = D-Cache Index Writeback Invalidate, as this section is labelled; op 0 targets the I-Cache */ + addi v0, a1, 16 + j .n98_c1_loop +.n98_c1_check: + li v0, 0xa00d0000 + li v1, 0xa00d0200 +.n98_c1_check_loop: + beq v0, v1, .n98_c1_check2 + lw a1, 0(v0) + bne a0, a1, inst_error + addi v0, v0, 4 + j .n98_c1_check_loop +.n98_c1_check2: + li v0, 0x800d0000 + li v1, 0x800d0200 +.n98_c1_check2_loop: + beq v0, v1, .n98_c9_prepare + lw a1, 0(v0) + bne a0, a1, inst_error + addi v0, v0, 4 + j .n98_c1_check2_loop + + +.n98_c9_prepare: + li v0, 0x800d0000 + li v1, 0x800d0200 +.n98_c9_loop: + beq v0, v1, .n98_c9_check + sw zero, 0(v0) + sw zero, 4(v0) + sw zero, 8(v0) + sw zero, 12(v0) + addi a1, v0, 0 + GET_DCACHE_INDEX + cache 9, 0(v0) + addi v0, a1, 16 + j .n98_c9_loop +.n98_c9_check: + li v0, 0xa00d0000 + li v1, 0xa00d0200 +.n98_c9_check_loop: + beq v0, v1, .n98_c9_check2 + lw a1, 0(v0) + bne a0, a1, inst_error + addi v0, v0, 4 + j .n98_c9_check_loop +.n98_c9_check2: + li v0, 0x800d0000 + li v1, 0x800d0200 +.n98_c9_check2_loop: + beq v0, v1, .n98_done + lw a1, 0(v0) + bne a0, a1, inst_error + addi v0, v0, 4 + j .n98_c9_check2_loop + +.n98_done: + nop ###detect exception bne s2, zero, inst_error diff --git a/src/Cache/DCache.sv b/src/Cache/DCache.sv index 96c4082..6a43848 100644 --- a/src/Cache/DCache.sv +++ b/src/Cache/DCache.sv @@ -24,22 +24,22 @@ module DCache ( // ============================== // Four way assoc bram controller: - DCTagRAM_t TagRAM0, TagRAM1, TagRAM2, TagRAM3; + DCTagRAM_t TagRAM0, TagRAM1, TagRAM2, TagRAM3; DCDataRAM_t DataRAM0, DataRAM1, DataRAM2, DataRAM3; logic [3:0] LRU[128]; logic [3:0] nextLRU; logic [3:0] nowLRU; - DCTag_t tagOut[4]; - DCData_t dataOut[4]; +
DCTag_t tagOut[4]; + DCData_t dataOut[4]; logic [3:0] tagV; DCIndexL_t index1; - logic hit; + logic hit; logic [3:0] hitWay; - DCData_t cacheLine; + DCData_t cacheLine; logic [3:0] victim; logic [3:0] wen; @@ -56,18 +56,8 @@ module DCache ( // ======== Flip-Flop ======== // =========================== - ffen #(`DC_TAGL-`DC_INDEXL) index_ff ( - clk, - port.index, - port.req, - index1 - ); - ffen #(4) wen_ff ( - clk, - wen, - en2, - wen2 - ); + ffen #(`DC_TAGL-`DC_INDEXL) index_ff (clk, port.index, port.req, index1); + ffen #(4) wen_ff (clk, wen, en2, wen2); // =============================== // ======== State Machine ======== @@ -97,7 +87,7 @@ module DCache ( nextState = IDLE; end end else begin - if (hit) begin + if (hit & ~port.clearWb | port.clear & port.clearIdx) begin if (port.wvalid) begin bwe1 = 1'b1; nextState = IDLE; @@ -161,20 +151,21 @@ module DCache ( // ============================== // Choose Victim - assign victim = port.clear ? hitWay - : tagV[0] == 0 ? 4'b0001 - : tagV[1] == 0 ? 4'b0010 - : tagV[2] == 0 ? 4'b0100 - : tagV[3] == 0 ? 4'b1000 + assign victim = port.clearWb ? hitWay + : tagV[0] == 0 ? 4'b0001 + : tagV[1] == 0 ? 4'b0010 + : tagV[2] == 0 ? 4'b0100 + : tagV[3] == 0 ? 4'b1000 : nowLRU[0] == 0 & ~tagOut[0].dirty ? 4'b0001 : nowLRU[1] == 0 & ~tagOut[1].dirty ? 4'b0010 : nowLRU[2] == 0 & ~tagOut[2].dirty ? 4'b0100 : nowLRU[3] == 0 & ~tagOut[3].dirty ? 4'b1000 - : nowLRU[0] == 0 ? 4'b0001 - : nowLRU[1] == 0 ? 4'b0010 - : nowLRU[2] == 0 ? 4'b0100 + : nowLRU[0] == 0 ? 4'b0001 + : nowLRU[1] == 0 ? 4'b0010 + : nowLRU[2] == 0 ? 4'b0100 : 4'b1000; - assign wen = hit ? hitWay : victim; + assign wen = port.clear & port.clearIdx ? 4'b1111 + : hit ? 
hitWay : victim; assign port.dirt_valid = (state == LOOKUP) & |{tagV & {tagOut[3].dirty, tagOut[2].dirty, tagOut[1].dirty, tagOut[0].dirty} & victim}; diff --git a/src/MMU/MMU.sv b/src/MMU/MMU.sv index a6ec59f..3992d3c 100644 --- a/src/MMU/MMU.sv +++ b/src/MMU/MMU.sv @@ -60,12 +60,12 @@ module MMU ( I_CACHE, I_CACHE_REFILL } istate_t; - typedef enum bit [2:0] { + typedef enum bit [3:0] { DR_IDLE, DR_WA, DR_WD1, DR_WD2, DR_WD3, DR_WD4, DR_REFILL, - DR_CACHE + DR_ICACHE, DR_CACHE, DR_CACHE_REFILL, DR_CACHE_CLEAR, DR_CACHE_REQ } drstate_t; typedef enum bit [2:0] { @@ -296,6 +296,9 @@ module MMU ( word_t ddAddr1; logic [127:0] ddData1; + // D-Cache Clear + logic dClrValid, dClrRv, dClrReq; + // ============================ // ======== dFlip-Flop ======== // ============================ @@ -329,14 +332,34 @@ module MMU ( drNextState = drState; data.data_ok = 0; rdata_axi.req = 0; + dc.clear = 0; + dc.clearIdx = 0; + dc.clearWb = 0; + dClrValid = 0; + dClrRv = 0; + dClrReq = 0; case (drState) DR_IDLE: begin - if (diReq) drNextState = DR_CACHE; - else if (~dValid1) dEn = 1; + if (diReq) drNextState = DR_ICACHE; + else if (dReq1 & cacheOp1[2] & dCached1) begin + if (cacheOp2[0]) begin /* NOTE(review): the enclosing guard tests cacheOp1, but this branch and dc.clearIdx read cacheOp2; confirm cacheOp2 already holds the current request's op in DR_IDLE */ + // Do not write back + // dc.valid = dc.wvalid = 1; + dClrValid = 1; + dc.clear = 1; + dc.clearIdx = cacheOp2[1]; + drNextState = DR_CACHE_REFILL; + end else begin + // WriteBack + dc.clearWb = 1; + drNextState = DR_CACHE; + dEn2 = 1; + end + end else if (~dValid1) dEn = 1; else begin dEn2 = 1; if (data.wr) data.data_ok = 1; - if (data.wr & (~dCached1 | dc.hit | cacheOp1[2])) drNextState = DR_REFILL; + if (data.wr & (~dCached1 | dc.hit)) drNextState = DR_REFILL; else if (dCached1 & dc.hit) begin dEn = 1; data.data_ok = 1; @@ -386,7 +409,7 @@ module MMU ( drNextState = DR_IDLE; end end - DR_CACHE: begin + DR_ICACHE: begin /* * 该状态是 I-CACHE 的清除指令 * 当 iState == I_CACHE_REFILL 代表下一个周期恢复正常工作 @@ -397,6 +420,38 @@ module MMU ( drNextState = DR_IDLE; end end + DR_CACHE: begin + // WriteBack + //
Clear By Index or Address + // D-Cache: state == REPLACE + if (wdata_ok) begin + if (cacheOp2[1]) begin + drNextState = DR_CACHE_CLEAR; + end else begin + dClrRv = 1; + dc.clear = 1; + drNextState = DR_CACHE_REFILL; + end + // drNextState = DR_CACHE_CLEAR; + end + end + DR_CACHE_REFILL: begin + // avoid deadlock + dEn = 1; + drNextState = DR_IDLE; + data.data_ok = 1; + end + DR_CACHE_CLEAR: begin + // deal with timing loop + dClrRv = 1; + dc.clear = 1; + drNextState = DR_CACHE_REQ; + end + DR_CACHE_REQ: begin + // deal with timing loop + dClrReq = 1; // use dClrReq to start a new D-Cache req + drNextState = DR_IDLE; + end endcase end @@ -409,16 +464,30 @@ module MMU ( * Cache 指令当成写指令处理 * TLB 转换请求和 D-Cache 请求与普通访存一致 * dc.req 在 I-Cache Cache 指令发生时为0 - * 清除请求时 drState 进入 DR_REFILL 状态,后由 DW_STATE 确定返回 * dwState 和 dwaState 需判断是否是 CACHE 指令且是否允许写回 * 屏蔽 dAddressError 和 TLBModified 且只有在 Address 类型请求下允许 TLBRefill 和 TLBInvalid - * dc.clear 在查询完成后一阶段或者写回完成阶段发送 + * Index WriteBack is converted into Hit WriteBack(s) + Index Invalid */ + /* State + D-Cache: + IDLE <- dc.req + dc.index + LOOKUP <- dc.tag1: the correct tag1 must be supplied in LOOKUP mode + dc.valid: 1 + dc.wvalid: 1 + dc.clear: Hit Invalid and Index Invalid + dc.clearIdx: Hit Invalid or Index Invalid + dc.clearWb: whether a writeback is required + -> dc.dirt_valid + dc.dirt_addr + dc.dirt_data + REPLACE <- dc.rvalid + dc.clear: same cycle as dc.rvalid + */ - assign dVA = data.addr; - assign diReq = dReq1 & ~cacheOp1[2] & |cacheOp1[1:0]; - assign dcReq1 = dReq1 & (cacheOp1 == CNOP | cacheOp1[2]); // exclude I-Cache clear - assign dValid1 = dReq1 & dHit1 & dMValid1 & (~data.wr | dDirty1) & (in_kernel | dUser1); + assign dVA = data.addr; + assign diReq = dReq1 & ~cacheOp1[2] & |cacheOp1[1:0]; + assign dcReq1 = dReq1 & (cacheOp1 == CNOP | cacheOp1[2]); // exclude I-Cache clear + assign dValid1 = dReq1 & dHit1 & dMValid1 & (~data.wr | dDirty1) & (in_kernel | dUser1); assign dTLBRefill = drState == DR_IDLE & dcReq1 & (cacheOp1 == CNOP | ~cacheOp1[1]) & ~dHit1; assign dTLBInvalid = drState
== DR_IDLE & dcReq1 & (cacheOp1 == CNOP | ~cacheOp1[1]) & ~dMValid1; @@ -464,12 +533,12 @@ module MMU ( ); // do not request when handling CACHE instruction on I-Cache - assign dc.req = dEn & (cacheOp[2] | ~|cacheOp[1:0]); - assign dc.valid = dValid1 & (dCached1 & (~cacheOp1[2] | dc.hit) | cacheOp1[1]); - assign dc.index = dVA[`DC_TAGL-1:`DC_INDEXL]; - assign dc.tag1 = dEn2 ? dPA1[31:`DC_TAGL] : dPA2[31:`DC_TAGL]; - assign dc.sel1 = dEn2 ? dPA1[3:2] : dPA2[3:2]; - assign dc.rvalid = rdata_axi.rvalid & rdata_axi.data_ok; + assign dc.req = dClrReq | dEn & (cacheOp[2] | ~|cacheOp[1:0]); + assign dc.valid = dClrValid | dValid1 & dCached1; + assign dc.index = dEn ? dVA[`DC_TAGL-1:`DC_INDEXL] : dVA1[`DC_TAGL-1:`DC_INDEXL]; + assign dc.tag1 = dEn2 ? dPA1[31:`DC_TAGL] : dPA2[31:`DC_TAGL]; + assign dc.sel1 = dEn2 ? dPA1[3:2] : dPA2[3:2]; + assign dc.rvalid = dClrRv | rdata_axi.rvalid & rdata_axi.data_ok; mux4 #(128) dc_rdata_mux ( {rdata_axi.rdata, drD3, drD2, drD1}, {drD3, drD2, drD1, rdata_axi.rdata}, @@ -508,7 +577,7 @@ module MMU ( if (dEn2 & (~dCached1 & data.wr | dCached1 & dc.dirt_valid & (~cacheOp1[2] | ~cacheOp1[0]) // WriteOnly 不允许写回 - & (~dc.hit | cacheOp1[2] & ~cacheOp1[0]) // Writeback Invalid 直接写回 + & (~dc.hit | cacheOp1[2] & ~cacheOp1[0]) // Writeback or the general case )) begin if (dCached1) begin wdata_axi.wdata = dc.dirt_data[31:0]; @@ -583,7 +652,7 @@ module MMU ( if (~wdata_axi.data_ok) dwNextState = DW_WB; else begin // fixme: AXI3 wait WA - if (drState == DR_REFILL) dwNextState = DW_IDLE; + if (drState == DR_REFILL | drState == DR_CACHE_REFILL) dwNextState = DW_IDLE; else dwNextState = DW_WAITR; end end @@ -592,12 +661,12 @@ module MMU ( // TODO: goto IDLE on failure if (wdata_axi.data_ok) begin // fixme: AXI3 wait WA - if (drState == DR_REFILL) dwNextState = DW_IDLE; + if (drState == DR_REFILL | drState == DR_CACHE_REFILL) dwNextState = DW_IDLE; else dwNextState = DW_WAITR; end end DW_WAITR: begin - if (drState == DR_REFILL) dwNextState = DW_IDLE; + if (drState ==
DR_REFILL | drState == DR_CACHE_REFILL) dwNextState = DW_IDLE; end endcase end @@ -621,8 +690,8 @@ module MMU ( if (dEn2 & (~dCached1 & data.wr | dCached1 & dc.dirt_valid & (~cacheOp1[2] | ~cacheOp1[0]) // WriteOnly 不允许写回 - & (~dc.hit | cacheOp1[2] & ~cacheOp1[0])) // Writeback Invalid 直接写回 - ) begin + & (~dc.hit | cacheOp1[2] & ~cacheOp1[0]) // Writeback or the general case + )) begin wdata_axi.req = 1'b1; if (~wdata_axi.addr_ok) dwaNextState = DWA_WA; end @@ -658,11 +727,9 @@ module MMU ( assign wdata_axi.addr = (dEn2 ? dCached1 : dCached2) ? (dwaState == DWA_IDLE) ? dc.dirt_addr : ddAddr1 : dEn2 ? dPA1 : dPA2; assign wdata_axi.len = (dEn2 ? dCached1 : dCached2) ? 4'b0011 : 4'b0000; assign wdata_axi.size = (dEn2 ? dCached1 : dCached2) ? 3'b010 : {1'b0, dSize1}; - assign dc.wvalid = dEn2 ? data.wr : dwr1; + assign dc.wvalid = dClrValid | (dEn2 ? data.wr : dwr1); /* parenthesized because '|' binds tighter than '?:'; dClrValid must force wvalid high for a clear request */ assign dc.wdata = dEn2 ? data.wdata : dWdata1; assign dc.wstrb = dEn2 ? data.wstrb : dWstrb1; - assign dc.clear = dEn2 ? cacheOp1[2] & dCached1 & (dc.hit | cacheOp1[1]) - : cacheOp2[2] & dCached2; // ============================== // ========== VA -> PA ========== diff --git a/src/include/DCache.svh b/src/include/DCache.svh index 423709b..c5235a1 100644 --- a/src/include/DCache.svh +++ b/src/include/DCache.svh @@ -50,20 +50,22 @@ interface DCache_i; DCData_t dirt_data; DCData_t row; logic clear; + logic clearIdx; + logic clearWb; modport cache( input req, valid, input index, tag1, sel1, input rvalid, rdata, wvalid, wdata, wstrb, output hit, dirt_valid, dirt_addr, dirt_data, row, - input clear + input clear, clearIdx, clearWb ); modport mmu( output req, valid, output index, tag1, sel1, output rvalid, rdata, wvalid, wdata, wstrb, input hit, dirt_valid, dirt_addr, dirt_data, row, - output clear + output clear, clearIdx, clearWb ); endinterface //DCache_i