diff --git a/README.md b/README.md index da4a71d..494d0c0 100644 --- a/README.md +++ b/README.md @@ -32,8 +32,8 @@ Our awesome `MIPS` CPU written in `SystemVerilog` for Loongson Cup | :----------------: | :-------------------------------: | :------: | :--: | :-------------------------------------- | | :heavy_check_mark: | `I-Cache Index Invalid` | `SYS` | 2 | | | :heavy_check_mark: | `I-Cache Hit Invalid` | `SYS` | 2 | | -| :hourglass: | `D-Cache Index Writeback Invalid` | `SYS` | 2 | :cry: | -| :hourglass: | `D-Cache Index Store Tag` | `SYS` | 2 | | +| :heavy_check_mark: | `D-Cache Index Writeback Invalid` | `SYS` | 2 | :cry: | +| :heavy_check_mark: | `D-Cache Index Store Tag` | `SYS` | 2 | | | :heavy_check_mark: | `D-Cache Hit Invalid` | `SYS` | 2 | | | :heavy_check_mark: | `D-Cache Hit Writeback Invalid` | `SYS` | 2 | | | :heavy_check_mark: | `PREF` | `SYS` | 1 | Treat as `NOP` | diff --git a/resources/2021/soft/func/inst/n98_cache_dcache.S b/resources/2021/soft/func/inst/n98_cache_dcache.S index 6c72f90..63bfc1f 100644 --- a/resources/2021/soft/func/inst/n98_cache_dcache.S +++ b/resources/2021/soft/func/inst/n98_cache_dcache.S @@ -92,6 +92,7 @@ TEST_CACHE_DCACHE_HIT(0xa00d0028, 0x800d0028, 4, 0xF0F0F0F0, 0xABCDEFAB) add v1, v0, v1 .n98_con_prepare3_loop: beq v0, v1, .n98_con_check3 + nop sw a0, 0(v0) sw a0, 4(v0) sw a0, 8(v0) @@ -110,6 +111,7 @@ TEST_CACHE_DCACHE_HIT(0xa00d0028, 0x800d0028, 4, 0xF0F0F0F0, 0xABCDEFAB) add v1, v0, v1 .n98_con_check4_loop: beq v0, v1, .n98_con_end + nop lw a1, 0(v0) bne a0, a1, inst_error nop @@ -134,18 +136,19 @@ TEST_CACHE_DCACHE_HIT(0xa00d0028, 0x800d0028, 4, 0xF0F0F0F0, 0xABCDEFAB) ## CACHE 9 D-Cache Index Store Tag .n98_c1_prepare: - li a0, 0x12345678 + li a0, 0x11223344 li v0, 0x800d0000 li v1, 0x800d0200 .n98_c1_loop: beq v0, v1, .n98_c1_check + nop sw a0, 0(v0) sw a0, 4(v0) sw a0, 8(v0) sw a0, 12(v0) addi a1, v0, 0 GET_DCACHE_INDEX - cache 0, 0(v0) + cache 1, 0(v0) addi v0, a1, 16 j .n98_c1_loop nop @@ -180,6 +183,7 @@ 
TEST_CACHE_DCACHE_HIT(0xa00d0028, 0x800d0028, 4, 0xF0F0F0F0, 0xABCDEFAB) li v1, 0x800d0200 .n98_c9_loop: beq v0, v1, .n98_c9_check + nop sw zero, 0(v0) sw zero, 4(v0) sw zero, 8(v0) @@ -195,6 +199,7 @@ TEST_CACHE_DCACHE_HIT(0xa00d0028, 0x800d0028, 4, 0xF0F0F0F0, 0xABCDEFAB) li v1, 0xa00d0200 .n98_c9_check_loop: beq v0, v1, .n98_c9_check2 + nop lw a1, 0(v0) bne a0, a1, inst_error nop @@ -206,6 +211,7 @@ TEST_CACHE_DCACHE_HIT(0xa00d0028, 0x800d0028, 4, 0xF0F0F0F0, 0xABCDEFAB) li v1, 0x800d0200 .n98_c9_check2_loop: beq v0, v1, .n98_done + nop lw a1, 0(v0) bne a0, a1, inst_error nop diff --git a/src/Cache/DCache.sv b/src/Cache/DCache.sv index 6a43848..a2bbcc1 100644 --- a/src/Cache/DCache.sv +++ b/src/Cache/DCache.sv @@ -52,12 +52,15 @@ module DCache ( DCData_t wdata1[4], wdata2[4]; + logic clear; + // =========================== // ======== Flip-Flop ======== // =========================== - ffen #(`DC_TAGL-`DC_INDEXL) index_ff (clk, port.index, port.req, index1); - ffen #(4) wen_ff (clk, wen, en2, wen2); + ffen #(`DC_TAGL-`DC_INDEXL) index_ff (clk, port.index, port.req, index1); + ffen #(4) wen_ff (clk, wen, en2, wen2); + ffen #(1) clear_ff (clk, port.clearWb, en2, clear); // =============================== // ======== State Machine ======== @@ -82,12 +85,12 @@ module DCache ( end end LOOKUP: begin - if (~port.valid) begin + if (~port.valid & ~port.clearWb) begin if (~port.req) begin nextState = IDLE; end end else begin - if (hit & ~port.clearWb | port.clear & port.clearIdx) begin + if (hit & ~port.clearWb | port.clear & ~port.clearWb) begin if (port.wvalid) begin bwe1 = 1'b1; nextState = IDLE; @@ -136,7 +139,8 @@ module DCache ( assign hitWay[1] = tagV[1] & tagOut[1].tag == port.tag1; assign hitWay[2] = tagV[2] & tagOut[2].tag == port.tag1; assign hitWay[3] = tagV[3] & tagOut[3].tag == port.tag1; - assign hit = |{hitWay}; + // 在 clearWb状态下确保命中 + assign hit = |{hitWay} | port.clear & port.clearWb; assign cacheLine = (hitWay[0] ? 
dataOut[0] : `DC_DATA_LENGTH'b0) | (hitWay[1] ? dataOut[1] : `DC_DATA_LENGTH'b0) @@ -151,7 +155,13 @@ module DCache ( // ============================== // Choose Victim - assign victim = port.clearWb ? hitWay + assign victim = port.clear & port.clearWb & ~port.clearIdx ? hitWay // Hit Address Writeback -> hitWay + // Hit Index Writeback -> clear valid + dirty way + : port.clear & port.clearWb & port.clearIdx & tagV[0] & tagOut[0].dirty ? 4'b0001 + : port.clear & port.clearWb & port.clearIdx & tagV[1] & tagOut[1].dirty ? 4'b0010 + : port.clear & port.clearWb & port.clearIdx & tagV[2] & tagOut[2].dirty ? 4'b0100 + : port.clear & port.clearWb & port.clearIdx & tagV[3] & tagOut[3].dirty ? 4'b1000 + // Normal mode : tagV[0] == 0 ? 4'b0001 : tagV[1] == 0 ? 4'b0010 : tagV[2] == 0 ? 4'b0100 @@ -164,8 +174,11 @@ module DCache ( : nowLRU[1] == 0 ? 4'b0010 : nowLRU[2] == 0 ? 4'b0100 : 4'b1000; - assign wen = port.clear & port.clearIdx ? 4'b1111 - : hit ? hitWay : victim; + + assign wen = port.clear & port.clearIdx & ~port.clearWb ? 4'b1111 // Index Invalidate + : port.clear & ~port.clearIdx & ~port.clearWb ? hitWay // Hit Invalidate + : port.clear & port.clearWb ? victim // Writeback Invalidate + : hit ? hitWay : victim; assign port.dirt_valid = (state == LOOKUP) & |{tagV & {tagOut[3].dirty, tagOut[2].dirty, tagOut[1].dirty, tagOut[0].dirty} & victim}; @@ -182,7 +195,8 @@ module DCache ( | (victim[3] ? dataOut[3] : `DC_DATA_LENGTH'b0); // Update LRU - assign nextLRU = { + assign nextLRU = port.clear & port.clearIdx ? 
nowLRU : + { wen[3] | nowLRU[3] & ~&{nowLRU | wen}, wen[2] | nowLRU[2] & ~&{nowLRU | wen}, wen[1] | nowLRU[1] & ~&{nowLRU | wen}, @@ -232,10 +246,10 @@ module DCache ( assign DataRAM2.wen = (bwe1 & wen[2]) | (bwe2 & wen2[2]); assign DataRAM3.wen = (bwe1 & wen[3]) | (bwe2 & wen2[3]); // 写数据 - assign TagRAM0.wdata = {port.tag1, port.wvalid, ~port.clear}; - assign TagRAM1.wdata = {port.tag1, port.wvalid, ~port.clear}; - assign TagRAM2.wdata = {port.tag1, port.wvalid, ~port.clear}; - assign TagRAM3.wdata = {port.tag1, port.wvalid, ~port.clear}; + assign TagRAM0.wdata = {port.tag1, port.wvalid, ~(port.clear | bwe2 & clear)}; + assign TagRAM1.wdata = {port.tag1, port.wvalid, ~(port.clear | bwe2 & clear)}; + assign TagRAM2.wdata = {port.tag1, port.wvalid, ~(port.clear | bwe2 & clear)}; + assign TagRAM3.wdata = {port.tag1, port.wvalid, ~(port.clear | bwe2 & clear)}; assign DataRAM0.wdata = state == LOOKUP ? wdata1[0] : wdata2[0]; assign DataRAM1.wdata = state == LOOKUP ? wdata1[1] : wdata2[1]; diff --git a/src/MMU/MMU.sv b/src/MMU/MMU.sv index 41de194..f86ff97 100644 --- a/src/MMU/MMU.sv +++ b/src/MMU/MMU.sv @@ -65,7 +65,7 @@ module MMU ( DR_WA, DR_WD1, DR_WD2, DR_WD3, DR_WD4, DR_REFILL, - DR_ICACHE, DR_CACHE, DR_CACHE_REFILL, DR_CACHE_CLEAR, DR_CACHE_REQ + DR_ICACHE, DR_CACHE, DR_CACHE_REFILL, DR_CACHE_REQ } drstate_t; typedef enum bit [2:0] { @@ -98,7 +98,7 @@ module MMU ( word_t iD1, iD2, iD3, iD4, iD5, iD6, iD7; logic diReq; - CacheOp_t cacheOp1, cacheOp2; + CacheOp_t cacheOp1; word_t dVA1, diPA; // ================================ @@ -297,7 +297,9 @@ module MMU ( logic [127:0] ddData1; // D-Cache Clear - logic dClrValid, dClrRv, dClrReq; + logic dClrRv, dClrReq; + logic dDirtValid; + logic dCEn, dCClear, dCCached; // ============================ // ======== dFlip-Flop ======== @@ -312,7 +314,9 @@ module MMU ( ffen #(4) dwstrb_ff (clk, data.wstrb, dEn2, dWstrb1); ffen #(32) dwdata_ff (clk, data.wdata, dEn2, dWdata1); ffen #(3) cache_op_ff (clk, cacheOp[2:0], dEn, 
cacheOp1[2:0]); - ffen #(3) cache_op2_ff (clk, cacheOp1[2:0], dEn2, cacheOp2[2:0]); + + ffen #(1) dDirtValid_ff (clk, dc.dirt_valid, dEn2, dDirtValid); + ffenr #(1) dCCached_ff (clk, dCClear | rst, 1'b1, dCEn, dCCached); // ================================= // ======== drState Machine ======== @@ -329,32 +333,44 @@ module MMU ( always_comb begin dEn = 0; dEn2 = 0; + dCEn = 0; + dCClear = 0; drNextState = drState; data.data_ok = 0; rdata_axi.req = 0; + // D-Cache 清除功能 (与 req 一起发送) dc.clear = 0; dc.clearIdx = 0; dc.clearWb = 0; - dClrValid = 0; + // 直接发送 dc.rvalid dClrRv = 0; + // 直接发送 dc.req dClrReq = 0; case (drState) DR_IDLE: begin if (diReq) drNextState = DR_ICACHE; - else if (dReq1 & cacheOp1[2] & (dCached1 | cacheOp1[1])) begin - if (cacheOp2[0]) begin - // Do not write back - // dc.valid = dc.wvalid = 1; - dClrValid = 1; - dc.clear = 1; - dc.clearIdx = cacheOp2[1]; - drNextState = DR_CACHE_REFILL; + else if (dReq1 & cacheOp1[2] & (dCached1 | dCCached | cacheOp1[1])) begin + if (cacheOp1[0]) begin + // 不需要写回的情况 + // D-Cache 状态机处于 Lookup 阶段 + dc.clear = 1; // 发送清除 + dc.clearIdx = cacheOp1[1]; // 清除时清除整行或者命中对象 + drNextState = DR_CACHE_REFILL; // 进入 REFILL 等候写入完成 end else begin - // WriteBack - dc.clearWb = 1; - drNextState = DR_CACHE; - dEn2 = 1; + // 需要写回 + // 此时 D-Cache 状态机处于 Lookup 状态 + // 可能是: 1. CACHE 请求第一次发送 + // 2. 
Index Writeback 清除一路后返回 dc.clear = 1; // 发送清除 dc.clearIdx = cacheOp1[1]; // 清除时清除整行或者命中对象 dc.clearWb = 1; // 需要写回的清除 drNextState = DR_CACHE; // 进入 DR_CACHE 等候写入完成 dEn2 = 1; // 二阶段 dCEn = 1; // 缓存 dCached1 end + end else if (dReq1 & cacheOp1[2]) begin + // avoid deadlock when address is uncached + drNextState = DR_CACHE_REFILL; end else if (~dValid1) dEn = 1; else begin dEn2 = 1; @@ -422,34 +438,31 @@ module MMU ( end DR_CACHE: begin // WriteBack - // Clear By Index or Address // D-Cache: state == REPLACE if (wdata_ok) begin - if (cacheOp2[1]) begin - drNextState = DR_CACHE_CLEAR; + dClrRv = 1; // 直接发送 dc.rvalid 通知可写 + // Why can't I send dc.clear HERE ??? + // dc.clear = 1; + if (cacheOp1[1]) begin + // Clear by Index + if (dDirtValid) + drNextState = DR_CACHE_REQ; // 重新进入准备其它路的清除 + else + drNextState = DR_CACHE_REFILL; // 清除完了 end else begin - dClrRv = 1; - dc.clear = 1; - drNextState = DR_CACHE_REFILL; + // Clear by Address + drNextState = DR_CACHE_REFILL; // 进入 REFILL 等候写入完成 end - // drNextState = DR_CACHE_CLEAR; end end DR_CACHE_REFILL: begin - // avoid deadlock + dCClear = 1; dEn = 1; drNextState = DR_IDLE; data.data_ok = 1; end - DR_CACHE_CLEAR: begin - // deal with timing loop - dClrRv = 1; - dc.clear = 1; - drNextState = DR_CACHE_REQ; - end DR_CACHE_REQ: begin - // deal with timing loop - dClrReq = 1; // use dClrReq to start a new D-Cache req + dClrReq = 1; drNextState = DR_IDLE; end endcase @@ -459,31 +472,6 @@ module MMU ( // ========== dFunction ========== // ================================ - /* - * D-Cache Cache 指令实现备注 - * Cache 指令当成写指令处理 - * TLB 转换请求和 D-Cache 请求与普通访存一致 - * dc.req 在 I-Cache Cache 指令发生时为0 - * dwState 和 dwaState 需判断是否是 CACHE 指令且是否允许写回 - * 屏蔽 dAddressError 和 TLBModified 且只有在 Address 类型请求下允许 TLBRefill 和 TLBInvalid - * Index WriteBack 转换成 Hit WriteBack(s) + Index Invalid - */ - /* 状态 - D-Cache: - IDLE <- dc.req + dc.index - LOOKUP <- dc.tag1: Lookup模式下需要传入正确的tag1 - dc.valid: 1 - dc.wvalid: 1 - dc.clear: Hit Invalid and Index 
Invalid - dc.clearIdx: Hit Invalid or Index Invalid - dc.clearWb: 是否需要写回 - -> dc.dirt_valid - dc.dirt_addr - dc.dirt_data - REPLACE <- dc.rvalid - dc.clear: same cycle as dc.rvalid - */ - assign dVA = data.addr; assign diReq = dReq1 & ~cacheOp1[2] & |cacheOp1[1:0]; assign dcReq1 = dReq1 & (cacheOp1 == CNOP | cacheOp1[2]); // exclude I-Cache clear @@ -534,7 +522,7 @@ module MMU ( // do not request when handling CACHE instruction on I-Cache assign dc.req = dClrReq | dEn & (cacheOp[2] | ~|cacheOp[1:0]); - assign dc.valid = dClrValid | dValid1 & dCached1; + assign dc.valid = dValid1 & dCached1 | dc.clear; assign dc.index = dEn ? dVA[`DC_TAGL-1:`DC_INDEXL] : dVA1[`DC_TAGL-1:`DC_INDEXL]; assign dc.tag1 = dEn2 ? dPA1[31:`DC_TAGL] : dPA2[31:`DC_TAGL]; assign dc.sel1 = dEn2 ? dPA1[3:2] : dPA2[3:2]; @@ -574,12 +562,12 @@ module MMU ( case (dwState) DW_IDLE: begin - if (dEn2 & (~dCached1 & data.wr - | dCached1 & dc.dirt_valid - & (~cacheOp1[2] | ~cacheOp1[0]) // WriteOnly 不允许写回 - & (~dc.hit | cacheOp1[2] & ~cacheOp1[0]) // Writeback 或一般情况 + if (dEn2 & (~(dCached1 | dCEn) & data.wr + | (dCached1 | dCEn) & dc.dirt_valid + & (~cacheOp1[2] | ~cacheOp1[0]) // WriteOnly 不允许写回 + & (~dc.hit | cacheOp1[2] & ~cacheOp1[0]) // Writeback 或一般情况 )) begin - if (dCached1) begin + if (dCached1 | dCEn) begin wdata_axi.wdata = dc.dirt_data[31:0]; wdata_axi.wstrb = 4'b1111; wdata_axi.wvalid = 1'b1; @@ -592,7 +580,7 @@ module MMU ( if (~wdata_axi.wready) dwNextState = DW_WD1; else begin - if (dCached1) dwNextState = DW_WD2; + if (dCached1 | dCEn) dwNextState = DW_WD2; else begin if (~wdata_axi.data_ok) dwNextState = DW_WB; else begin @@ -605,7 +593,7 @@ module MMU ( end end DW_WD1: begin - if (dCached2) begin + if (dCached2 | dCCached) begin wdata_axi.wdata = ddData1[31:0]; wdata_axi.wstrb = 4'b1111; wdata_axi.wvalid = 1'b1; @@ -617,7 +605,7 @@ module MMU ( end if (wdata_axi.wready) begin - if (dCached2) dwNextState = DW_WD2; + if (dCached2 | dCCached) dwNextState = DW_WD2; else begin if 
(~wdata_axi.data_ok) dwNextState = DW_WB; else begin @@ -652,7 +640,7 @@ module MMU ( if (~wdata_axi.data_ok) dwNextState = DW_WB; else begin // fixme: AXI3 wait WA - if (drState == DR_REFILL | drState == DR_CACHE_REFILL) dwNextState = DW_IDLE; + if (drState == DR_REFILL | drState == DR_CACHE_REFILL | drState == DR_CACHE_REQ) dwNextState = DW_IDLE; else dwNextState = DW_WAITR; end end @@ -661,12 +649,12 @@ module MMU ( // TODO: goto IDLE on failure if (wdata_axi.data_ok) begin // fixme: AXI3 wait WA - if (drState == DR_REFILL | drState == DR_CACHE_REFILL) dwNextState = DW_IDLE; + if (drState == DR_REFILL | drState == DR_CACHE_REFILL | drState == DR_CACHE_REQ) dwNextState = DW_IDLE; else dwNextState = DW_WAITR; end end DW_WAITR: begin - if (drState == DR_REFILL | drState == DR_CACHE_REFILL) dwNextState = DW_IDLE; + if (drState == DR_REFILL | drState == DR_CACHE_REFILL | drState == DR_CACHE_REQ) dwNextState = DW_IDLE; end endcase end @@ -687,10 +675,10 @@ module MMU ( case (dwaState) DWA_IDLE: begin - if (dEn2 & (~dCached1 & data.wr - | dCached1 & dc.dirt_valid - & (~cacheOp1[2] | ~cacheOp1[0]) // WriteOnly 不允许写回 - & (~dc.hit | cacheOp1[2] & ~cacheOp1[0]) // Writeback 或一般情况 + if (dEn2 & (~(dCached1 | dCEn) & data.wr + | (dCached1 | dCEn) & dc.dirt_valid + & (~cacheOp1[2] | ~cacheOp1[0]) // WriteOnly 不允许写回 + & (~dc.hit | cacheOp1[2] & ~cacheOp1[0]) // Writeback 或一般情况 )) begin wdata_axi.req = 1'b1; if (~wdata_axi.addr_ok) dwaNextState = DWA_WA; @@ -724,10 +712,10 @@ module MMU ( // ========== dwFunction ========== // ================================ - assign wdata_axi.addr = (dEn2 ? dCached1 : dCached2) ? (dwaState == DWA_IDLE) ? dc.dirt_addr : ddAddr1 : dEn2 ? dPA1 : dPA2; - assign wdata_axi.len = (dEn2 ? dCached1 : dCached2) ? 4'b0011 : 4'b0000; - assign wdata_axi.size = (dEn2 ? dCached1 : dCached2) ? 3'b010 : {1'b0, dSize1}; - assign dc.wvalid = dClrValid | dEn2 ? data.wr : dwr1; + assign wdata_axi.addr = ((dEn2 ? dCached1 : dCached2) | dCEn) ? 
(dwaState == DWA_IDLE) ? dc.dirt_addr : ddAddr1 : dEn2 ? dPA1 : dPA2; + assign wdata_axi.len = ((dEn2 ? dCached1 : dCached2) | dCEn) ? 4'b0011 : 4'b0000; + assign wdata_axi.size = ((dEn2 ? dCached1 : dCached2) | dCEn) ? 3'b010 : {1'b0, dSize1}; + assign dc.wvalid = dEn2 ? data.wr : dwr1; assign dc.wdata = dEn2 ? data.wdata : dWdata1; assign dc.wstrb = dEn2 ? data.wstrb : dWstrb1;