From e561c6303639ed510183da25d3d54555a53371c9 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@wdc.com>
Date: Thu, 26 Sep 2019 09:55:25 +0530
Subject: lib: Fix coldboot race condition observed on emulators/simulators

If we are running on a RISC-V emulator/simulator with a large number of
HARTs, where each HART is a regular thread under a UNIX host, then it is
possible that some of the secondary HARTs don't get a chance to run and
sbi_hart_wake_coldboot_harts() is called before those secondary HARTs
call sbi_hart_wait_for_coldboot(). In this situation, some of the
secondary HARTs will never come out of the coldboot wait loop.

To tackle this, we introduce a global flag, coldboot_done, which is
protected by the coldboot lock and is set by the primary HART in
sbi_hart_wake_coldboot_harts() before waking up the secondary HARTs.
We also rearrange the acquire/release of the coldboot lock to further
reduce the chances of a race condition.

Signed-off-by: Anup Patel <anup.patel@wdc.com>
Reviewed-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Zong Li <zong.li@sifive.com>
Reviewed-by: Nylon Chen <nylon7@andestech.com>
---
 lib/sbi/sbi_hart.c | 44 +++++++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 15 deletions(-)

diff --git a/lib/sbi/sbi_hart.c b/lib/sbi/sbi_hart.c
index b0c087a..dee2e21 100644
--- a/lib/sbi/sbi_hart.c
+++ b/lib/sbi/sbi_hart.c
@@ -344,12 +344,12 @@ struct sbi_scratch *sbi_hart_id_to_scratch(struct sbi_scratch *scratch,
 }
 
 #define COLDBOOT_WAIT_BITMAP_SIZE __riscv_xlen
-static spinlock_t coldboot_wait_bitmap_lock = SPIN_LOCK_INITIALIZER;
-static unsigned long coldboot_wait_bitmap   = 0;
+static spinlock_t coldboot_lock = SPIN_LOCK_INITIALIZER;
+static unsigned long coldboot_done = 0;
+static unsigned long coldboot_wait_bitmap = 0;
 
 void sbi_hart_wait_for_coldboot(struct sbi_scratch *scratch, u32 hartid)
 {
-	unsigned long mipval;
 	const struct sbi_platform *plat = sbi_platform_ptr(scratch);
 
 	if ((sbi_platform_hart_count(plat) <= hartid) ||
@@ -359,19 +359,26 @@ void sbi_hart_wait_for_coldboot(struct sbi_scratch *scratch, u32 hartid)
 	/* Set MSIE bit to receive IPI */
 	csr_set(CSR_MIE, MIP_MSIP);
 
-	do {
-		spin_lock(&coldboot_wait_bitmap_lock);
-		coldboot_wait_bitmap |= (1UL << hartid);
-		spin_unlock(&coldboot_wait_bitmap_lock);
+	/* Acquire coldboot lock */
+	spin_lock(&coldboot_lock);
 
+	/* Mark current HART as waiting */
+	coldboot_wait_bitmap |= (1UL << hartid);
+
+	/* Wait for coldboot to finish using WFI */
+	while (!coldboot_done) {
+		spin_unlock(&coldboot_lock);
 		wfi();
-		mipval = csr_read(CSR_MIP);
+		spin_lock(&coldboot_lock);
+	};
+
+	/* Unmark current HART as waiting */
+	coldboot_wait_bitmap &= ~(1UL << hartid);
 
-		spin_lock(&coldboot_wait_bitmap_lock);
-		coldboot_wait_bitmap &= ~(1UL << hartid);
-		spin_unlock(&coldboot_wait_bitmap_lock);
-	} while (!(mipval & MIP_MSIP));
+	/* Release coldboot lock */
+	spin_unlock(&coldboot_lock);
 
+	/* Clear current HART IPI */
 	sbi_platform_ipi_clear(plat, hartid);
 }
 
@@ -380,11 +387,18 @@ void sbi_hart_wake_coldboot_harts(struct sbi_scratch *scratch, u32 hartid)
 	const struct sbi_platform *plat = sbi_platform_ptr(scratch);
 	int max_hart			= sbi_platform_hart_count(plat);
 
+	/* Acquire coldboot lock */
+	spin_lock(&coldboot_lock);
+
+	/* Mark coldboot done */
+	coldboot_done = 1;
+
+	/* Send an IPI to all HARTs waiting for coldboot */
 	for (int i = 0; i < max_hart; i++) {
-		/* send an IPI to every other hart */
-		spin_lock(&coldboot_wait_bitmap_lock);
 		if ((i != hartid) && (coldboot_wait_bitmap & (1UL << i)))
 			sbi_platform_ipi_send(plat, i);
-		spin_unlock(&coldboot_wait_bitmap_lock);
 	}
+
+	/* Release coldboot lock */
+	spin_unlock(&coldboot_lock);
 }
-- 
cgit v1.2.3