Root cause: the SPI NAND flash has bad blocks. Merging the patches below makes the flash read path more robust, so reads can skip past the bad blocks instead of failing hard.
Error message: SQUASHFS error: Unable to read page, block 325e36, size c97
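Before applying the patches, the bad-block diagnosis can be confirmed from user space with the standard MTD ioctls (MEMGETINFO / MEMGETBADBLOCK). The sketch below is only an illustration and is not one of the patches; /dev/mtd4 is a hypothetical device node, so substitute the partition that actually holds the SQUASHFS image.

/*
 * Illustrative user-space sketch (not part of the patches below): scan an
 * MTD partition for bad blocks using the standard MTD ioctls.
 * "/dev/mtd4" is a hypothetical placeholder.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <mtd/mtd-user.h>

int main(void)
{
	struct mtd_info_user info;
	long long offset;
	int fd = open("/dev/mtd4", O_RDONLY);

	if (fd < 0 || ioctl(fd, MEMGETINFO, &info) < 0) {
		perror("mtd");
		return 1;
	}

	/* Walk the partition one erase block at a time. */
	for (offset = 0; offset < info.size; offset += info.erasesize) {
		int ret = ioctl(fd, MEMGETBADBLOCK, &offset);

		if (ret > 0)
			printf("bad block at 0x%llx\n", (unsigned long long)offset);
		else if (ret < 0)
			perror("MEMGETBADBLOCK"); /* e.g. device without bad-block support */
	}

	close(fd);
	return 0;
}

Every block reported bad here is a block the flash read path has to cope with, which is exactly what the patches below are meant to tolerate.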
Apply the following patches:
- Patch 9996 improves SQUASHFS's ability to handle read errors: once applied, a corrupted page in the cache no longer leads to a persistent decompression failure, at least in most cases.
- Patch 9994 raises the UBI I/O retry count and sleeps between retries instead of yielding, making UBI more tolerant of transient read and erase errors.
- Patch 9995 implements the `handle_err` function for the SPI QUP driver and replaces calls to two deprecated kernel API functions with their currently recommended versions.
- 9996-fs_squashfs_improve_squashfs_error_resistance.patch
From: Oever González <notengobattery@gmail.com>
Subject: [PATCH] fs: SQUASHFS: improve SQUASHFS error resistance
Date: Sat, 25 Jul 2020 18:49:31 -0600
This patch greatly improves SQUASHFS's ability to deal with read errors. By
applying this patch, a corrupted page in the cache will no longer lead to a
persistent failure in decompression, at least most of the time.
Signed-off-by: Oever González <notengobattery@gmail.com>
---
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -65,6 +65,42 @@ static struct buffer_head *get_block_length(struct super_block *sb,
return bh;
}
+/*
+ * Kill the pages if an error is found; this tries to evict them from the
+ * page cache, forcing the kernel to read the device again, in the hope that
+ * the read error is just transitory.
+ *
+ * This effectively cleans the "back" cache. Normal file systems do this,
+ * since it makes them more resistant to sparse or transitory I/O failures.
+ *
+ */
+static inline void __squashfs_kill_pages(struct buffer_head **bh, int quantity)
+{
+ int index = 0, total_pages = 0;
+ struct buffer_head *bh_head;
+ struct buffer_head *bh_current;
+
+ for (index = 0; index < quantity; index++) {
+ bh_head = bh[index];
+ bh_current = bh[index];
+
+ do {
+ struct page *bh_page = bh_current->b_page;
+
+ lock_page(bh_page);
+ delete_from_page_cache(bh_page);
+ ClearPageUptodate(bh_page);
+ SetPageError(bh_page);
+ total_pages++;
+ unlock_page(bh_page);
+
+ clear_buffer_uptodate(bh_current);
+ bh_current = bh_current->b_this_page;
+ } while (bh_current != bh_head);
+ }
+
+ WARNING("killed %d pages, %d buffer heads\n", total_pages, quantity);
+}
/*
* Read and decompress a metadata block or datablock. Length is non-zero
@@ -75,8 +111,8 @@ static struct buffer_head *get_block_length(struct super_block *sb,
* generated a larger block - this does occasionally happen with compression
* algorithms).
*/
-int squashfs_read_data(struct super_block *sb, u64 index, int length,
- u64 *next_index, struct squashfs_page_actor *output)
+static inline int __squashfs_read_data(struct super_block *sb, u64 index,
+ int length, u64 *next_index, struct squashfs_page_actor *output)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
struct buffer_head **bh;
@@ -194,11 +230,51 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
block_release:
for (; k < b; k++)
- put_bh(bh[k]);
+ brelse(bh[k]);
read_failure:
- ERROR("squashfs_read_data failed to read block 0x%llx\n",
- (unsigned long long) index);
+ __squashfs_kill_pages(bh, b);
kfree(bh);
return -EIO;
}
+
+/*
+ * If some kind of error is detected, retry in this loop rather than crashing
+ * the process that requested the data, since it can be `init` and crashing it
+ * would lead to a kernel panic. If the read is still failing after the
+ * retries, the process is doomed to crash anyway.
+ *
+ * This only makes SQUASHFS more error resistant by avoiding poisoning of the
+ * "front" cache when the first attempt fails.
+ *
+ */
+int squashfs_read_data(struct super_block *sb, u64 index, int length,
+ u64 *next_index, struct squashfs_page_actor *output)
+{
+ int ret = 0, attempts = 0;
+ unsigned long long block = (unsigned long long) index;
+ u64 saved_next_index;
+
+ if (next_index)
+ saved_next_index = *next_index;
+
+ ret = __squashfs_read_data(sb, index, length, next_index, output);
+
+ while (ret < 0 && attempts < 5) { // Retry 5 times, a total of 6 attempts
+ attempts++;
+ TRACE("failed to read block [%llx], retry attempt %d\n",
+ block, attempts);
+ if (next_index)
+ *next_index = saved_next_index;
+ ret = __squashfs_read_data(sb, index, length, next_index, output);
+ }
+
+ if (attempts > 0 && ret >= 0)
+ TRACE("read_data: success after %d attempts to read block [%llx]\n",
+ attempts, block);
+ else if (attempts > 0 && ret < 0)
+ ERROR("read_data: failed after %d attempts to read block [%llx]\n",
+ attempts + 1);
+
+ return ret;
+}
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -112,8 +112,12 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
spin_lock(&cache->lock);
- if (entry->length < 0)
+ if (entry->length < 0) {
entry->error = entry->length;
+ WARNING("Invalidated %s cache entry [%llx]\n", cache->name,
+ entry->block);
+ entry->block = SQUASHFS_INVALID_BLK;
+ }
entry->pending = 0;
--- a/fs/squashfs/decompressor_multi.c
+++ b/fs/squashfs/decompressor_multi.c
@@ -189,8 +189,6 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
res = msblk->decompressor->decompress(msblk, decomp_stream->stream,
bh, b, offset, length, output);
put_decomp_stream(decomp_stream, stream);
- if (res < 0)
- ERROR("%s decompression failed, data probably corrupt\n",
- msblk->decompressor->name);
+
return res;
}
--- a/fs/squashfs/decompressor_multi_percpu.c
+++ b/fs/squashfs/decompressor_multi_percpu.c
@@ -82,10 +82,6 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
offset, length, output);
put_cpu_ptr(stream);
- if (res < 0)
- ERROR("%s decompression failed, data probably corrupt\n",
- msblk->decompressor->name);
-
return res;
}
--- a/fs/squashfs/decompressor_single.c
+++ b/fs/squashfs/decompressor_single.c
@@ -70,10 +70,6 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
offset, length, output);
mutex_unlock(&stream->mutex);
- if (res < 0)
- ERROR("%s decompression failed, data probably corrupt\n",
- msblk->decompressor->name);
-
return res;
}
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -452,6 +452,7 @@ static int __init init_squashfs_fs(void)
}
pr_info("version 4.0 (2009/01/31) Phillip Lougher\n");
+ pr_info("patched 4.0 (2020/11/10) Oever Gonzalez\n");
return 0;
}
- 9994-mtd_ubi_improve_the_i_o_retries.patch
From: Oever González <notengobattery@gmail.com>
Subject: [PATCH] mtd: UBI: improve the I/O retries
Date: Thu, 23 Jul 2020 09:41:17 -0600
In order to make UBI more error resistant, and because this also affects the
stress test (used to check whether a PEB has died), increase the number of I/O
retries from the current value of 3 to 4.
Signed-off-by: Oever González <notengobattery@gmail.com>
---
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -76,6 +76,7 @@
#include <linux/crc32.h>
#include <linux/err.h>
#include <linux/slab.h>
+#include <linux/delay.h>
#include "ubi.h"
static int self_check_not_bad(const struct ubi_device *ubi, int pnum);
@@ -173,7 +174,7 @@ int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset,
if (retries++ < UBI_IO_RETRIES) {
ubi_warn(ubi, "error %d%s while reading %d bytes from PEB %d:%d, read only %zd bytes, retry",
err, errstr, len, pnum, offset, read);
- yield();
+ msleep(100);
goto retry;
}
@@ -328,7 +329,7 @@ static int do_sync_erase(struct ubi_device *ubi, int pnum)
if (retries++ < UBI_IO_RETRIES) {
ubi_warn(ubi, "error %d while erasing PEB %d, retry",
err, pnum);
- yield();
+ msleep(100);
goto retry;
}
ubi_err(ubi, "cannot erase PEB %d, error %d", pnum, err);
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -63,7 +63,7 @@ void ubi_err(const struct ubi_device *ubi, const char *fmt, ...);
* In case of errors, UBI tries to repeat the operation several times before
* returning error. The below constant defines how many times UBI re-tries.
*/
-#define UBI_IO_RETRIES 3
+#define UBI_IO_RETRIES 4
/*
* Length of the protection queue. The length is effectively equivalent to the
- 9995-spi_qup_implement_error_handle_function.patch
From: Oever González <notengobattery@gmail.com>
Subject: [PATCH] SPI: QUP: implement error handle function
Date: Thu, 23 Jul 2020 22:19:28 -0600
Implement the `handle_err` function for the SPI QUP driver. Also, remove calls
to two deprecated kernel API functions, replacing them with the currently
recommended versions.
Signed-off-by: Oever González <notengobattery@gmail.com>
---
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -417,9 +417,9 @@ static void spi_qup_dma_terminate(struct spi_master *master,
struct spi_transfer *xfer)
{
if (xfer->tx_buf)
- dmaengine_terminate_all(master->dma_tx);
+ dmaengine_terminate_sync(master->dma_tx);
if (xfer->rx_buf)
- dmaengine_terminate_all(master->dma_rx);
+ dmaengine_terminate_sync(master->dma_rx);
}
static u32 spi_qup_sgl_get_nents_len(struct scatterlist *sgl, u32 max,
@@ -932,11 +932,11 @@ static int spi_qup_init_dma(struct spi_master *master, resource_size_t base)
int ret;
/* allocate dma resources, if available */
- master->dma_rx = dma_request_slave_channel_reason(dev, "rx");
+ master->dma_rx = dma_request_chan(dev, "rx");
if (IS_ERR(master->dma_rx))
return PTR_ERR(master->dma_rx);
- master->dma_tx = dma_request_slave_channel_reason(dev, "tx");
+ master->dma_tx = dma_request_chan(dev, "tx");
if (IS_ERR(master->dma_tx)) {
ret = PTR_ERR(master->dma_tx);
goto err_tx;
@@ -992,6 +992,24 @@ static void spi_qup_set_cs(struct spi_device *spi, bool val)
writel_relaxed(spi_ioc, controller->base + SPI_IO_CONTROL);
}
+static void spi_qup_handle_err(struct spi_master *master,
+ struct spi_message *msg)
+{
+ struct spi_qup *controller = spi_master_get_devdata(master);
+ unsigned long flags;
+
+ spin_lock_irqsave(&controller->lock, flags);
+ controller->error = 0;
+ controller->rx_bytes = 0;
+ controller->tx_bytes = 0;
+ spin_unlock_irqrestore(&controller->lock, flags);
+
+ spi_qup_set_state(controller, QUP_STATE_RESET);
+
+ if (spi_qup_is_dma_xfer(controller->mode))
+ spi_qup_dma_terminate(master, controller->xfer);
+}
+
static int spi_qup_probe(struct platform_device *pdev)
{
struct spi_master *master;
@@ -1063,6 +1081,7 @@ static int spi_qup_probe(struct platform_device *pdev)
master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
master->max_speed_hz = max_freq;
master->transfer_one = spi_qup_transfer_one;
+ master->handle_err = spi_qup_handle_err;
master->dev.of_node = pdev->dev.of_node;
master->auto_runtime_pm = true;
master->dma_alignment = dma_get_cache_alignment();
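For context on when the new `handle_err` callback runs: the SPI core only invokes it once a message has completed with an error status. The snippet below is a paraphrase of the call site in `spi_transfer_one_message()` (drivers/spi/spi.c) for kernels of this vintage, not an exact copy of the upstream source:

/* Paraphrased from spi_transfer_one_message() in drivers/spi/spi.c;
 * simplified for illustration, not the exact upstream code. */
if (msg->status && master->handle_err)
	master->handle_err(master, msg);

In other words, the handler added by patch 9995 gives the driver a chance to reset the QUP state machine and terminate any in-flight DMA before the failed message is handed back to the caller, instead of leaving the controller in an undefined state.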