Root cause: the SPI NAND flash has bad blocks. Merging the patches below makes the flash read path more robust, so reads can skip past the bad blocks instead of failing hard.
Error message: SQUASHFS error: Unable to read page, block 325e36, size c97
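Before applying the patches, the bad-block diagnosis can be confirmed from user space with the standard MTD ioctls (MEMGETINFO / MEMGETBADBLOCK). The sketch below is only an illustration and is not one of the patches; /dev/mtd4 is a hypothetical device node, so substitute the partition that actually holds the SQUASHFS image.

/*
 * Illustrative user-space sketch (not part of the patches below): scan an
 * MTD partition for bad blocks using the standard MTD ioctls.
 * "/dev/mtd4" is a hypothetical placeholder.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <mtd/mtd-user.h>

int main(void)
{
	struct mtd_info_user info;
	long long offset;
	int fd = open("/dev/mtd4", O_RDONLY);

	if (fd < 0 || ioctl(fd, MEMGETINFO, &info) < 0) {
		perror("mtd");
		return 1;
	}

	/* Walk the partition one erase block at a time. */
	for (offset = 0; offset < info.size; offset += info.erasesize) {
		int ret = ioctl(fd, MEMGETBADBLOCK, &offset);

		if (ret > 0)
			printf("bad block at 0x%llx\n", (unsigned long long)offset);
		else if (ret < 0)
			perror("MEMGETBADBLOCK"); /* e.g. device without bad-block support */
	}

	close(fd);
	return 0;
}

Every block reported bad here is a block the flash read path has to cope with, which is exactly what the patches below are meant to tolerate.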
Apply the following patches:
- Patch 9996 improves SQUASHFS's ability to handle read errors: once applied, a corrupted page in the cache no longer leads to a persistent decompression failure, at least in most cases.
- Patch 9994 raises the UBI I/O retry count and sleeps between retries instead of yielding, making UBI more tolerant of transient read and erase errors.
- Patch 9995 implements the `handle_err` function for the SPI QUP driver and replaces calls to two deprecated kernel API functions with their currently recommended versions.
- 9996-fs_squashfs_improve_squashfs_error_resistance.patch
From: Oever González <notengobattery@gmail.com>
Subject: [PATCH] fs: SQUASHFS: improve SQUASHFS error resistance
Date: Sat, 25 Jul 2020 18:49:31 -0600
This patch greatly improves SQUASHFS's ability to deal with read errors. By
applying this patch, a corrupted page in the cache will no longer lead to a
persistent failure in decompression, at least most of the time.
Signed-off-by: Oever González <notengobattery@gmail.com>
---
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -65,6 +65,42 @@ static struct buffer_head *get_block_length(struct super_block *sb,
return bh;
}
+/*
+ * Kill the pages if an error is found; this tries to evict them from the
+ * page cache, forcing the kernel to read the device again, in the hope that
+ * the read error is just transitory.
+ *
+ * This effectively cleans the "back" cache. Normal file systems do this,
+ * since it makes them more resistant to sparse or transitory I/O failures.
+ *
+ */
+static inline void __squashfs_kill_pages(struct buffer_head **bh, int quantity)
+{
+ int index = 0, total_pages = 0;
+ struct buffer_head *bh_head;
+ struct buffer_head *bh_current;
+
+ for (index = 0; index < quantity; index++) {
+ bh_head = bh[index];
+ bh_current = bh[index];
+
+ do {
+ struct page *bh_page = bh_current->b_page;
+
+ lock_page(bh_page);
+ delete_from_page_cache(bh_page);
+ ClearPageUptodate(bh_page);
+ SetPageError(bh_page);
+ total_pages++;
+ unlock_page(bh_page);
+
+ clear_buffer_uptodate(bh_current);
+ bh_current = bh_current->b_this_page;
+ } while (bh_current != bh_head);
+ }
+
+ WARNING("killed %d pages, %d buffer heads\n", total_pages, quantity);
+}
/*
* Read and decompress a metadata block or datablock. Length is non-zero
@@ -75,8 +111,8 @@ static struct buffer_head *get_block_length(struct super_block *sb,
* generated a larger block - this does occasionally happen with compression
* algorithms).
*/
-int squashfs_read_data(struct super_block *sb, u64 index, int length,
- u64 *next_index, struct squashfs_page_actor *output)
+static inline int __squashfs_read_data(struct super_block *sb, u64 index,
+ int length, u64 *next_index, struct squashfs_page_actor *output)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
struct buffer_head **bh;
@@ -194,11 +230,51 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
block_release:
for (; k < b; k++)
- put_bh(bh[k]);
+ brelse(bh[k]);
read_failure:
- ERROR("squashfs_read_data failed to read block 0x%llx\n",
- (unsigned long long) index);
+ __squashfs_kill_pages(bh, b);
kfree(bh);
return -EIO;
}
+
+/*
+ * If some kind of error is detected, retry in this loop rather than crashing
+ * the process that requested the data, since it can be `init` and crashing it
+ * would lead to a kernel panic. If the read is still failing after the
+ * retries, the process is doomed to crash anyway.
+ *
+ * This only makes SQUASHFS more error resistant by avoiding poisoning of the
+ * "front" cache when the first attempt fails.
+ *
+ */
+int squashfs_read_data(struct super_block *sb, u64 index, int length,
+ u64 *next_index, struct squashfs_page_actor *output)
+{
+ int ret = 0, attempts = 0;
+ unsigned long long block = (unsigned long long) index;
+ u64 saved_next_index;
+
+ if (next_index)
+ saved_next_index = *next_index;
+
+ ret = __squashfs_read_data(sb, index, length, next_index, output);
+
+ while (ret < 0 && attempts < 5) { // Retry 5 times, a total of 6 attempts
+ attempts++;
+ TRACE("failed to read block [%llx], retry attempt %d\n",
+ block, attempts);
+ if (next_index)
+ *next_index = saved_next_index;
+ ret = __squashfs_read_data(sb, index, length, next_index, output);
+ }
+
+ if (attempts > 0 && ret >= 0)
+ TRACE("read_data: success after %d attempts to read block [%llx]\n",
+ attempts, block);
+ else if (attempts > 0 && ret < 0)
+ ERROR("read_data: failed after %d attempts to read block [%llx]\n",
+ attempts + 1);
+
+ return ret;
+}
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -112,8 +112,12 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
spin_lock(&cache->lock);
- if (entry->length < 0)
+ if (entry->length < 0) {
entry->error = entry->length;
+ WARNING("Invalidated %s cache entry [%llx]\n", cache->name,
+ entry->block);
+ entry->block = SQUASHFS_INVALID_BLK;
+ }
entry->pending = 0;
--- a/fs/squashfs/decompressor_multi.c
+++ b/fs/squashfs/decompressor_multi.c
@@ -189,8 +189,6 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
res = msblk->decompressor->decompress(msblk, decomp_stream->stream,
bh, b, offset, length, output);
put_decomp_stream(decomp_stream, stream);
- if (res < 0)
- ERROR("%s decompression failed, data probably corrupt\n",
- msblk->decompressor->name);
+
return res;
}
--- a/fs/squashfs/decompressor_multi_percpu.c
+++ b/fs/squashfs/decompressor_multi_percpu.c
@@ -82,10 +82,6 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
offset, length, output);
put_cpu_ptr(stream);
- if (res < 0)
- ERROR("%s decompression failed, data probably corrupt\n",
- msblk->decompressor->name);
-
return res;
}
--- a/fs/squashfs/decompressor_single.c
+++ b/fs/squashfs/decompressor_single.c
@@ -70,10 +70,6 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
offset, length, output);
mutex_unlock(&stream->mutex);
- if (res < 0)
- ERROR("%s decompression failed, data probably corrupt\n",
- msblk->decompressor->name);
-
return res;
}
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -452,6 +452,7 @@ static int __init init_squashfs_fs(void)
}
pr_info("version 4.0 (2009/01/31) Phillip Lougher\n");
+ pr_info("patched 4.0 (2020/11/10) Oever Gonzalez\n");
return 0;
}
- 9994-mtd_ubi_improve_the_i_o_retries.patch
From: Oever González <notengobattery@gmail.com>
Subject: [PATCH] mtd: UBI: improve the I/O retries
Date: Thu, 23 Jul 2020 09:41:17 -0600
In order to make UBI more error resistant, and because this also affects the
stress test (used to check whether a PEB has died), increase the number of I/O
retries from the current value of 3 to 4.
Signed-off-by: Oever González <notengobattery@gmail.com>
---
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -76,6 +76,7 @@
#include <linux/crc32.h>
#include <linux/err.h>
#include <linux/slab.h>
+#include <linux/delay.h>
#include "ubi.h"
static int self_check_not_bad(const struct ubi_device *ubi, int pnum);
@@ -173,7 +174,7 @@ int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset,
if (retries++ < UBI_IO_RETRIES) {
ubi_warn(ubi, "error %d%s while reading %d bytes from PEB %d:%d, read only %zd bytes, retry",
err, errstr, len, pnum, offset, read);
- yield();
+ msleep(100);
goto retry;
}
@@ -328,7 +329,7 @@ static int do_sync_erase(struct ubi_device *ubi, int pnum)
if (retries++ < UBI_IO_RETRIES) {
ubi_warn(ubi, "error %d while erasing PEB %d, retry",
err, pnum);
- yield();
+ msleep(100);
goto retry;
}
ubi_err(ubi, "cannot erase PEB %d, error %d", pnum, err);
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -63,7 +63,7 @@ void ubi_err(const struct ubi_device *ubi, const char *fmt, ...);
* In case of errors, UBI tries to repeat the operation several times before
* returning error. The below constant defines how many times UBI re-tries.
*/
-#define UBI_IO_RETRIES 3
+#define UBI_IO_RETRIES 4
/*
* Length of the protection queue. The length is effectively equivalent to the
- 9995-spi_qup_implement_error_handle_function.patch
From: Oever González <notengobattery@gmail.com>
Subject: [PATCH] SPI: QUP: implement error handle function
Date: Thu, 23 Jul 2020 22:19:28 -0600
Implement the `handle_err` function for the SPI QUP driver. Also, remove calls
to two deprecated kernel API functions, replacing them with the currently
recommended versions.
Signed-off-by: Oever González <notengobattery@gmail.com>
---
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -417,9 +417,9 @@ static void spi_qup_dma_terminate(struct spi_master *master,
struct spi_transfer *xfer)
{
if (xfer->tx_buf)
- dmaengine_terminate_all(master->dma_tx);
+ dmaengine_terminate_sync(master->dma_tx);
if (xfer->rx_buf)
- dmaengine_terminate_all(master->dma_rx);
+ dmaengine_terminate_sync(master->dma_rx);
}
static u32 spi_qup_sgl_get_nents_len(struct scatterlist *sgl, u32 max,
@@ -932,11 +932,11 @@ static int spi_qup_init_dma(struct spi_master *master, resource_size_t base)
int ret;
/* allocate dma resources, if available */
- master->dma_rx = dma_request_slave_channel_reason(dev, "rx");
+ master->dma_rx = dma_request_chan(dev, "rx");
if (IS_ERR(master->dma_rx))
return PTR_ERR(master->dma_rx);
- master->dma_tx = dma_request_slave_channel_reason(dev, "tx");
+ master->dma_tx = dma_request_chan(dev, "tx");
if (IS_ERR(master->dma_tx)) {
ret = PTR_ERR(master->dma_tx);
goto err_tx;
@@ -992,6 +992,24 @@ static void spi_qup_set_cs(struct spi_device *spi, bool val)
writel_relaxed(spi_ioc, controller->base + SPI_IO_CONTROL);
}
+static void spi_qup_handle_err(struct spi_master *master,
+ struct spi_message *msg)
+{
+ struct spi_qup *controller = spi_master_get_devdata(master);
+ unsigned long flags;
+
+ spin_lock_irqsave(&controller->lock, flags);
+ controller->error = 0;
+ controller->rx_bytes = 0;
+ controller->tx_bytes = 0;
+ spin_unlock_irqrestore(&controller->lock, flags);
+
+ spi_qup_set_state(controller, QUP_STATE_RESET);
+
+ if (spi_qup_is_dma_xfer(controller->mode))
+ spi_qup_dma_terminate(master, controller->xfer);
+}
+
static int spi_qup_probe(struct platform_device *pdev)
{
struct spi_master *master;
@@ -1063,6 +1081,7 @@ static int spi_qup_probe(struct platform_device *pdev)
master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
master->max_speed_hz = max_freq;
master->transfer_one = spi_qup_transfer_one;
+ master->handle_err = spi_qup_handle_err;
master->dev.of_node = pdev->dev.of_node;
master->auto_runtime_pm = true;
master->dma_alignment = dma_get_cache_alignment();
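For context on when the new `handle_err` callback runs: the SPI core only invokes it once a message has completed with an error status. The snippet below is a paraphrase of the call site in `spi_transfer_one_message()` (drivers/spi/spi.c) for kernels of this vintage, not an exact copy of the upstream source:

/* Paraphrased from spi_transfer_one_message() in drivers/spi/spi.c;
 * simplified for illustration, not the exact upstream code. */
if (msg->status && master->handle_err)
	master->handle_err(master, msg);

In other words, the handler added by patch 9995 gives the driver a chance to reset the QUP state machine and terminate any in-flight DMA before the failed message is handed back to the caller, instead of leaving the controller in an undefined state.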