public inbox for development@lists.ipfire.org
 help / color / mirror / Atom feed
From: Matthias Fischer <matthias.fischer@ipfire.org>
To: development@lists.ipfire.org
Subject: [PATCH] squid 4.5: latest patches (01-09)
Date: Mon, 04 Feb 2019 18:54:35 +0100	[thread overview]
Message-ID: <20190204175435.8456-1-matthias.fischer@ipfire.org> (raw)

[-- Attachment #1: Type: text/plain, Size: 42248 bytes --]

For details see:
http://www.squid-cache.org/Versions/v4/changesets/

Best,
Matthias

Signed-off-by: Matthias Fischer <matthias.fischer(a)ipfire.org>
---
 lfs/squid                                     |  11 +-
 ...ile_errors_with_-O3_optimization_288.patch | 107 ++++++++
 ...GoIntoBackground_fork_call_fails_344.patch |  29 +++
 ...associated_with_auto-consumption_348.patch | 118 +++++++++
 ...ds_that_define_OPENSSL_NO_ENGINE_349.patch |  22 ++
 ...disker-to-worker_queue_overflows_353.patch |  92 +++++++
 ..._when_reserving_a_new_cache_slot_350.patch | 246 ++++++++++++++++++
 ...opped_some_of_the_write_requests_352.patch | 164 ++++++++++++
 ...call_setsid_in_--foreground_mode_354.patch |  36 +++
 ...ect_IPv6_loopback_binding_errors_355.patch |  54 ++++
 10 files changed, 878 insertions(+), 1 deletion(-)
 create mode 100644 src/patches/squid/01_Bug_4875_pt2_GCC-8_compile_errors_with_-O3_optimization_288.patch
 create mode 100644 src/patches/squid/02_Bug_4856_Exit_when_GoIntoBackground_fork_call_fails_344.patch
 create mode 100644 src/patches/squid/03_Fix_BodyPipe_Sink_memory_leaks_associated_with_auto-consumption_348.patch
 create mode 100644 src/patches/squid/04_Fix_OpenSSL_builds_that_define_OPENSSL_NO_ENGINE_349.patch
 create mode 100644 src/patches/squid/05_Fixed_disker-to-worker_queue_overflows_353.patch
 create mode 100644 src/patches/squid/06_Initialize_StoreMapSlice_when_reserving_a_new_cache_slot_350.patch
 create mode 100644 src/patches/squid/07_Fail_Rock_swapout_if_the_disk_dropped_some_of_the_write_requests_352.patch
 create mode 100644 src/patches/squid/08_Bug_4914_Do_not_call_setsid_in_--foreground_mode_354.patch
 create mode 100644 src/patches/squid/09_Bug_4915_Detect_IPv6_loopback_binding_errors_355.patch

diff --git a/lfs/squid b/lfs/squid
index 6033ab394..6c4706240 100644
--- a/lfs/squid
+++ b/lfs/squid
@@ -72,6 +72,15 @@ $(subst %,%_MD5,$(objects)) :
 $(TARGET) : $(patsubst %,$(DIR_DL)/%,$(objects))
 	@$(PREBUILD)
 	@rm -rf $(DIR_APP) && cd $(DIR_SRC) && tar xaf $(DIR_DL)/$(DL_FILE)
+	cd $(DIR_APP) && patch -Np1 -i $(DIR_SRC)/src/patches/squid/01_Bug_4875_pt2_GCC-8_compile_errors_with_-O3_optimization_288.patch
+	cd $(DIR_APP) && patch -Np1 -i $(DIR_SRC)/src/patches/squid/02_Bug_4856_Exit_when_GoIntoBackground_fork_call_fails_344.patch
+	cd $(DIR_APP) && patch -Np1 -i $(DIR_SRC)/src/patches/squid/03_Fix_BodyPipe_Sink_memory_leaks_associated_with_auto-consumption_348.patch
+	cd $(DIR_APP) && patch -Np1 -i $(DIR_SRC)/src/patches/squid/04_Fix_OpenSSL_builds_that_define_OPENSSL_NO_ENGINE_349.patch
+	cd $(DIR_APP) && patch -Np1 -i $(DIR_SRC)/src/patches/squid/05_Fixed_disker-to-worker_queue_overflows_353.patch
+	cd $(DIR_APP) && patch -Np1 -i $(DIR_SRC)/src/patches/squid/06_Initialize_StoreMapSlice_when_reserving_a_new_cache_slot_350.patch
+	cd $(DIR_APP) && patch -Np1 -i $(DIR_SRC)/src/patches/squid/07_Fail_Rock_swapout_if_the_disk_dropped_some_of_the_write_requests_352.patch
+	cd $(DIR_APP) && patch -Np1 -i $(DIR_SRC)/src/patches/squid/08_Bug_4914_Do_not_call_setsid_in_--foreground_mode_354.patch
+	cd $(DIR_APP) && patch -Np1 -i $(DIR_SRC)/src/patches/squid/09_Bug_4915_Detect_IPv6_loopback_binding_errors_355.patch
 	cd $(DIR_APP) && patch -Np0 -i $(DIR_SRC)/src/patches/squid/squid-4.5-fix-max-file-descriptors.patch
 
 	cd $(DIR_APP) && autoreconf -vfi
@@ -91,7 +100,7 @@ $(TARGET) : $(patsubst %,$(DIR_DL)/%,$(objects))
 		--disable-kqueue \
 		--disable-esi \
 		--disable-arch-native \
-		--enable-ipv6 \
+		--disable-ipv6 \
 		--enable-poll \
 		--enable-ident-lookups \
 		--enable-storeio=aufs,diskd,ufs \
diff --git a/src/patches/squid/01_Bug_4875_pt2_GCC-8_compile_errors_with_-O3_optimization_288.patch b/src/patches/squid/01_Bug_4875_pt2_GCC-8_compile_errors_with_-O3_optimization_288.patch
new file mode 100644
index 000000000..1e3b731a0
--- /dev/null
+++ b/src/patches/squid/01_Bug_4875_pt2_GCC-8_compile_errors_with_-O3_optimization_288.patch
@@ -0,0 +1,107 @@
+commit d6adda4644e614dcaa6831abc5efffb2af51a7c8
+Author: Amos Jeffries <yadij(a)users.noreply.github.com>
+Date:   2019-01-06 13:22:19 +0000
+
+    Bug 4875 pt2: GCC-8 compile errors with -O3 optimization (#288)
+    
+    GCC-8 warnings exposed at -O3 optimization causes its
+    own static analyzer to detect optimized code is eliding
+    initialization on paths that do not use the
+    configuration variables.
+    
+    Refactor the parseTimeLine() API to return the parsed
+    values so that there is no need to initialize anything prior
+    to parsing.
+
+diff --git a/src/cache_cf.cc b/src/cache_cf.cc
+index c4eb955..a45ea2e 100644
+--- a/src/cache_cf.cc
++++ b/src/cache_cf.cc
+@@ -160,7 +160,6 @@ static bool setLogformat(CustomLog *cl, const char *name, const bool dieWhenMiss
+ static void configDoConfigure(void);
+ static void parse_refreshpattern(RefreshPattern **);
+ static uint64_t parseTimeUnits(const char *unit,  bool allowMsec);
+-static void parseTimeLine(time_msec_t * tptr, const char *units, bool allowMsec, bool expectMoreArguments);
+ static void parse_u_short(unsigned short * var);
+ static void parse_string(char **);
+ static void default_all(void);
+@@ -1034,19 +1033,19 @@ parse_obsolete(const char *name)
+ 
+ /* Parse a time specification from the config file.  Store the
+  * result in 'tptr', after converting it to 'units' */
+-static void
+-parseTimeLine(time_msec_t * tptr, const char *units,  bool allowMsec,  bool expectMoreArguments = false)
++static time_msec_t
++parseTimeLine(const char *units,  bool allowMsec,  bool expectMoreArguments = false)
+ {
+     time_msec_t u = parseTimeUnits(units, allowMsec);
+     if (u == 0) {
+         self_destruct();
+-        return;
++        return 0;
+     }
+ 
+-    char *token = ConfigParser::NextToken();;
++    char *token = ConfigParser::NextToken();
+     if (!token) {
+         self_destruct();
+-        return;
++        return 0;
+     }
+ 
+     double d = xatof(token);
+@@ -1059,7 +1058,7 @@ parseTimeLine(time_msec_t * tptr, const char *units,  bool allowMsec,  bool expe
+ 
+         } else if (!expectMoreArguments) {
+             self_destruct();
+-            return;
++            return 0;
+ 
+         } else {
+             token = NULL; // show default units if dying below
+@@ -1068,13 +1067,14 @@ parseTimeLine(time_msec_t * tptr, const char *units,  bool allowMsec,  bool expe
+     } else
+         token = NULL; // show default units if dying below.
+ 
+-    *tptr = static_cast<time_msec_t>(m * d);
++    const auto result = static_cast<time_msec_t>(m * d);
+ 
+-    if (static_cast<double>(*tptr) * 2 != m * d * 2) {
++    if (static_cast<double>(result) * 2 != m * d * 2) {
+         debugs(3, DBG_CRITICAL, "FATAL: Invalid value '" <<
+                d << " " << (token ? token : units) << ": integer overflow (time_msec_t).");
+         self_destruct();
+     }
++    return result;
+ }
+ 
+ static uint64_t
+@@ -2918,8 +2918,7 @@ dump_time_t(StoreEntry * entry, const char *name, time_t var)
+ void
+ parse_time_t(time_t * var)
+ {
+-    time_msec_t tval;
+-    parseTimeLine(&tval, T_SECOND_STR, false);
++    time_msec_t tval = parseTimeLine(T_SECOND_STR, false);
+     *var = static_cast<time_t>(tval/1000);
+ }
+ 
+@@ -2941,7 +2940,7 @@ dump_time_msec(StoreEntry * entry, const char *name, time_msec_t var)
+ void
+ parse_time_msec(time_msec_t * var)
+ {
+-    parseTimeLine(var, T_SECOND_STR, true);
++    *var = parseTimeLine(T_SECOND_STR, true);
+ }
+ 
+ static void
+@@ -4814,8 +4813,7 @@ static void free_ftp_epsv(acl_access **ftp_epsv)
+ static void
+ parse_UrlHelperTimeout(SquidConfig::UrlHelperTimeout *config)
+ {
+-    time_msec_t tval;
+-    parseTimeLine(&tval, T_SECOND_STR, false, true);
++    const auto tval = parseTimeLine(T_SECOND_STR, false, true);
+     Config.Timeout.urlRewrite = static_cast<time_t>(tval/1000);
+ 
+     char *key, *value;
diff --git a/src/patches/squid/02_Bug_4856_Exit_when_GoIntoBackground_fork_call_fails_344.patch b/src/patches/squid/02_Bug_4856_Exit_when_GoIntoBackground_fork_call_fails_344.patch
new file mode 100644
index 000000000..3159b9d1c
--- /dev/null
+++ b/src/patches/squid/02_Bug_4856_Exit_when_GoIntoBackground_fork_call_fails_344.patch
@@ -0,0 +1,29 @@
+commit 6aa83b8d51dbb66bf5383f18615a59d8ff0a10a4
+Author: Marcos Mello <marcosfrm(a)users.noreply.github.com>
+Date:   2019-01-08 10:14:09 +0000
+
+    Bug 4856: Exit when GoIntoBackground() fork() call fails (#344)
+    
+    Ignoring the (implicit/default) command-line instruction to background
+    Squid is wrong in general and confuses at least some daemon managers.
+
+diff --git a/src/main.cc b/src/main.cc
+index eeb8f72..7f5ec80 100644
+--- a/src/main.cc
++++ b/src/main.cc
+@@ -1865,13 +1865,12 @@ GoIntoBackground()
+     pid_t pid;
+     if ((pid = fork()) < 0) {
+         int xerrno = errno;
+-        syslog(LOG_ALERT, "fork failed: %s", xstrerr(xerrno));
+-        // continue anyway, mimicking --foreground mode (XXX?)
++        throw TexcHere(ToSBuf("failed to fork(2) the master process: ", xstrerr(xerrno)));
+     } else if (pid > 0) {
+         // parent
+         exit(EXIT_SUCCESS);
+     }
+-    // child, running as a background daemon (or a failed-to-fork parent)
++    // child, running as a background daemon
+ }
+ 
+ static void
diff --git a/src/patches/squid/03_Fix_BodyPipe_Sink_memory_leaks_associated_with_auto-consumption_348.patch b/src/patches/squid/03_Fix_BodyPipe_Sink_memory_leaks_associated_with_auto-consumption_348.patch
new file mode 100644
index 000000000..efb0d4c69
--- /dev/null
+++ b/src/patches/squid/03_Fix_BodyPipe_Sink_memory_leaks_associated_with_auto-consumption_348.patch
@@ -0,0 +1,118 @@
+commit dd9d539a35275071d7718cfbdbc3203d7aa05139
+Author: Alex Rousskov <rousskov(a)measurement-factory.com>
+Date:   2019-01-08 15:14:18 +0000
+
+    Fix BodyPipe/Sink memory leaks associated with auto-consumption (#348)
+    
+    Auto-consumption happens (and could probably leak memory) in many cases,
+    but this leak was exposed by an eCAP service that blocked or replaced
+    virgin messages.
+    
+    The BodySink job termination algorithm relies on body production
+    notifications. A BodySink job created after the body production had
+    ended can never stop and, hence, leaks (leaking the associated BodyPipe
+    object with it). Such a job is also useless:  If production is over,
+    there is no need to free space for more body data!  This change avoids
+    creating such leaking and useless jobs.
+
+diff --git a/src/BodyPipe.cc b/src/BodyPipe.cc
+index 0fdf0dc..59477bd 100644
+--- a/src/BodyPipe.cc
++++ b/src/BodyPipe.cc
+@@ -286,8 +286,7 @@ BodyPipe::expectNoConsumption()
+         abortedConsumption = true;
+ 
+         // in case somebody enabled auto-consumption before regular one aborted
+-        if (mustAutoConsume)
+-            startAutoConsumption();
++        startAutoConsumptionIfNeeded();
+     }
+ }
+ 
+@@ -313,23 +312,28 @@ BodyPipe::consume(size_t size)
+     postConsume(size);
+ }
+ 
+-// In the AutoConsumption  mode the consumer has gone but the producer continues
+-// producing data. We are using a BodySink BodyConsumer which just discards the produced data.
+ void
+ BodyPipe::enableAutoConsumption()
+ {
+     mustAutoConsume = true;
+     debugs(91,5, HERE << "enabled auto consumption" << status());
+-    if (!theConsumer && theBuf.hasContent())
+-        startAutoConsumption();
++    startAutoConsumptionIfNeeded();
+ }
+ 
+-// start auto consumption by creating body sink
++/// Check the current need and, if needed, start auto consumption. In auto
++/// consumption mode, the consumer is gone, but the producer continues to
++/// produce data. We use a BodySink BodyConsumer to discard that data.
+ void
+-BodyPipe::startAutoConsumption()
++BodyPipe::startAutoConsumptionIfNeeded()
+ {
+-    Must(mustAutoConsume);
+-    Must(!theConsumer);
++    const auto startNow =
++        mustAutoConsume && // was enabled
++        !theConsumer && // has not started yet
++        theProducer.valid() && // still useful (and will eventually stop)
++        theBuf.hasContent(); // has something to consume right now
++    if (!startNow)
++        return;
++
+     theConsumer = new BodySink(this);
+     debugs(91,7, HERE << "starting auto consumption" << status());
+     scheduleBodyDataNotification();
+@@ -393,15 +397,14 @@ BodyPipe::postAppend(size_t size)
+     thePutSize += size;
+     debugs(91,7, HERE << "added " << size << " bytes" << status());
+ 
+-    if (mustAutoConsume && !theConsumer && size > 0)
+-        startAutoConsumption();
++    if (!mayNeedMoreData())
++        clearProducer(true); // reached end-of-body
+ 
+     // We should not consume here even if mustAutoConsume because the
+     // caller may not be ready for the data to be consumed during this call.
+     scheduleBodyDataNotification();
+ 
+-    if (!mayNeedMoreData())
+-        clearProducer(true); // reached end-of-body
++    startAutoConsumptionIfNeeded();
+ }
+ 
+ void
+diff --git a/src/BodyPipe.h b/src/BodyPipe.h
+index 2bf3220..7061c78 100644
+--- a/src/BodyPipe.h
++++ b/src/BodyPipe.h
+@@ -131,7 +131,7 @@ public:
+     bool exhausted() const; // saw eof/abort and all data consumed
+     bool stillConsuming(const Consumer::Pointer &consumer) const { return theConsumer == consumer; }
+ 
+-    // start or continue consuming when there is no consumer
++    /// start or continue consuming when producing without consumer
+     void enableAutoConsumption();
+ 
+     const MemBuf &buf() const { return theBuf; }
+@@ -151,7 +151,7 @@ protected:
+     void postConsume(size_t size);
+     void postAppend(size_t size);
+ 
+-    void startAutoConsumption(); // delayed start of enabled consumption
++    void startAutoConsumptionIfNeeded();
+ 
+ private:
+     int64_t  theBodySize;   // expected total content length, if known
+@@ -163,7 +163,7 @@ private:
+ 
+     MemBuf theBuf; // produced but not yet consumed content, if any
+ 
+-    bool mustAutoConsume; // consume when there is no consumer
++    bool mustAutoConsume; ///< keep theBuf empty when producing without consumer
+     bool abortedConsumption; ///< called BodyProducer::noteBodyConsumerAborted
+     bool isCheckedOut; // to keep track of checkout violations
+ };
diff --git a/src/patches/squid/04_Fix_OpenSSL_builds_that_define_OPENSSL_NO_ENGINE_349.patch b/src/patches/squid/04_Fix_OpenSSL_builds_that_define_OPENSSL_NO_ENGINE_349.patch
new file mode 100644
index 000000000..c868d585c
--- /dev/null
+++ b/src/patches/squid/04_Fix_OpenSSL_builds_that_define_OPENSSL_NO_ENGINE_349.patch
@@ -0,0 +1,22 @@
+commit 186fc1d7c3bad845882d8e3497af9c3a42d75e8f
+Author: Rosen Penev <rosenp(a)gmail.com>
+Date:   2019-01-09 17:42:21 +0000
+
+    Fix OpenSSL builds that define OPENSSL_NO_ENGINE (#349)
+    
+    Even with ENGINE support disabled, OpenSSL provides the openssl/engine.h
+    header. We have to use OPENSSL_NO_ENGINE to detect ENGINE support.
+
+diff --git a/src/ssl/support.cc b/src/ssl/support.cc
+index f6d4ce7..e350a73 100644
+--- a/src/ssl/support.cc
++++ b/src/ssl/support.cc
+@@ -485,7 +485,7 @@ Ssl::Initialize(void)
+ 
+     SQUID_OPENSSL_init_ssl();
+ 
+-#if HAVE_OPENSSL_ENGINE_H
++#if !defined(OPENSSL_NO_ENGINE)
+     if (::Config.SSL.ssl_engine) {
+         ENGINE_load_builtin_engines();
+         ENGINE *e;
diff --git a/src/patches/squid/05_Fixed_disker-to-worker_queue_overflows_353.patch b/src/patches/squid/05_Fixed_disker-to-worker_queue_overflows_353.patch
new file mode 100644
index 000000000..1b656549f
--- /dev/null
+++ b/src/patches/squid/05_Fixed_disker-to-worker_queue_overflows_353.patch
@@ -0,0 +1,92 @@
+commit 4ce035593f5257ee2deb6ca85c7b9a0c4b850f70
+Author: Eduard Bagdasaryan <eduard.bagdasaryan(a)measurement-factory.com>
+Date:   2019-01-12 10:05:30 +0000
+
+    Fixed disker-to-worker queue overflows (#353)
+    
+    Before this fix, Squid sometimes logged the following error:
+    
+        BUG: Worker I/O pop queue for ... overflow: ...
+    
+    The bug could result in truncated hit responses, reduced hit ratio, and,
+    combined with buggy lost I/O handling code (GitHub PR #352), even cache
+    corruption.
+    
+    The bug could be triggered by the following sequence of events:
+    
+    * Disker dequeues one I/O request from the worker push queue.
+    * Worker pushes more I/O requests to that disker, reaching 1024 requests
+      in its push queue (QueueCapacity or just "N" below). No overflow here!
+    * Worker process is suspended (or is just too busy to pop I/O results).
+    * Disker satisfies all 1+N requests, adding each to the worker pop queue
+      and overflows that queue when adding the last processed request.
+    
+    This fix limits worker push so that the sum of all pending requests
+    never exceeds (pop) queue capacity. This approach will continue to work
+    even if diskers are enhanced to dequeue multiple requests for seek
+    optimization and/or priority-based scheduling.
+    
+    Pop queue and push queue can still accommodate N requests each. The fix
+    appears to reduce supported disker "concurrency" levels from 2N down to
+    N pending I/O requests, reducing queue memory utilization. However, the
+    actual reduction is from N+1 to N: Since a worker pops all its satisfied
+    requests before queuing a new one, there could never be more than N+1
+    pending requests (N in the push queue and 1 worked on by the disker).
+    
+    We left the BUG reporting and handling intact. There are no known bugs
+    in that code now. If the bug never surfaces again, it can be replaced
+    with code that translates low-level queue overflow exception into a
+    user-friendly TextException.
+
+diff --git a/src/DiskIO/IpcIo/IpcIoFile.cc b/src/DiskIO/IpcIo/IpcIoFile.cc
+index 735cf0f..95d6d8b 100644
+--- a/src/DiskIO/IpcIo/IpcIoFile.cc
++++ b/src/DiskIO/IpcIo/IpcIoFile.cc
+@@ -364,6 +364,10 @@ IpcIoFile::push(IpcIoPendingRequest *const pending)
+ 
+         debugs(47, 7, HERE << "pushing " << SipcIo(KidIdentifier, ipcIo, diskId));
+ 
++        // protect DiskerHandleRequest() from pop queue overflow
++        if (pendingRequests() >= QueueCapacity)
++            throw Ipc::OneToOneUniQueue::Full();
++
+         if (queue->push(diskId, ipcIo))
+             Notify(diskId); // must notify disker
+         trackPendingRequest(ipcIo.requestId, pending);
+@@ -879,10 +883,8 @@ IpcIoFile::DiskerHandleRequest(const int workerId, IpcIoMsg &ipcIo)
+         if (queue->push(workerId, ipcIo))
+             Notify(workerId); // must notify worker
+     } catch (const Queue::Full &) {
+-        // The worker queue should not overflow because the worker should pop()
+-        // before push()ing and because if disker pops N requests at a time,
+-        // we should make sure the worker pop() queue length is the worker
+-        // push queue length plus N+1. XXX: implement the N+1 difference.
++        // The worker pop queue should not overflow because the worker can
++        // push only if pendingRequests() is less than QueueCapacity.
+         debugs(47, DBG_IMPORTANT, "BUG: Worker I/O pop queue for " <<
+                DbName << " overflow: " <<
+                SipcIo(workerId, ipcIo, KidIdentifier)); // TODO: report queue len
+diff --git a/src/DiskIO/IpcIo/IpcIoFile.h b/src/DiskIO/IpcIo/IpcIoFile.h
+index f89197e..53a3749 100644
+--- a/src/DiskIO/IpcIo/IpcIoFile.h
++++ b/src/DiskIO/IpcIo/IpcIoFile.h
+@@ -55,6 +55,8 @@ public:
+ 
+ class IpcIoPendingRequest;
+ 
++/// In a worker process, represents a single (remote) cache_dir disker file.
++/// In a disker process, used as a bunch of static methods handling that file.
+ class IpcIoFile: public DiskFile
+ {
+     CBDATA_CLASS(IpcIoFile);
+@@ -98,6 +100,10 @@ private:
+     void push(IpcIoPendingRequest *const pending);
+     IpcIoPendingRequest *dequeueRequest(const unsigned int requestId);
+ 
++    /// the total number of I/O requests in push queue and pop queue
++    /// (but no, the implementation does not add push and pop queue sizes)
++    size_t pendingRequests() const { return olderRequests->size() + newerRequests->size(); }
++
+     static void Notify(const int peerId);
+ 
+     static void OpenTimeout(void *const param);
diff --git a/src/patches/squid/06_Initialize_StoreMapSlice_when_reserving_a_new_cache_slot_350.patch b/src/patches/squid/06_Initialize_StoreMapSlice_when_reserving_a_new_cache_slot_350.patch
new file mode 100644
index 000000000..5f04bffce
--- /dev/null
+++ b/src/patches/squid/06_Initialize_StoreMapSlice_when_reserving_a_new_cache_slot_350.patch
@@ -0,0 +1,246 @@
+commit 8e022a61922053764fd1627ba4b65a1617a1bd8a
+Author: Eduard Bagdasaryan <eduard.bagdasaryan(a)measurement-factory.com>
+Date:   2019-01-23 03:51:12 +0000
+
+    Initialize StoreMapSlice when reserving a new cache slot (#350)
+    
+    Rock sets the StoreMapSlice::next field when sending a slice to disk. To
+    avoid writing slice A twice, Rock allocates a new slice B to prime
+    A.next right before writing A. Scheduling A's writing and, sometimes,
+    lack of data to fill B create a gap between B's allocation and B's
+    writing (which sets B.next). During that time, A.next points to B, but
+    B.next is untouched.
+    
+    If writing slice A or swapout in general fails, the chain of failed
+    entry slices (now containing both A and B) is freed. If untouched B.next
+    contains garbage, then freeChainAt() adds "random" slices after B to the
+    free slice pool. Subsequent swapouts use those incorrectly freed slices,
+    effectively overwriting portions of random cache entries, corrupting the
+    cache.
+    
+    How did B.next get dirty in the first place? freeChainAt() cleans the
+    slices it frees, but Rock also makes direct noteFreeMapSlice() calls.
+    Shared memory cache may have avoided this corruption because it makes no
+    such calls.
+    
+    Ipc::StoreMap::prepFreeSlice() now clears allocated slices. Long-term,
+    we may be able to move free slice management into StoreMap to automate
+    this cleanup.
+    
+    Also simplified and polished slot allocation code a little, removing the
+    Rock::IoState::reserveSlotForWriting() middleman. This change also
+    improves the symmetry between Rock and shared memory cache code.
+
+diff --git a/src/MemStore.cc b/src/MemStore.cc
+index 1b24469..1ff48a0 100644
+--- a/src/MemStore.cc
++++ b/src/MemStore.cc
+@@ -772,13 +772,15 @@ MemStore::reserveSapForWriting(Ipc::Mem::PageId &page)
+ {
+     Ipc::Mem::PageId slot;
+     if (freeSlots->pop(slot)) {
+-        debugs(20, 5, "got a previously free slot: " << slot);
++        const auto slotId = slot.number - 1;
++        debugs(20, 5, "got a previously free slot: " << slotId);
+ 
+         if (Ipc::Mem::GetPage(Ipc::Mem::PageId::cachePage, page)) {
+             debugs(20, 5, "and got a previously free page: " << page);
+-            return slot.number - 1;
++            map->prepFreeSlice(slotId);
++            return slotId;
+         } else {
+-            debugs(20, 3, "but there is no free page, returning " << slot);
++            debugs(20, 3, "but there is no free page, returning " << slotId);
+             freeSlots->push(slot);
+         }
+     }
+@@ -791,8 +793,10 @@ MemStore::reserveSapForWriting(Ipc::Mem::PageId &page)
+         assert(!waitingFor); // noteFreeMapSlice() should have cleared it
+         assert(slot.set());
+         assert(page.set());
+-        debugs(20, 5, "got previously busy " << slot << " and " << page);
+-        return slot.number - 1;
++        const auto slotId = slot.number - 1;
++        map->prepFreeSlice(slotId);
++        debugs(20, 5, "got previously busy " << slotId << " and " << page);
++        return slotId;
+     }
+     assert(waitingFor.slot == &slot && waitingFor.page == &page);
+     waitingFor.slot = NULL;
+diff --git a/src/fs/rock/RockIoState.cc b/src/fs/rock/RockIoState.cc
+index bba24ae..50b6e6d 100644
+--- a/src/fs/rock/RockIoState.cc
++++ b/src/fs/rock/RockIoState.cc
+@@ -190,7 +190,7 @@ Rock::IoState::tryWrite(char const *buf, size_t size, off_t coreOff)
+     // allocate the first slice during the first write
+     if (!coreOff) {
+         assert(sidCurrent < 0);
+-        sidCurrent = reserveSlotForWriting(); // throws on failures
++        sidCurrent = dir->reserveSlotForWriting(); // throws on failures
+         assert(sidCurrent >= 0);
+         writeAnchor().start = sidCurrent;
+     }
+@@ -207,7 +207,7 @@ Rock::IoState::tryWrite(char const *buf, size_t size, off_t coreOff)
+         // We do not write a full buffer without overflow because
+         // we would not yet know what to set the nextSlot to.
+         if (overflow) {
+-            const SlotId sidNext = reserveSlotForWriting(); // throws
++            const auto sidNext = dir->reserveSlotForWriting(); // throws
+             assert(sidNext >= 0);
+             writeToDisk(sidNext);
+         } else if (Store::Root().transientReaders(*e)) {
+@@ -306,21 +306,6 @@ Rock::IoState::writeBufToDisk(const SlotId sidNext, const bool eof, const bool l
+     theFile->write(r);
+ }
+ 
+-/// finds and returns a free db slot to fill or throws
+-Rock::SlotId
+-Rock::IoState::reserveSlotForWriting()
+-{
+-    Ipc::Mem::PageId pageId;
+-    if (dir->useFreeSlot(pageId))
+-        return pageId.number-1;
+-
+-    // This may happen when the number of available db slots is close to the
+-    // number of concurrent requests reading or writing those slots, which may
+-    // happen when the db is "small" compared to the request traffic OR when we
+-    // are rebuilding and have not loaded "many" entries or empty slots yet.
+-    throw TexcHere("ran out of free db slots");
+-}
+-
+ void
+ Rock::IoState::finishedWriting(const int errFlag)
+ {
+diff --git a/src/fs/rock/RockIoState.h b/src/fs/rock/RockIoState.h
+index 3c21355..b8f2061 100644
+--- a/src/fs/rock/RockIoState.h
++++ b/src/fs/rock/RockIoState.h
+@@ -66,7 +66,6 @@ private:
+     size_t writeToBuffer(char const *buf, size_t size);
+     void writeToDisk(const SlotId nextSlot);
+     void writeBufToDisk(const SlotId nextSlot, const bool eof, const bool lastWrite);
+-    SlotId reserveSlotForWriting();
+ 
+     void callBack(int errflag);
+ 
+diff --git a/src/fs/rock/RockSwapDir.cc b/src/fs/rock/RockSwapDir.cc
+index 06fb763..f170bb6 100644
+--- a/src/fs/rock/RockSwapDir.cc
++++ b/src/fs/rock/RockSwapDir.cc
+@@ -699,12 +699,16 @@ Rock::SwapDir::diskOffsetLimit() const
+     return diskOffset(map->sliceLimit());
+ }
+ 
+-bool
+-Rock::SwapDir::useFreeSlot(Ipc::Mem::PageId &pageId)
++Rock::SlotId
++Rock::SwapDir::reserveSlotForWriting()
+ {
++    Ipc::Mem::PageId pageId;
++
+     if (freeSlots->pop(pageId)) {
+-        debugs(47, 5, "got a previously free slot: " << pageId);
+-        return true;
++        const auto slotId = pageId.number - 1;
++        debugs(47, 5, "got a previously free slot: " << slotId);
++        map->prepFreeSlice(slotId);
++        return slotId;
+     }
+ 
+     // catch free slots delivered to noteFreeMapSlice()
+@@ -713,14 +717,20 @@ Rock::SwapDir::useFreeSlot(Ipc::Mem::PageId &pageId)
+     if (map->purgeOne()) {
+         assert(!waitingForPage); // noteFreeMapSlice() should have cleared it
+         assert(pageId.set());
+-        debugs(47, 5, "got a previously busy slot: " << pageId);
+-        return true;
++        const auto slotId = pageId.number - 1;
++        debugs(47, 5, "got a previously busy slot: " << slotId);
++        map->prepFreeSlice(slotId);
++        return slotId;
+     }
+     assert(waitingForPage == &pageId);
+     waitingForPage = NULL;
+ 
++    // This may happen when the number of available db slots is close to the
++    // number of concurrent requests reading or writing those slots, which may
++    // happen when the db is "small" compared to the request traffic OR when we
++    // are rebuilding and have not loaded "many" entries or empty slots yet.
+     debugs(47, 3, "cannot get a slot; entries: " << map->entryCount());
+-    return false;
++    throw TexcHere("ran out of free db slots");
+ }
+ 
+ bool
+diff --git a/src/fs/rock/RockSwapDir.h b/src/fs/rock/RockSwapDir.h
+index 879bd45..aa88283 100644
+--- a/src/fs/rock/RockSwapDir.h
++++ b/src/fs/rock/RockSwapDir.h
+@@ -62,10 +62,12 @@ public:
+     int64_t slotLimitAbsolute() const; ///< Rock store implementation limit
+     int64_t slotLimitActual() const; ///< total number of slots in this db
+ 
+-    /// removes a slot from a list of free slots or returns false
+-    bool useFreeSlot(Ipc::Mem::PageId &pageId);
+     /// whether the given slot ID may point to a slot in this db
+     bool validSlotId(const SlotId slotId) const;
++
++    /// finds and returns a free db slot to fill or throws
++    SlotId reserveSlotForWriting();
++
+     /// purges one or more entries to make full() false and free some slots
+     void purgeSome();
+ 
+diff --git a/src/ipc/StoreMap.cc b/src/ipc/StoreMap.cc
+index ba1d4df..e778bba 100644
+--- a/src/ipc/StoreMap.cc
++++ b/src/ipc/StoreMap.cc
+@@ -346,8 +346,7 @@ Ipc::StoreMap::freeChainAt(SliceId sliceId, const SliceId splicingPoint)
+     while (sliceId >= 0) {
+         Slice &slice = sliceAt(sliceId);
+         const SliceId nextId = slice.next;
+-        slice.size = 0;
+-        slice.next = -1;
++        slice.clear();
+         if (cleaner)
+             cleaner->noteFreeMapSlice(sliceId); // might change slice state
+         if (sliceId == splicingPoint) {
+@@ -360,6 +359,14 @@ Ipc::StoreMap::freeChainAt(SliceId sliceId, const SliceId splicingPoint)
+     debugs(54, 7, "freed chain #" << chainId << " in " << path);
+ }
+ 
++void
++Ipc::StoreMap::prepFreeSlice(const SliceId sliceId)
++{
++    // TODO: Move freeSlots here, along with reserveSlotForWriting() logic.
++    assert(validSlice(sliceId));
++    sliceAt(sliceId).clear();
++}
++
+ Ipc::StoreMap::SliceId
+ Ipc::StoreMap::sliceContaining(const sfileno fileno, const uint64_t bytesNeeded) const
+ {
+diff --git a/src/ipc/StoreMap.h b/src/ipc/StoreMap.h
+index 5d44379..8a4909e 100644
+--- a/src/ipc/StoreMap.h
++++ b/src/ipc/StoreMap.h
+@@ -42,6 +42,9 @@ public:
+         return *this;
+     }
+ 
++    /// restore default-constructed state
++    void clear() { size = 0; next = -1; }
++
+     std::atomic<Size> size; ///< slice contents size
+     std::atomic<StoreMapSliceId> next; ///< ID of the next entry slice
+ };
+@@ -292,6 +295,9 @@ public:
+     /// readable anchor for the entry created by openForReading()
+     const Anchor &readableEntry(const AnchorId anchorId) const;
+ 
++    /// prepare a chain-unaffiliated slice for being added to an entry chain
++    void prepFreeSlice(const SliceId sliceId);
++
+     /// Returns the ID of the entry slice containing n-th byte or
+     /// a negative ID if the entry does not store that many bytes (yet).
+     /// Requires a read lock.
diff --git a/src/patches/squid/07_Fail_Rock_swapout_if_the_disk_dropped_some_of_the_write_requests_352.patch b/src/patches/squid/07_Fail_Rock_swapout_if_the_disk_dropped_some_of_the_write_requests_352.patch
new file mode 100644
index 000000000..14ce96a33
--- /dev/null
+++ b/src/patches/squid/07_Fail_Rock_swapout_if_the_disk_dropped_some_of_the_write_requests_352.patch
@@ -0,0 +1,164 @@
+commit a374216b3262b6b40dc19b8ea5423cd2f27f94a3
+Author: Eduard Bagdasaryan <eduard.bagdasaryan(a)measurement-factory.com>
+Date:   2019-01-23 04:30:40 +0000
+
+    Fail Rock swapout if the disk dropped some of the write requests (#352)
+    
+    Detecting dropped writes earlier is more than a TODO: If the last entry
+    write was successful, the whole entry becomes available for hits
+    immediately.  IpcIoFile::checkTimeouts() that runs every 7 seconds
+    (IpcIoFile::Timeout) would eventually notify Rock about the timeout,
+    allowing Rock to release the failed entry, but that notification may
+    be too late.
+    
+    The precise outcome of hitting an entry with a missing on-disk slice is
+    unknown (because the bug was detected by temporary hit validation code
+    that turned such hits into misses), but SWAPFAIL is the best we could
+    hope for.
+
+diff --git a/src/fs/rock/RockSwapDir.cc b/src/fs/rock/RockSwapDir.cc
+index f170bb6..b70dc8b 100644
+--- a/src/fs/rock/RockSwapDir.cc
++++ b/src/fs/rock/RockSwapDir.cc
+@@ -858,42 +858,59 @@ Rock::SwapDir::writeCompleted(int errflag, size_t, RefCount< ::WriteRequest> r)
+ 
+     debugs(79, 7, "errflag=" << errflag << " rlen=" << request->len << " eof=" << request->eof);
+ 
+-    // TODO: Fail if disk dropped one of the previous write requests.
+-
+-    if (errflag == DISK_OK) {
+-        // do not increment sio.offset_ because we do it in sio->write()
+-
+-        // finalize the shared slice info after writing slice contents to disk
+-        Ipc::StoreMap::Slice &slice =
+-            map->writeableSlice(sio.swap_filen, request->sidCurrent);
+-        slice.size = request->len - sizeof(DbCellHeader);
+-        slice.next = request->sidNext;
+-
+-        if (request->eof) {
+-            assert(sio.e);
+-            assert(sio.writeableAnchor_);
+-            if (sio.touchingStoreEntry()) {
+-                sio.e->swap_file_sz = sio.writeableAnchor_->basics.swap_file_sz =
+-                                          sio.offset_;
+-                map->switchWritingToReading(sio.swap_filen);
+-                // sio.e keeps the (now read) lock on the anchor
+-            }
+-            sio.writeableAnchor_ = NULL;
+-            sio.splicingPoint = request->sidCurrent;
+-            sio.finishedWriting(errflag);
+-        }
+-    } else {
+-        noteFreeMapSlice(request->sidNext);
+-
+-        writeError(sio);
+-        sio.finishedWriting(errflag);
+-        // and wait for the finalizeSwapoutFailure() call to close the map entry
+-    }
++    if (errflag != DISK_OK)
++        handleWriteCompletionProblem(errflag, *request);
++    else if (droppedEarlierRequest(*request))
++        handleWriteCompletionProblem(DISK_ERROR, *request);
++    else
++        handleWriteCompletionSuccess(*request);
+ 
+     if (sio.touchingStoreEntry())
+         CollapsedForwarding::Broadcast(*sio.e);
+ }
+ 
++/// code shared by writeCompleted() success handling cases
++void
++Rock::SwapDir::handleWriteCompletionSuccess(const WriteRequest &request)
++{
++    auto &sio = *(request.sio);
++    sio.splicingPoint = request.sidCurrent;
++    // do not increment sio.offset_ because we do it in sio->write()
++
++    // finalize the shared slice info after writing slice contents to disk
++    Ipc::StoreMap::Slice &slice =
++        map->writeableSlice(sio.swap_filen, request.sidCurrent);
++    slice.size = request.len - sizeof(DbCellHeader);
++    slice.next = request.sidNext;
++
++    if (request.eof) {
++        assert(sio.e);
++        assert(sio.writeableAnchor_);
++        if (sio.touchingStoreEntry()) {
++            sio.e->swap_file_sz = sio.writeableAnchor_->basics.swap_file_sz =
++                                      sio.offset_;
++
++            map->switchWritingToReading(sio.swap_filen);
++            // sio.e keeps the (now read) lock on the anchor
++        }
++        sio.writeableAnchor_ = NULL;
++        sio.finishedWriting(DISK_OK);
++    }
++}
++
++/// code shared by writeCompleted() error handling cases
++void
++Rock::SwapDir::handleWriteCompletionProblem(const int errflag, const WriteRequest &request)
++{
++    auto &sio = *request.sio;
++
++    noteFreeMapSlice(request.sidNext);
++
++    writeError(sio);
++    sio.finishedWriting(errflag);
++    // and hope that Core will call disconnect() to close the map entry
++}
++
+ void
+ Rock::SwapDir::writeError(StoreIOState &sio)
+ {
+@@ -908,6 +925,23 @@ Rock::SwapDir::writeError(StoreIOState &sio)
+     // All callers must also call IoState callback, to propagate the error.
+ }
+ 
++/// whether the disk has dropped at least one of the previous write requests
++bool
++Rock::SwapDir::droppedEarlierRequest(const WriteRequest &request) const
++{
++    const auto &sio = *request.sio;
++    assert(sio.writeableAnchor_);
++    const Ipc::StoreMapSliceId expectedSliceId = sio.splicingPoint < 0 ?
++            sio.writeableAnchor_->start :
++            map->writeableSlice(sio.swap_filen, sio.splicingPoint).next;
++    if (expectedSliceId != request.sidCurrent) {
++        debugs(79, 3, "yes; expected " << expectedSliceId << ", but got " << request.sidCurrent);
++        return true;
++    }
++
++    return false;
++}
++
+ void
+ Rock::SwapDir::updateHeaders(StoreEntry *updatedE)
+ {
+diff --git a/src/fs/rock/RockSwapDir.h b/src/fs/rock/RockSwapDir.h
+index aa88283..0748337 100644
+--- a/src/fs/rock/RockSwapDir.h
++++ b/src/fs/rock/RockSwapDir.h
+@@ -138,6 +138,9 @@ protected:
+ 
+ private:
+     void createError(const char *const msg);
++    void handleWriteCompletionSuccess(const WriteRequest &request);
++    void handleWriteCompletionProblem(const int errflag, const WriteRequest &request);
++    bool droppedEarlierRequest(const WriteRequest &request) const;
+ 
+     DiskIOStrategy *io;
+     RefCount<DiskFile> theFile; ///< cache storage for this cache_dir
+diff --git a/src/fs/rock/forward.h b/src/fs/rock/forward.h
+index ba64d03..4e68a27 100644
+--- a/src/fs/rock/forward.h
++++ b/src/fs/rock/forward.h
+@@ -40,6 +40,8 @@ class HeaderUpdater;
+ 
+ class DbCellHeader;
+ 
++class WriteRequest;
++
+ }
+ 
+ #endif /* SQUID_FS_ROCK_FORWARD_H */
diff --git a/src/patches/squid/08_Bug_4914_Do_not_call_setsid_in_--foreground_mode_354.patch b/src/patches/squid/08_Bug_4914_Do_not_call_setsid_in_--foreground_mode_354.patch
new file mode 100644
index 000000000..65bf2e648
--- /dev/null
+++ b/src/patches/squid/08_Bug_4914_Do_not_call_setsid_in_--foreground_mode_354.patch
@@ -0,0 +1,36 @@
+commit 6076ca90a8101888483b769edd2f9aeef0da8638
+Author: Marcos Mello <marcosfrm(a)users.noreply.github.com>
+Date:   2019-01-23 15:18:43 +0000
+
+    Bug 4914: Do not call setsid() in --foreground mode (#354)
+    
+    Squid executed in --foreground is always a process group leader. Thus,
+    setsid(2) is unnecessary and always fails (with EPERM) for such Squids.
+
+diff --git a/src/main.cc b/src/main.cc
+index 7f5ec80..1e8e035 100644
+--- a/src/main.cc
++++ b/src/main.cc
+@@ -1871,6 +1871,7 @@ GoIntoBackground()
+         exit(EXIT_SUCCESS);
+     }
+     // child, running as a background daemon
++    Must(setsid() > 0); // ought to succeed after fork()
+ }
+ 
+ static void
+@@ -1916,14 +1917,6 @@ watch_child(const CommandLine &masterCommand)
+     if (!opt_foreground)
+         GoIntoBackground();
+ 
+-    // TODO: Fails with --foreground if the calling process is process group
+-    //       leader, which is always (?) the case. Should probably moved to
+-    //       GoIntoBackground and executed only after successfully forking
+-    if (setsid() < 0) {
+-        int xerrno = errno;
+-        syslog(LOG_ALERT, "setsid failed: %s", xstrerr(xerrno));
+-    }
+-
+     closelog();
+ 
+ #ifdef TIOCNOTTY
diff --git a/src/patches/squid/09_Bug_4915_Detect_IPv6_loopback_binding_errors_355.patch b/src/patches/squid/09_Bug_4915_Detect_IPv6_loopback_binding_errors_355.patch
new file mode 100644
index 000000000..b6fed3336
--- /dev/null
+++ b/src/patches/squid/09_Bug_4915_Detect_IPv6_loopback_binding_errors_355.patch
@@ -0,0 +1,54 @@
+commit 568e66b7c64e0d7873b9575b0dc60886805a2fc6 (refs/remotes/origin/v4)
+Author: Amos Jeffries <yadij(a)users.noreply.github.com>
+Date:   2019-01-24 16:43:47 +0000
+
+    Bug 4915: Detect IPv6 loopback binding errors (#355)
+    
+    Systems which have been partially 'IPv6 disabled' may allow
+    sockets to be opened and used but missing the IPv6 loopback
+    address.
+    
+    Implement the outstanding TODO to detect such failures and
+    disable IPv6 support properly within Squid when they are found.
+    
+    This should fix bug 4915 auth_param helper startup and similar
+    external_acl_type helper issues. For security such helpers are
+    not permitted to use the machine default IP address which is
+    globally accessible.
+
+diff --git a/src/ip/tools.cc b/src/ip/tools.cc
+index c7a8ac8..1ccf64f 100644
+--- a/src/ip/tools.cc
++++ b/src/ip/tools.cc
+@@ -10,6 +10,7 @@
+ 
+ #include "squid.h"
+ #include "Debug.h"
++#include "ip/Address.h"
+ #include "ip/tools.h"
+ 
+ #if HAVE_UNISTD_H
+@@ -55,8 +56,21 @@ Ip::ProbeTransport()
+     debugs(3, 2, "Missing RFC 3493 compliance - attempting split IPv4 and IPv6 stacks ...");
+     EnableIpv6 |= IPV6_SPECIAL_SPLITSTACK;
+ #endif
+-    // TODO: attempt to use the socket to connect somewhere ?
+-    //  needs to be safe to contact and with guaranteed working IPv6 at the other end.
++
++    // Test for IPv6 loopback/localhost address binding
++    Ip::Address ip;
++    ip.setLocalhost();
++    if (ip.isIPv6()) { // paranoid; always succeeds if we got this far
++        struct sockaddr_in6 sin;
++        ip.getSockAddr(sin);
++        if (bind(s, reinterpret_cast<struct sockaddr *>(&sin), sizeof(sin)) != 0) {
++            debugs(3, DBG_CRITICAL, "WARNING: BCP 177 violation. Detected non-functional IPv6 loopback.");
++            EnableIpv6 = IPV6_OFF;
++        } else {
++            debugs(3, 2, "Detected functional IPv6 loopback ...");
++        }
++    }
++
+     close(s);
+ 
+ #if USE_IPV6
-- 
2.18.0


             reply	other threads:[~2019-02-04 17:54 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-04 17:54 Matthias Fischer [this message]
2019-02-05 12:02 ` Michael Tremer
2019-02-05 17:37   ` Matthias Fischer
2019-02-05 20:58     ` Michael Tremer
2019-02-06 17:36       ` Matthias Fischer
2019-02-24 10:20       ` Matthias Fischer
2019-02-25 10:34         ` Michael Tremer
2019-02-25 18:17           ` Matthias Fischer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190204175435.8456-1-matthias.fischer@ipfire.org \
    --to=matthias.fischer@ipfire.org \
    --cc=development@lists.ipfire.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox