From mboxrd@z Thu Jan 1 00:00:00 1970 From: Michael Tremer To: location@lists.ipfire.org Subject: Re: [PATCH v2] location-importer.in: Conduct sanity checks per DROP list Date: Tue, 27 Sep 2022 10:17:11 +0100 Message-ID: <71AEB78A-D75A-4306-BCB1-9B8E4F56B063@ipfire.org> In-Reply-To: MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============4358501911737228415==" List-Id: --===============4358501911737228415== Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Hello, This looks a lot more Pythonic and okay to me. I will merge this shortly. -Michael > On 26 Sep 2022, at 19:26, Peter M=C3=BCller wr= ote: >=20 > Previously, the lack of distinction between different DROP lists caused > only the last one to be persisted. The second version of this patch > incorporates suggestions from Michael on the first version. >=20 > Tested-by: Peter M=C3=BCller > Signed-off-by: Peter M=C3=BCller > --- > src/scripts/location-importer.in | 74 +++++++++++++++++++------------- > 1 file changed, 44 insertions(+), 30 deletions(-) >=20 > diff --git a/src/scripts/location-importer.in b/src/scripts/location-import= er.in > index 8d47497..d405eb2 100644 > --- a/src/scripts/location-importer.in > +++ b/src/scripts/location-importer.in > @@ -1427,37 +1427,37 @@ class CLI(object): > def _update_overrides_for_spamhaus_drop(self): > downloader =3D location.importer.Downloader() >=20 > - ip_urls =3D [ > - "https://www.spamhaus.org/drop/drop.txt", > - "https://www.spamhaus.org/drop/edrop.txt", > - "https://www.spamhaus.org/drop/dropv6.txt" > + ip_lists =3D [ > + ("SPAMHAUS-DROP", "https://www.spamhaus.org/drop/drop.txt"), > + ("SPAMHAUS-EDROP", "https://www.spamhaus.org/drop/edrop.txt"), > + ("SPAMHAUS-DROPV6", "https://www.spamhaus.org/drop/dropv6.txt") > ] >=20 > - asn_urls =3D [ > - "https://www.spamhaus.org/drop/asndrop.txt" > + asn_lists =3D [ > + ("SPAMHAUS-ASNDROP", "https://www.spamhaus.org/drop/asndrop.txt") > ] >=20 > - for url in ip_urls: > - # Fetch IP list > + for name, url in ip_lists: > + # Fetch IP list from given URL > f =3D downloader.retrieve(url) >=20 > # Split into lines > fcontent =3D f.readlines() >=20 > - # Conduct a very basic sanity check to rule out CDN issues causing bogu= s DROP > - # downloads. > - if len(fcontent) > 10: > - self.db.execute(""" > - DELETE FROM autnum_overrides WHERE source =3D 'Spamhaus ASN-DROP list= '; > - DELETE FROM network_overrides WHERE source =3D 'Spamhaus DROP lists'; > - """) > - else: > - log.error("Spamhaus DROP URL %s returned likely bogus file, ignored" %= url) > - continue > - > - # Iterate through every line, filter comments and add remaining network= s to > - # the override table in case they are valid... > with self.db.transaction(): > + # Conduct a very basic sanity check to rule out CDN issues causing bog= us DROP > + # downloads. > + if len(fcontent) > 10: > + self.db.execute(""" > + DELETE FROM network_overrides WHERE source =3D '%s'; > + """ % name, > + ) > + else: > + log.error("%s (%s) returned likely bogus file, ignored" % (name, url)) > + continue > + > + # Iterate through every line, filter comments and add remaining networ= ks to > + # the override table in case they are valid... > for sline in fcontent: > # The response is assumed to be encoded in UTF-8... > sline =3D sline.decode("utf-8") > @@ -1475,8 +1475,8 @@ class CLI(object): >=20 > # Sanitize parsed networks... > if not self._check_parsed_network(network): > - log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %= s" % \ > - (url, network)) > + log.warning("Skipping bogus network found in %s (%s): %s" % \ > + (name, url, network)) > continue >=20 > # Conduct SQL statement... > @@ -1488,17 +1488,31 @@ class CLI(object): > ) VALUES (%s, %s, %s) > ON CONFLICT (network) DO UPDATE SET is_drop =3D True""", > "%s" % network, > - "Spamhaus DROP lists", > + name, > True > ) >=20 > - for url in asn_urls: > + for name, url in asn_lists: > # Fetch URL > f =3D downloader.retrieve(url) >=20 > - # Iterate through every line, filter comments and add remaining ASNs to > - # the override table in case they are valid... > + # Split into lines > + fcontent =3D f.readlines() > + > with self.db.transaction(): > + # Conduct a very basic sanity check to rule out CDN issues causing bog= us DROP > + # downloads. > + if len(fcontent) > 10: > + self.db.execute(""" > + DELETE FROM autnum_overrides WHERE source =3D '%s'; > + """ % name, > + ) > + else: > + log.error("%s (%s) returned likely bogus file, ignored" % (name, url)) > + continue > + > + # Iterate through every line, filter comments and add remaining ASNs to > + # the override table in case they are valid... > for sline in f.readlines(): > # The response is assumed to be encoded in UTF-8... > sline =3D sline.decode("utf-8") > @@ -1518,8 +1532,8 @@ class CLI(object): >=20 > # Filter invalid ASNs... > if not self._check_parsed_asn(asn): > - log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" %= \ > - (url, asn)) > + log.warning("Skipping bogus ASN found in %s (%s): %s" % \ > + (name, url, asn)) > continue >=20 > # Conduct SQL statement... > @@ -1531,7 +1545,7 @@ class CLI(object): > ) VALUES (%s, %s, %s) > ON CONFLICT (number) DO UPDATE SET is_drop =3D True""", > "%s" % asn, > - "Spamhaus ASN-DROP list", > + name, > True > ) >=20 > --=20 > 2.35.3 --===============4358501911737228415==--