Previously, the lack of distinction between different DROP lists caused only the last one to be persisted.
Signed-off-by: Peter Müller <peter.mueller@ipfire.org>
---
 src/scripts/location-importer.in | 58 +++++++++++++++++++++-----------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/src/scripts/location-importer.in b/src/scripts/location-importer.in
index 8d47497..e4a9ab0 100644
--- a/src/scripts/location-importer.in
+++ b/src/scripts/location-importer.in
@@ -1427,18 +1427,21 @@ class CLI(object):
 	def _update_overrides_for_spamhaus_drop(self):
 		downloader = location.importer.Downloader()
 
-		ip_urls = [
-			"https://www.spamhaus.org/drop/drop.txt",
-			"https://www.spamhaus.org/drop/edrop.txt",
-			"https://www.spamhaus.org/drop/dropv6.txt"
+		ip_lists = [
+			("Spamhaus DROP list", "https://www.spamhaus.org/drop/drop.txt"),
+			("Spamhaus EDROP list", "https://www.spamhaus.org/drop/edrop.txt"),
+			("Spamhaus DROPv6 list", "https://www.spamhaus.org/drop/dropv6.txt")
 		]
 
-		asn_urls = [
-			"https://www.spamhaus.org/drop/asndrop.txt"
+		asn_lists = [
+			("Spamhaus ASN-DROP list", "https://www.spamhaus.org/drop/asndrop.txt")
 		]
 
-		for url in ip_urls:
-			# Fetch IP list
+		for list in ip_lists:
+			name = list[0]
+			url = list[1]
+
+			# Fetch IP list from given URL
 			f = downloader.retrieve(url)
 
 			# Split into lines
@@ -1448,11 +1451,11 @@ class CLI(object):
 			# downloads.
 			if len(fcontent) > 10:
 				self.db.execute("""
-					DELETE FROM autnum_overrides WHERE source = 'Spamhaus ASN-DROP list';
-					DELETE FROM network_overrides WHERE source = 'Spamhaus DROP lists';
-				""")
+					DELETE FROM network_overrides WHERE source = '%s';
+					""" % name,
+				)
 			else:
-				log.error("Spamhaus DROP URL %s returned likely bogus file, ignored" % url)
+				log.error("%s (%s) returned likely bogus file, ignored" % (name, url))
 				continue
 
 			# Iterate through every line, filter comments and add remaining networks to
@@ -1475,8 +1478,8 @@ class CLI(object):
 
 				# Sanitize parsed networks...
 				if not self._check_parsed_network(network):
-					log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
-						(url, network))
+					log.warning("Skipping bogus network found in %s (%s): %s" % \
+						(name, url, network))
 					continue
 
 				# Conduct SQL statement...
@@ -1488,14 +1491,31 @@ class CLI(object):
 					) VALUES (%s, %s, %s)
 					ON CONFLICT (network) DO UPDATE SET is_drop = True""",
 					"%s" % network,
-					"Spamhaus DROP lists",
+					name,
 					True
 				)
 
-		for url in asn_urls:
+		for list in asn_lists:
+			name = list[0]
+			url = list[1]
+
 			# Fetch URL
 			f = downloader.retrieve(url)
 
+			# Split into lines
+			fcontent = f.readlines()
+
+			# Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
+			# downloads.
+			if len(fcontent) > 10:
+				self.db.execute("""
+					DELETE FROM autnum_overrides WHERE source = '%s';
+					""" % name,
+				)
+			else:
+				log.error("%s (%s) returned likely bogus file, ignored" % (name, url))
+				continue
+
 			# Iterate through every line, filter comments and add remaining ASNs to
 			# the override table in case they are valid...
 			with self.db.transaction():
@@ -1518,8 +1538,8 @@ class CLI(object):
 
 				# Filter invalid ASNs...
 				if not self._check_parsed_asn(asn):
-					log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
-						(url, asn))
+					log.warning("Skipping bogus ASN found in %s (%s): %s" % \
+						(name, url, asn))
 					continue
 
 				# Conduct SQL statement...
@@ -1531,7 +1551,7 @@ class CLI(object):
 					) VALUES (%s, %s, %s)
 					ON CONFLICT (number) DO UPDATE SET is_drop = True""",
 					"%s" % asn,
-					"Spamhaus ASN-DROP list",
+					name,
 					True
 				)
Hi,
being curious I wanted to test this patch, but couldn't find 'location-importer.in' in 'src/scripts'. No chance to apply this patch.
Finally I found '.../build/usr/bin/location-importer'. Hm. Ok.
But how do I apply this patch?
Still being curious... ;-)
Matthias
Hello Matthias,
This patch is against libloc:
https://git.ipfire.org/?p=location/libloc.git;a=summary
-Michael
Hello Peter,
On 18 Aug 2022, at 22:42, Peter Müller <peter.mueller@ipfire.org> wrote:
> Previously, the lack of distinction between different DROP lists caused only the last one to be persisted.
>
> -		ip_urls = [
> -			"https://www.spamhaus.org/drop/drop.txt",
> -			"https://www.spamhaus.org/drop/edrop.txt",
> -			"https://www.spamhaus.org/drop/dropv6.txt"
> +		ip_lists = [
> +			("Spamhaus DROP list", "https://www.spamhaus.org/drop/drop.txt"),
> +			("Spamhaus EDROP list", "https://www.spamhaus.org/drop/edrop.txt"),
> +			("Spamhaus DROPv6 list", "https://www.spamhaus.org/drop/dropv6.txt")
>  		]
Would it not be better to have this as a dict?
Or if you prefer something to iterate over, a tuple?
Would it not be better/easier if we would use a handle like “SPAMHAUS-DROP”, “SPAMHAUS-EDROP”, … instead of a string with spaces? It does not matter that much to me, but since this is a machine-readable string, should it not be less human formatted?
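For illustration only, the two shapes suggested here could look roughly like this (a sketch, not code from the patch; it keeps the ip_lists name and uses the SPAMHAUS-* handles proposed above):

	# Option 1: a dict mapping a machine-readable handle to its URL
	ip_lists = {
		"SPAMHAUS-DROP": "https://www.spamhaus.org/drop/drop.txt",
		"SPAMHAUS-EDROP": "https://www.spamhaus.org/drop/edrop.txt",
		"SPAMHAUS-DROPV6": "https://www.spamhaus.org/drop/dropv6.txt",
	}

	for name, url in ip_lists.items():
		...

	# Option 2: a tuple of (handle, url) pairs that can be unpacked directly
	ip_lists = (
		("SPAMHAUS-DROP", "https://www.spamhaus.org/drop/drop.txt"),
		("SPAMHAUS-EDROP", "https://www.spamhaus.org/drop/edrop.txt"),
		("SPAMHAUS-DROPV6", "https://www.spamhaus.org/drop/dropv6.txt"),
	)

Either way the handle travels with its URL, so the source column gets a distinct, machine-readable value per list.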
> -		for url in ip_urls:
> -			# Fetch IP list
> +		for list in ip_lists:
> +			name = list[0]
> +			url = list[1]
It would be more Pythonic if you wrote it like this:
	for name, url in ip_lists:
		...
>  				self.db.execute("""
> -					DELETE FROM autnum_overrides WHERE source = 'Spamhaus ASN-DROP list';
> -					DELETE FROM network_overrides WHERE source = 'Spamhaus DROP lists';
> -				""")
> +					DELETE FROM network_overrides WHERE source = '%s';
> +					""" % name,
> +				)
No need for the ‘’ around the %s.
""" % name,
) else:
log.error("Spamhaus DROP URL %s returned likely bogus file, ignored" % url)
log.error("%s (%s) returned likely bogus file, ignored" % (name, url)) continue # Iterate through every line, filter comments and add remaining networks to
@@ -1475,8 +1478,8 @@ class CLI(object):
# Sanitize parsed networks... if not self._check_parsed_network(network):
log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
(url, network))
log.warning("Skipping bogus network found in %s (%s): %s" % \
(name, url, network)) continue # Conduct SQL statement...
@@ -1488,14 +1491,31 @@ class CLI(object): ) VALUES (%s, %s, %s) ON CONFLICT (network) DO UPDATE SET is_drop = True""", "%s" % network,
"Spamhaus DROP lists",
name, True )
> -		for url in asn_urls:
> +		for list in asn_lists:
> +			name = list[0]
> +			url = list[1]
See above.
>  			# Fetch URL
>  			f = downloader.retrieve(url)
>
> +			# Split into lines
> +			fcontent = f.readlines()
> +
> +			# Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
> +			# downloads.
> +			if len(fcontent) > 10:
> +				self.db.execute("""
> +					DELETE FROM autnum_overrides WHERE source = '%s';
> +					""" % name,
> +				)
> +			else:
> +				log.error("%s (%s) returned likely bogus file, ignored" % (name, url))
> +				continue
Is there a reason why this DELETE statement is not part of the transaction? If something goes wrong later on, you would have lost the previous data.
Would it also not be programmatically better to start the import and then count how many lines you are importing?
The “f.readlines()” call is something that I would consider a bit ugly.
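A sketch of what that could look like (assuming the self.db.transaction() context manager rolls back on an exception, as DB-API wrappers typically do, and that f can be iterated line by line; the lines_imported counter is illustrative, not from the patch):

	with self.db.transaction():
		# Deleting inside the transaction means a failure during the
		# import below also rolls the deletion back, keeping the old data
		self.db.execute("DELETE FROM autnum_overrides WHERE source = %s", name)

		lines_imported = 0

		# Stream the response instead of materialising it with readlines()
		for sline in f:
			sline = sline.decode("utf-8")

			# ... filter comments, parse and validate the ASN here ...

			lines_imported += 1

		# A near-empty file is more likely a CDN hiccup than real content;
		# raising aborts the transaction and preserves the previous data
		if lines_imported <= 10:
			raise RuntimeError("%s (%s) returned likely bogus file" % (name, url))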
Hello Michael,
better late than never: Thanks for your reply and the comments.
> Hello Peter,
>
>> +		ip_lists = [
>> +			("Spamhaus DROP list", "https://www.spamhaus.org/drop/drop.txt"),
>> +			("Spamhaus EDROP list", "https://www.spamhaus.org/drop/edrop.txt"),
>> +			("Spamhaus DROPv6 list", "https://www.spamhaus.org/drop/dropv6.txt")
>>  		]
>
> Would it not be better to have this as a dict?
>
> Or if you prefer something to iterate over, a tuple?
>
> Would it not be better/easier if we would use a handle like “SPAMHAUS-DROP”, “SPAMHAUS-EDROP”, … instead of a string with spaces? It does not matter that much to me, but since this is a machine-readable string, should it not be less human formatted?
Hm, good point. I have changed that in the upcoming second version of the patch.
>> +					DELETE FROM network_overrides WHERE source = '%s';
>
> No need for the ‘’ around the %s.
Yes there is, the SQL command results in an exception otherwise.
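A sketch of why (illustration, not from the patch): with Python %-formatting the value is pasted into the query text verbatim, so the SQL quotes are the only quoting there is, and nothing gets escaped. With the v1 source name:

	"DELETE FROM network_overrides WHERE source = '%s';" % name
	# -> DELETE FROM network_overrides WHERE source = 'Spamhaus DROP list';

	"DELETE FROM network_overrides WHERE source = %s;" % name
	# -> DELETE FROM network_overrides WHERE source = Spamhaus DROP list;
	# The second form is not valid SQL, hence the exception.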
Second version of the patch is coming right up.
Thanks, and best regards, Peter Müller
""" % name,
) else:
log.error("Spamhaus DROP URL %s returned likely bogus file, ignored" % url)
log.error("%s (%s) returned likely bogus file, ignored" % (name, url)) continue # Iterate through every line, filter comments and add remaining networks to
@@ -1475,8 +1478,8 @@ class CLI(object):
# Sanitize parsed networks... if not self._check_parsed_network(network):
log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
(url, network))
log.warning("Skipping bogus network found in %s (%s): %s" % \
(name, url, network)) continue # Conduct SQL statement...
@@ -1488,14 +1491,31 @@ class CLI(object): ) VALUES (%s, %s, %s) ON CONFLICT (network) DO UPDATE SET is_drop = True""", "%s" % network,
"Spamhaus DROP lists",
name, True )
for url in asn_urls:
for list in asn_lists:
name = list[0]
url = list[1]
See above.
# Fetch URL f = downloader.retrieve(url)
# Split into lines
fcontent = f.readlines()
# Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
# downloads.
if len(fcontent) > 10:
self.db.execute("""
DELETE FROM autnum_overrides WHERE source = '%s';
""" % name,
)
else:
log.error("%s (%s) returned likely bogus file, ignored" % (name, url))
continue
Is there a reason why this DELETE statement is not part of the transaction? If something goes wrong later on, you would have lost the previous data.
Would it also not be programmatically better to start the import and then count how many lines you are importing?
The “f.readlines()” call is something that I would consider a bit ugly.
# Iterate through every line, filter comments and add remaining ASNs to # the override table in case they are valid... with self.db.transaction():
@@ -1518,8 +1538,8 @@ class CLI(object):
# Filter invalid ASNs... if not self._check_parsed_asn(asn):
log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
(url, asn))
log.warning("Skipping bogus ASN found in %s (%s): %s" % \
(name, url, asn)) continue # Conduct SQL statement...
@@ -1531,7 +1551,7 @@ class CLI(object): ) VALUES (%s, %s, %s) ON CONFLICT (number) DO UPDATE SET is_drop = True""", "%s" % asn,
"Spamhaus ASN-DROP list",
name, True )
-- 2.35.3
Previously, the lack of distinction between different DROP lists caused only the last one to be persisted. The second version of this patch incorporates suggestions from Michael on the first version.
Tested-by: Peter Müller <peter.mueller@ipfire.org>
Signed-off-by: Peter Müller <peter.mueller@ipfire.org>
---
 src/scripts/location-importer.in | 74 +++++++++++++++++++-------------
 1 file changed, 44 insertions(+), 30 deletions(-)

diff --git a/src/scripts/location-importer.in b/src/scripts/location-importer.in
index 8d47497..d405eb2 100644
--- a/src/scripts/location-importer.in
+++ b/src/scripts/location-importer.in
@@ -1427,37 +1427,37 @@ class CLI(object):
 	def _update_overrides_for_spamhaus_drop(self):
 		downloader = location.importer.Downloader()
 
-		ip_urls = [
-			"https://www.spamhaus.org/drop/drop.txt",
-			"https://www.spamhaus.org/drop/edrop.txt",
-			"https://www.spamhaus.org/drop/dropv6.txt"
+		ip_lists = [
+			("SPAMHAUS-DROP", "https://www.spamhaus.org/drop/drop.txt"),
+			("SPAMHAUS-EDROP", "https://www.spamhaus.org/drop/edrop.txt"),
+			("SPAMHAUS-DROPV6", "https://www.spamhaus.org/drop/dropv6.txt")
 		]
 
-		asn_urls = [
-			"https://www.spamhaus.org/drop/asndrop.txt"
+		asn_lists = [
+			("SPAMHAUS-ASNDROP", "https://www.spamhaus.org/drop/asndrop.txt")
 		]
 
-		for url in ip_urls:
-			# Fetch IP list
+		for name, url in ip_lists:
+			# Fetch IP list from given URL
 			f = downloader.retrieve(url)
 
 			# Split into lines
 			fcontent = f.readlines()
 
-			# Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
-			# downloads.
-			if len(fcontent) > 10:
-				self.db.execute("""
-					DELETE FROM autnum_overrides WHERE source = 'Spamhaus ASN-DROP list';
-					DELETE FROM network_overrides WHERE source = 'Spamhaus DROP lists';
-				""")
-			else:
-				log.error("Spamhaus DROP URL %s returned likely bogus file, ignored" % url)
-				continue
-
-			# Iterate through every line, filter comments and add remaining networks to
-			# the override table in case they are valid...
 			with self.db.transaction():
+				# Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
+				# downloads.
+				if len(fcontent) > 10:
+					self.db.execute("""
+						DELETE FROM network_overrides WHERE source = '%s';
+						""" % name,
+					)
+				else:
+					log.error("%s (%s) returned likely bogus file, ignored" % (name, url))
+					continue
+
+				# Iterate through every line, filter comments and add remaining networks to
+				# the override table in case they are valid...
 				for sline in fcontent:
 					# The response is assumed to be encoded in UTF-8...
 					sline = sline.decode("utf-8")
@@ -1475,8 +1475,8 @@ class CLI(object):
 
 				# Sanitize parsed networks...
 				if not self._check_parsed_network(network):
-					log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
-						(url, network))
+					log.warning("Skipping bogus network found in %s (%s): %s" % \
+						(name, url, network))
 					continue
 
 				# Conduct SQL statement...
@@ -1488,17 +1488,31 @@ class CLI(object):
 					) VALUES (%s, %s, %s)
 					ON CONFLICT (network) DO UPDATE SET is_drop = True""",
 					"%s" % network,
-					"Spamhaus DROP lists",
+					name,
 					True
 				)
 
-		for url in asn_urls:
+		for name, url in asn_lists:
 			# Fetch URL
 			f = downloader.retrieve(url)
 
-			# Iterate through every line, filter comments and add remaining ASNs to
-			# the override table in case they are valid...
+			# Split into lines
+			fcontent = f.readlines()
+
 			with self.db.transaction():
+				# Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
+				# downloads.
+				if len(fcontent) > 10:
+					self.db.execute("""
+						DELETE FROM autnum_overrides WHERE source = '%s';
+						""" % name,
+					)
+				else:
+					log.error("%s (%s) returned likely bogus file, ignored" % (name, url))
+					continue
+
+				# Iterate through every line, filter comments and add remaining ASNs to
+				# the override table in case they are valid...
 				for sline in f.readlines():
 					# The response is assumed to be encoded in UTF-8...
 					sline = sline.decode("utf-8")
@@ -1518,8 +1532,8 @@ class CLI(object):
 
 				# Filter invalid ASNs...
 				if not self._check_parsed_asn(asn):
-					log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
-						(url, asn))
+					log.warning("Skipping bogus ASN found in %s (%s): %s" % \
+						(name, url, asn))
 					continue
 
 				# Conduct SQL statement...
@@ -1531,7 +1545,7 @@ class CLI(object):
 					) VALUES (%s, %s, %s)
 					ON CONFLICT (number) DO UPDATE SET is_drop = True""",
 					"%s" % asn,
-					"Spamhaus ASN-DROP list",
+					name,
 					True
 				)
Hello,
This looks a lot more Pythonic and okay to me.
I will merge this shortly.
-Michael
Hello,
On 26 Sep 2022, at 19:26, Peter Müller <peter.mueller@ipfire.org> wrote:
> Hello Michael,
>
> better late than never: Thanks for your reply and the comments.
>
>>> +					DELETE FROM network_overrides WHERE source = '%s';
>>
>> No need for the ‘’ around the %s.
>
> Yes there is, the SQL command results in an exception otherwise.
Err, you are allowing some SQL command injection here.
You do not want to use “%s” as a string format parameter for Python strings. It is more of a placeholder for the value that has to be escaped first - SQLite is using ? which is probably a better choice for Python.
The database binding is doing this automatically, but the arguments need to be passed after the query.
Under no circumstances whatsoever do you want to change the query dynamically. Never ever.
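A sketch of the difference (the placeholder style depends on the database binding; passing the arguments after the query follows the patch's own INSERT statements):

	# Dangerous: the query text itself changes with the input, so a
	# crafted value could smuggle extra SQL into the statement
	self.db.execute("""
		DELETE FROM network_overrides WHERE source = '%s';
		""" % name,
	)

	# Safe: %s stays in the query as a placeholder, the binding escapes
	# the value, and no quotes are needed around it
	self.db.execute("DELETE FROM network_overrides WHERE source = %s",
		name,
	)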
I fixed this here: https://git.ipfire.org/?p=location/libloc.git;a=commitdiff;h=b56bf4bf065130b...
Best, -Michael
Hello Michael,
> Err, you are allowing some SQL command injection here.
>
> You do not want to use “%s” as a string format parameter for Python strings. It is more of a placeholder for the value that has to be escaped first - SQLite is using ? which is probably a better choice for Python.
>
> The database binding is doing this automatically, but the arguments need to be passed after the query.
>
> Under no circumstances whatsoever do you want to change the query dynamically. Never ever.
>
> I fixed this here: https://git.ipfire.org/?p=location/libloc.git;a=commitdiff;h=b56bf4bf065130b...
better late than never: Many thanks for spotting and fixing this before it went into anything stable or productive! Guess I'm not a good programmer after all...
All the best, Peter Müller
This is the reason why we are doing code review...
On 2 Oct 2022, at 12:04, Peter Müller <peter.mueller@ipfire.org> wrote:

> better late than never: Many thanks for spotting and fixing this before it went into anything stable or productive! Guess I'm not a good programmer after all...
>
> All the best, Peter Müller