From: "Peter Müller" <peter.mueller@ipfire.org>
To: location@lists.ipfire.org
Subject: [PATCH 3/8] export.py: fix exporting IP networks for crappy xt_geoip module
Date: Wed, 21 Oct 2020 14:47:38 +0000 [thread overview]
Message-ID: <20201021144743.18083-3-peter.mueller@ipfire.org> (raw)
In-Reply-To: <20201021144743.18083-1-peter.mueller@ipfire.org>
[-- Attachment #1: Type: text/plain, Size: 5355 bytes --]
In contrast to the location database itself, the xt_geoip module
consumes a list of IP networks for each country, and returns after the
first match.
We therefore need to...
(a) sort IP networks by their size, to allow matches as precise as possible
(b) export _any_ IP networks - including inverted subnets - to prevent
undefined overlaps
(c) do the entire thing as fast as possible, consuming as little disk
space as possible, which is why we can't just iterate over all /24
chunks
Partially fixes: #12499
Signed-off-by: Michael Tremer <michael.tremer(a)ipfire.org>
Signed-off-by: Peter Müller <peter.mueller(a)ipfire.org>
---
src/python/export.py | 69 ++++++++++++++++++++++++++++++++++----------
1 file changed, 54 insertions(+), 15 deletions(-)
diff --git a/src/python/export.py b/src/python/export.py
index d15c6f0..5eaf43f 100644
--- a/src/python/export.py
+++ b/src/python/export.py
@@ -39,8 +39,8 @@ class OutputWriter(object):
suffix = "networks"
mode = "w"
- def __init__(self, f, prefix=None, flatten=True):
- self.f, self.prefix, self.flatten = f, prefix, flatten
+ def __init__(self, db, f, prefix=None, flatten=True):
+ self.db, self.f, self.prefix, self.flatten = db, f, prefix, flatten
# The previously written network
self._last_network = None
@@ -49,13 +49,13 @@ class OutputWriter(object):
self._write_header()
@classmethod
- def open(cls, filename, **kwargs):
+ def open(cls, db, filename, **kwargs):
"""
Convenience function to open a file
"""
f = open(filename, cls.mode)
- return cls(f, **kwargs)
+ return cls(db, f, **kwargs)
def __repr__(self):
return "<%s f=%s>" % (self.__class__.__name__, self.f)
@@ -87,13 +87,31 @@ class OutputWriter(object):
def _write_network(self, network):
self.f.write("%s\n" % network)
- def write(self, network):
+ def write(self, network, subnets):
if self.flatten and self._flatten(network):
log.debug("Skipping writing network %s" % network)
return
- # Write the network to file
- self._write_network(network)
+ # Write the network when it has no subnets
+ if not subnets:
+ network = ipaddress.ip_network("%s" % network)
+ return self._write_network(network)
+
+ # Collect all matching subnets
+ matching_subnets = []
+
+ for subnet in sorted(subnets):
+ # Try to find the subnet in the database
+ n = self.db.lookup("%s" % subnet.network_address)
+
+ # No entry or matching country means those IP addresses belong here
+ if not n or n.country_code == network.country_code:
+ matching_subnets.append(subnet)
+
+ # Write all networks as compact as possible
+ for network in ipaddress.collapse_addresses(matching_subnets):
+ log.debug("Writing %s to database" % network)
+ self._write_network(network)
def finish(self):
"""
@@ -143,10 +161,10 @@ class XTGeoIPOutputWriter(OutputWriter):
mode = "wb"
def _write_network(self, network):
- for address in (network.first_address, network.last_address):
+ for address in (network.network_address, network.broadcast_address):
# Convert this into a string of bits
bytes = socket.inet_pton(
- network.family, address,
+ socket.AF_INET6 if network.version == 6 else socket.AF_INET, "%s" % address,
)
self.f.write(bytes)
@@ -175,7 +193,7 @@ class Exporter(object):
directory, prefix=country_code, suffix=self.writer.suffix, family=family,
)
- writers[country_code] = self.writer.open(filename, prefix="CC_%s" % country_code)
+ writers[country_code] = self.writer.open(self.db, filename, prefix="CC_%s" % country_code)
# Create writers for ASNs
for asn in asns:
@@ -183,22 +201,43 @@ class Exporter(object):
directory, "AS%s" % asn, suffix=self.writer.suffix, family=family,
)
- writers[asn] = self.writer.open(filename, prefix="AS%s" % asn)
+ writers[asn] = self.writer.open(self.db, filename, prefix="AS%s" % asn)
# Get all networks that match the family
networks = self.db.search_networks(family=family)
+ # Materialise the generator into a list (uses quite some memory)
+ networks = list(networks)
+
# Walk through all networks
- for network in networks:
+ for i, network in enumerate(networks):
+ _network = ipaddress.ip_network("%s" % network)
+
+ # Search for all subnets
+ subnets = set()
+
+ while i < len(networks):
+ subnet = networks[i+1]
+
+ if subnet.is_subnet_of(network):
+ _subnet = ipaddress.ip_network("%s" % subnet)
+
+ subnets.add(_subnet)
+ subnets.update(_network.address_exclude(_subnet))
+
+ i += 1
+ else:
+ break
+
# Write matching countries
try:
- writers[network.country_code].write(network)
+ writers[network.country_code].write(network, subnets)
except KeyError:
pass
# Write matching ASNs
try:
- writers[network.asn].write(network)
+ writers[network.asn].write(network, subnets)
except KeyError:
pass
@@ -209,7 +248,7 @@ class Exporter(object):
country = flags[flag]
try:
- writers[country].write(network)
+ writers[country].write(network, subnets)
except KeyError:
pass
--
2.20.1
next prev parent reply other threads:[~2020-10-21 14:47 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-21 14:47 [PATCH 1/8] Revert "Revert "Revert "Revert "importer: Import raw sources for inetnum's again"""" Peter Müller
2020-10-21 14:47 ` [PATCH 2/8] Revert "Revert "location-importer.in: only import relevant data from AFRINIC, APNIC and RIPE"" Peter Müller
2020-10-21 14:47 ` Peter Müller [this message]
2020-10-21 14:47 ` [PATCH 4/8] location-importer.in: filter bogus IP networks for both Whois and extended sources Peter Müller
2020-10-21 14:47 ` [PATCH 5/8] importer.py: fetch LACNIC data via HTTPS Peter Müller
2020-10-21 14:47 ` [PATCH 6/8] location-importer.in: omit historic/orphaned RIR data Peter Müller
2020-10-21 14:47 ` [PATCH 7/8] location-importer.in: Create gist index for announcement table as well Peter Müller
2020-10-21 14:47 ` [PATCH 8/8] location-importer.in: avoid log spam for too small networks Peter Müller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201021144743.18083-3-peter.mueller@ipfire.org \
--to=peter.mueller@ipfire.org \
--cc=location@lists.ipfire.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox