From: Michael Tremer michael.tremer@ipfire.org
This patch removes the possibility that any IP address ranges might show up in multiple exported files.
If this was content from the database:
* 10.0.0.0/16 - DE * 10.0.1.0/24 - FR
Then the IP address 10.0.1.1 would match for both countries.
The algorithm will now break the larger /16 subnet apart into smaller subnets so that 10.0.1.0/24 is no longer overlapped.
There was some time spent on this to make this as efficient as possible.
Signed-off-by: Michael Tremer michael.tremer@ipfire.org --- src/python/export.py | 154 ++++++++++++++++++++++++++++++------------- 1 file changed, 110 insertions(+), 44 deletions(-)
diff --git a/src/python/export.py b/src/python/export.py index 5eaf43f..dd44332 100644 --- a/src/python/export.py +++ b/src/python/export.py @@ -89,28 +89,55 @@ class OutputWriter(object):
def write(self, network, subnets): if self.flatten and self._flatten(network): - log.debug("Skipping writing network %s" % network) + log.debug("Skipping writing network %s (last one was %s)" % (network, self._last_network)) return
+ # Convert network into a Python object + _network = ipaddress.ip_network("%s" % network) + # Write the network when it has no subnets if not subnets: - network = ipaddress.ip_network("%s" % network) - return self._write_network(network) + log.debug("Writing %s to %s" % (_network, self.f)) + return self._write_network(_network) + + # Convert subnets into Python objects + _subnets = [ipaddress.ip_network("%s" % subnet) for subnet in subnets] + + # Split the network into smaller bits so that + # we can accomodate for any gaps in it later + to_check = set() + for _subnet in _subnets: + to_check.update( + _network.address_exclude(_subnet) + ) + + # Clear the list of all subnets + subnets = [] + + # Check if all subnets to not overlap with anything else + while to_check: + subnet_to_check = to_check.pop()
- # Collect all matching subnets - matching_subnets = [] + for _subnet in _subnets: + # Drop this subnet if it equals one of the subnets + # or if it is subnet of one of them + if subnet_to_check == _subnet or subnet_to_check.subnet_of(_subnet): + break
- for subnet in sorted(subnets): - # Try to find the subnet in the database - n = self.db.lookup("%s" % subnet.network_address) + # Break it down if it overlaps + if subnet_to_check.overlaps(_subnet): + to_check.update( + subnet_to_check.address_exclude(_subnet) + ) + break
- # No entry or matching country means those IP addresses belong here - if not n or n.country_code == network.country_code: - matching_subnets.append(subnet) + # Add the subnet again as it passed the check + else: + subnets.append(subnet_to_check)
# Write all networks as compact as possible - for network in ipaddress.collapse_addresses(matching_subnets): - log.debug("Writing %s to database" % network) + for network in ipaddress.collapse_addresses(subnets): + log.debug("Writing %s to %s" % (network, self.f)) self._write_network(network)
def finish(self): @@ -206,40 +233,40 @@ class Exporter(object): # Get all networks that match the family networks = self.db.search_networks(family=family)
- # Materialise the generator into a list (uses quite some memory) - networks = list(networks) + # Create a stack with all networks in order where we can put items back + # again and retrieve them in the next iteration. + networks = BufferedStack(networks)
# Walk through all networks - for i, network in enumerate(networks): - _network = ipaddress.ip_network("%s" % network) - - # Search for all subnets - subnets = set() - - while i < len(networks): - subnet = networks[i+1] - - if subnet.is_subnet_of(network): - _subnet = ipaddress.ip_network("%s" % subnet) - - subnets.add(_subnet) - subnets.update(_network.address_exclude(_subnet)) - - i += 1 - else: + for network in networks: + # Collect all networks which are a subnet of network + subnets = [] + for subnet in networks: + # If the next subnet was not a subnet, we have to push + # it back on the stack and break this loop + if not subnet.is_subnet_of(network): + networks.push(subnet) break
+ subnets.append(subnet) + # Write matching countries - try: - writers[network.country_code].write(network, subnets) - except KeyError: - pass + if network.country_code and network.country_code in writers: + # Mismatching subnets + gaps = [ + subnet for subnet in subnets if not network.country_code == subnet.country_code + ] + + writers[network.country_code].write(network, gaps)
# Write matching ASNs - try: - writers[network.asn].write(network, subnets) - except KeyError: - pass + if network.asn and network.asn in writers: + # Mismatching subnets + gaps = [ + subnet for subnet in subnets if not network.asn == subnet.asn + ] + + writers[network.asn].write(network, gaps)
# Handle flags for flag in flags: @@ -247,10 +274,19 @@ class Exporter(object): # Fetch the "fake" country code country = flags[flag]
- try: - writers[country].write(network, subnets) - except KeyError: - pass + if not country in writers: + continue + + gaps = [ + subnet for subnet in subnets + if not subnet.has_flag(flag) + ] + + writers[country].write(network, gaps) + + # Push all subnets back onto the stack + for subnet in reversed(subnets): + networks.push(subnet)
# Write everything to the filesystem for writer in writers.values(): @@ -262,3 +298,33 @@ class Exporter(object): )
return os.path.join(directory, filename) + + +class BufferedStack(object): + """ + This class takes an iterator and when being iterated + over it returns objects from that iterator for as long + as there are any. + + It additionally has a function to put an item back on + the back so that it will be returned again at the next + iteration. + """ + def __init__(self, iterator): + self.iterator = iterator + self.stack = [] + + def __iter__(self): + return self + + def __next__(self): + if self.stack: + return self.stack.pop(0) + + return next(self.iterator) + + def push(self, elem): + """ + Takes an element and puts it on the stack + """ + self.stack.insert(0, elem)