This reverts commit 92f6abf4e272672bb0a71cfe991261b95ebe2fef. --- src/python/importer.py | 14 ++++---- src/python/location-importer.in | 63 +++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 7 deletions(-)
diff --git a/src/python/importer.py b/src/python/importer.py index de20f37..f19db4b 100644 --- a/src/python/importer.py +++ b/src/python/importer.py @@ -30,8 +30,8 @@ WHOIS_SOURCES = ( "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz",
# Asia Pacific Network Information Centre - #"https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz", - #"https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz", + "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz", + "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz", #"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz", #"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz", "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz", @@ -45,8 +45,8 @@ WHOIS_SOURCES = ( # XXX ???
# Réseaux IP Européens - #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz", - #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz", + "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz", + "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz", #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz", #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz", "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz", @@ -55,10 +55,10 @@ WHOIS_SOURCES = (
EXTENDED_SOURCES = ( # African Network Information Centre - "https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest", + #"https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest",
# Asia Pacific Network Information Centre - "https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest", + #"https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest",
# American Registry for Internet Numbers "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest", @@ -67,7 +67,7 @@ EXTENDED_SOURCES = ( "http://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest",
# Réseaux IP Européens - "https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest", + #"https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest", )
class Downloader(object): diff --git a/src/python/location-importer.in b/src/python/location-importer.in index 77952f2..e3a07a0 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -393,6 +393,10 @@ class CLI(object): if line.startswith("aut-num:"): return self._parse_autnum_block(block)
+ # inetnum + if line.startswith("inet6num:") or line.startswith("inetnum:"): + return self._parse_inetnum_block(block) + # organisation elif line.startswith("organisation:"): return self._parse_org_block(block) @@ -422,6 +426,65 @@ class CLI(object): autnum.get("asn"), autnum.get("org"), )
+ def _parse_inetnum_block(self, block): + logging.debug("Parsing inetnum block:") + + inetnum = {} + for line in block: + logging.debug(line) + + # Split line + key, val = split_line(line) + + if key == "inetnum": + start_address, delim, end_address = val.partition("-") + + # Strip any excess space + start_address, end_address = start_address.rstrip(), end_address.strip() + + # Convert to IP address + try: + start_address = ipaddress.ip_address(start_address) + end_address = ipaddress.ip_address(end_address) + except ValueError: + logging.warning("Could not parse line: %s" % line) + return + + # Set prefix to default + prefix = 32 + + # Count number of addresses in this subnet + num_addresses = int(end_address) - int(start_address) + if num_addresses: + prefix -= math.log(num_addresses, 2) + + inetnum["inetnum"] = "%s/%.0f" % (start_address, prefix) + + elif key == "inet6num": + inetnum[key] = val + + elif key == "country": + if val == "UNITED STATES": + val = "US" + + inetnum[key] = val.upper() + + # Skip empty objects + if not inetnum: + return + + network = ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("inetnum"), strict=False) + + # Bail out in case we have processed a non-public IP network + if network.is_private: + logging.warning("Skipping non-globally routable network: %s" % network) + return + + self.db.execute("INSERT INTO networks(network, country) \ + VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", + "%s" % network, inetnum.get("country"), + ) + def _parse_org_block(self, block): org = {} for line in block:
In contrast to ARIN and LACNIC, the other RIRs provide more detailed feeds that we are able to process, avoiding storage of obviously unnecessary data.
Thanks to various SQL optimisations, doing so now takes less time than it did in the first version of this patch.
Signed-off-by: Michael Tremer michael.tremer@ipfire.org Signed-off-by: Peter Müller peter.mueller@ipfire.org --- src/python/location-importer.in | 89 ++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 2 deletions(-)
diff --git a/src/python/location-importer.in b/src/python/location-importer.in index e3a07a0..093f325 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -165,6 +165,7 @@ class CLI(object): -- networks CREATE TABLE IF NOT EXISTS networks(network inet, country text); CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); + CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network)); CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
-- overrides @@ -363,6 +364,16 @@ class CLI(object): CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL) ON COMMIT DROP; CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle); + + CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL) + ON COMMIT DROP; + CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network)); + CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network); + """) + + # Remove all previously imported content + self.db.execute(""" + TRUNCATE TABLE networks; """)
for source in location.importer.WHOIS_SOURCES: @@ -370,6 +381,67 @@ class CLI(object): for block in f: self._parse_block(block)
+ # Process all parsed networks from every RIR we happen to have access to, + # insert the largest network chunks into the networks table immediately... + families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)") + + for family in (row.family for row in families): + smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family) + + self.db.execute("INSERT INTO networks(network, country) \ + SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family) + + # ... determine any other prefixes for this network family, ... + prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \ + WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family) + + # ... and insert networks with this prefix in case they provide additional + # information (i. e. subnet of a larger chunk with a different country) + for prefix in (row.prefix for row in prefixes): + self.db.execute(""" + WITH candidates AS ( + SELECT + _rirdata.network, + _rirdata.country + FROM + _rirdata + WHERE + family(_rirdata.network) = %s + AND + masklen(_rirdata.network) = %s + ), + filtered AS ( + SELECT + DISTINCT ON (c.network) + c.network, + c.country, + masklen(networks.network), + networks.country AS parent_country + FROM + candidates c + LEFT JOIN + networks + ON + c.network << networks.network + ORDER BY + c.network, + masklen(networks.network) DESC NULLS LAST + ) + INSERT INTO + networks(network, country) + SELECT + network, + country + FROM + filtered + WHERE + parent_country IS NULL + OR + country <> parent_country + ON CONFLICT DO NOTHING""", + family, prefix, + ) + self.db.execute(""" INSERT INTO autnums(number, name) SELECT _autnums.number, _organizations.name FROM _autnums @@ -470,17 +542,30 @@ class CLI(object): inetnum[key] = val.upper()
# Skip empty objects - if not inetnum: + if not inetnum or not "country" in inetnum: return
network = ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("inetnum"), strict=False)
+ # Bail out in case we have processed a network covering the entire IP range, which + # is necessary to work around faulty (?) IPv6 network processing + if network.prefixlen == 0: + logging.warning("Skipping network covering the entire IP adress range: %s" % network) + return + + # Bail out in case we have processed a network whose prefix length indicates it is + # not globally routable (we have decided not to process them at the moment, as they + # significantly enlarge our database without providing very helpful additional information) + if (network.prefixlen > 24 and network.version == 4) or (network.prefixlen > 48 and network.version == 6): + logging.info("Skipping network too small to be publicly announced: %s" % network) + return + # Bail out in case we have processed a non-public IP network if network.is_private: logging.warning("Skipping non-globally routable network: %s" % network) return
- self.db.execute("INSERT INTO networks(network, country) \ + self.db.execute("INSERT INTO _rirdata(network, country) \ VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", "%s" % network, inetnum.get("country"), )