From mboxrd@z Thu Jan 1 00:00:00 1970 From: Peter =?utf-8?q?M=C3=BCller?= To: location@lists.ipfire.org Subject: [PATCH v2 2/3] importer: Import raw sources for inetnum's again Date: Sun, 20 Sep 2020 19:20:18 +0000 Message-ID: <54b93c52-9578-3390-d8d4-e889766dcb84@ipfire.org> In-Reply-To: MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============0712665270870787913==" List-Id: --===============0712665270870787913== Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable The extended feeds do not contain sufficiently detailed information for us, so we need to import inetnums from the RIRs where possible. Filtering private networks is necessary as RIR data may contain 0.0.0.0/0 or similar entries for administrative purposes or due to misfilings. Special thanks go to Michael for spending numerous hours on this, setting up a testing environment and providing helpful advice while debugging. Partially fixes: #12458 Cc: Michael Tremer Signed-off-by: Peter M=C3=BCller --- src/python/importer.py | 14 ++++---- src/python/location-importer.in | 63 +++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 7 deletions(-) diff --git a/src/python/importer.py b/src/python/importer.py index de20f37..f19db4b 100644 --- a/src/python/importer.py +++ b/src/python/importer.py @@ -30,8 +30,8 @@ WHOIS_SOURCES =3D ( "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz", =20 # Asia Pacific Network Information Centre - #"https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz", - #"https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz", + "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz", + "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz", #"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz", #"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz", "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz", @@ -45,8 +45,8 @@ WHOIS_SOURCES =3D ( # XXX ??? 
=20 # R=C3=A9seaux IP Europ=C3=A9ens - #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz", - #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz", + "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz", + "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz", #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz", #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz", "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz", @@ -55,10 +55,10 @@ WHOIS_SOURCES =3D ( =20 EXTENDED_SOURCES =3D ( # African Network Information Centre - "https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-lates= t", + #"https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-late= st", =20 # Asia Pacific Network Information Centre - "https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest", + #"https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest", =20 # American Registry for Internet Numbers "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest", @@ -67,7 +67,7 @@ EXTENDED_SOURCES =3D ( "http://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest", =20 # R=C3=A9seaux IP Europ=C3=A9ens - "https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest", + #"https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest", ) =20 class Downloader(object): diff --git a/src/python/location-importer.in b/src/python/location-importer.in index 77952f2..e3a07a0 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -393,6 +393,10 @@ class CLI(object): if line.startswith("aut-num:"): return self._parse_autnum_block(block) =20 + # inetnum + if line.startswith("inet6num:") or line.startswith("inetnum:"): + return self._parse_inetnum_block(block) + # organisation elif line.startswith("organisation:"): return self._parse_org_block(block) @@ -422,6 +426,65 @@ class CLI(object): autnum.get("asn"), autnum.get("org"), ) =20 + def 
_parse_inetnum_block(self, block): + logging.debug("Parsing inetnum block:") + + inetnum =3D {} + for line in block: + logging.debug(line) + + # Split line + key, val =3D split_line(line) + + if key =3D=3D "inetnum": + start_address, delim, end_address =3D val.partition("-") + + # Strip any excess space + start_address, end_address =3D start_address.rstrip(), end_address.strip= () + + # Convert to IP address + try: + start_address =3D ipaddress.ip_address(start_address) + end_address =3D ipaddress.ip_address(end_address) + except ValueError: + logging.warning("Could not parse line: %s" % line) + return + + # Set prefix to default + prefix =3D 32 + + # Count number of addresses in this subnet + num_addresses =3D int(end_address) - int(start_address) + if num_addresses: + prefix -=3D math.log(num_addresses, 2) + + inetnum["inetnum"] =3D "%s/%.0f" % (start_address, prefix) + + elif key =3D=3D "inet6num": + inetnum[key] =3D val + + elif key =3D=3D "country": + if val =3D=3D "UNITED STATES": + val =3D "US" + + inetnum[key] =3D val.upper() + + # Skip empty objects + if not inetnum: + return + + network =3D ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("i= netnum"), strict=3DFalse) + + # Bail out in case we have processed a non-public IP network + if network.is_private: + logging.warning("Skipping non-globally routable network: %s" % network) + return + + self.db.execute("INSERT INTO networks(network, country) \ + VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country =3D excluded.c= ountry", + "%s" % network, inetnum.get("country"), + ) + def _parse_org_block(self, block): org =3D {} for line in block: --=20 2.26.2 --===============0712665270870787913==--