From mboxrd@z Thu Jan  1 00:00:00 1970
From: Peter =?utf-8?q?M=C3=BCller?= <peter.mueller@ipfire.org>
To: location@lists.ipfire.org
Subject: [PATCH 6/8] location-importer.in: omit historic/orphaned RIR data
Date: Wed, 21 Oct 2020 14:47:41 +0000
Message-ID: <20201021144743.18083-6-peter.mueller@ipfire.org>
In-Reply-To: <20201021144743.18083-1-peter.mueller@ipfire.org>
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="===============8819715977395017955=="
List-Id: <location.lists.ipfire.org>

--===============8819715977395017955==
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable

Some RIRs include detailed information regarding networks not managed
by or allocated to themselves, particularly APNIC. We need to filter
those networks (they usually have a characteristic network name) in
order to prevent operational quirks or returning wrong country codes.

Fixes: #12501
Partially fixes: #12499

Cc: Michael Tremer <michael.tremer(a)ipfire.org>
Signed-off-by: Peter M=C3=BCller <peter.mueller(a)ipfire.org>
---
 src/python/location-importer.in | 38 +++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/src/python/location-importer.in b/src/python/location-importer.in
index 20eb052..4f4a46d 100644
--- a/src/python/location-importer.in
+++ b/src/python/location-importer.in
@@ -484,38 +484,38 @@ class CLI(object):
 			return False
=20
 		if not network.is_global:
-			logging.warning("Skipping non-globally routable network: %s" % network)
+			log.warning("Skipping non-globally routable network: %s" % network)
 			return False
=20
 		if network.version =3D=3D 4:
 			if network.prefixlen < 7:
-				logging.warning("Skipping too big IP chunk: %s" % network)
+				log.warning("Skipping too big IP chunk: %s" % network)
 				return False
=20
 			if network.prefixlen > 24:
-				logging.info("Skipping network too small to be publicly announced: %s" %=
 network)
+				log.info("Skipping network too small to be publicly announced: %s" % net=
work)
 				return False
=20
 			if str(network.network_address) =3D=3D "0.0.0.0":
-				logging.warning("Skipping network based on 0.0.0.0: %s" % network)
+				log.warning("Skipping network based on 0.0.0.0: %s" % network)
 				return False
=20
 		elif network.version =3D=3D 6:
 			if network.prefixlen < 10:
-				logging.warning("Skipping too big IP chunk: %s" % network)
+				log.warning("Skipping too big IP chunk: %s" % network)
 				return False
=20
 			if network.prefixlen > 48:
-				logging.info("Skipping network too small to be publicly announced: %s" %=
 network)
+				log.info("Skipping network too small to be publicly announced: %s" % net=
work)
 				return False
=20
 			if str(network.network_address) =3D=3D "::":
-				logging.warning("Skipping network based on '::': %s" % network)
+				log.warning("Skipping network based on '::': %s" % network)
 				return False
=20
 		else:
 			# This should not happen...
-			logging.warning("Skipping network of unknown family, this should not happ=
en: %s" % network)
+			log.warning("Skipping network of unknown family, this should not happen: =
%s" % network)
 			return False
=20
 		# In case we have made it here, the network is considered to
@@ -564,15 +564,22 @@ class CLI(object):
 		)
=20
 	def _parse_inetnum_block(self, block):
-		logging.debug("Parsing inetnum block:")
+		log.debug("Parsing inetnum block:")
=20
 		inetnum =3D {}
 		for line in block:
-			logging.debug(line)
+			log.debug(line)
=20
 			# Split line
 			key, val =3D split_line(line)
=20
+			# Filter any inetnum records which are only referring to IP space
+			# not managed by that specific RIR...
+			if key =3D=3D "netname":
+				if re.match(r"(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-N=
ETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK)", val.strip()):
+					log.warning("Skipping record indicating historic/orphaned data: %s" % v=
al.strip())
+					return
+
 			if key =3D=3D "inetnum":
 				start_address, delim, end_address =3D val.partition("-")
=20
@@ -584,7 +591,7 @@ class CLI(object):
 					start_address =3D ipaddress.ip_address(start_address)
 					end_address   =3D ipaddress.ip_address(end_address)
 				except ValueError:
-					logging.warning("Could not parse line: %s" % line)
+					log.warning("Could not parse line: %s" % line)
 					return
=20
 				# Set prefix to default
@@ -601,15 +608,18 @@ class CLI(object):
 				inetnum[key] =3D val
=20
 			elif key =3D=3D "country":
-				if val =3D=3D "UNITED STATES":
-					val =3D "US"
-
 				inetnum[key] =3D val.upper()
=20
 		# Skip empty objects
 		if not inetnum or not "country" in inetnum:
 			return
=20
+		# Skip objects with bogus country code 'ZZ'
+		if inetnum.get("country") =3D=3D "ZZ":
+			log.warning("Skipping network with bogus country 'ZZ': %s" % \
+				(inetnum.get("inet6num") or inetnum.get("inetnum")))
+			return
+
 		network =3D ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("i=
netnum"), strict=3DFalse)
=20
 		if not self._check_parsed_network(network):
--=20
2.20.1


--===============8819715977395017955==--