public inbox for location@lists.ipfire.org
 help / color / mirror / Atom feed
* [PATCH] location-importer.in: track original countries as well
@ 2021-05-14 16:55 Peter Müller
  2021-05-18 10:38 ` Michael Tremer
  0 siblings, 1 reply; 4+ messages in thread
From: Peter Müller @ 2021-05-14 16:55 UTC (permalink / raw)
  To: location

[-- Attachment #1: Type: text/plain, Size: 6147 bytes --]

This helps us determine how many network objects have more than one
country code assigned, and what their original list of country codes
looked like.

Signed-off-by: Peter Müller <peter.mueller(a)ipfire.org>
---
 src/python/location-importer.in | 53 ++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/src/python/location-importer.in b/src/python/location-importer.in
index e5f55af..c7162cf 100644
--- a/src/python/location-importer.in
+++ b/src/python/location-importer.in
@@ -164,7 +164,7 @@ class CLI(object):
 				CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
 
 				-- networks
-				CREATE TABLE IF NOT EXISTS networks(network inet, country text);
+				CREATE TABLE IF NOT EXISTS networks(network inet, country text, original_countries text[]);
 				CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
 				CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
 				CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
@@ -377,7 +377,7 @@ class CLI(object):
 					ON COMMIT DROP;
 				CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
 
-				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL)
+				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[])
 					ON COMMIT DROP;
 				CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
 				CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
@@ -407,8 +407,8 @@ class CLI(object):
 			for family in (row.family for row in families):
 				smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
 
-				self.db.execute("INSERT INTO networks(network, country) \
-					SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
+				self.db.execute("INSERT INTO networks(network, country, original_countries) \
+					SELECT network, country, original_countries FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
 
 				# ... determine any other prefixes for this network family, ...
 				prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
@@ -421,7 +421,8 @@ class CLI(object):
 						WITH candidates AS (
 							SELECT
 								_rirdata.network,
-								_rirdata.country
+								_rirdata.country,
+								_rirdata.original_countries
 							FROM
 								_rirdata
 							WHERE
@@ -434,6 +435,7 @@ class CLI(object):
 								DISTINCT ON (c.network)
 								c.network,
 								c.country,
+								c.original_countries,
 								masklen(networks.network),
 								networks.country AS parent_country
 							FROM
@@ -447,10 +449,11 @@ class CLI(object):
 								masklen(networks.network) DESC NULLS LAST
 						)
 						INSERT INTO
-							networks(network, country)
+							networks(network, country, original_countries)
 						SELECT
 							network,
-							country
+							country,
+							original_countries
 						FROM
 							filtered
 						WHERE
@@ -617,28 +620,36 @@ class CLI(object):
 				inetnum[key] = [ipaddress.ip_network(val, strict=False)]
 
 			elif key == "country":
-				inetnum[key] = val.upper()
+				# Catch RIR data objects with more than one country code...
+				if not key in inetnum.keys():
+					inetnum[key] = []
+				else:
+					if val.upper() in inetnum.get("country"):
+						# ... but keep this list distinct...
+						continue
+
+				inetnum[key].append(val.upper())
 
 		# Skip empty objects
 		if not inetnum or not "country" in inetnum:
 			return
 
+		# Prepare skipping objects with unknown country codes...
+		invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
+
 		# Iterate through all networks enumerated from above, check them for plausibility and insert
 		# them into the database, if _check_parsed_network() succeeded
 		for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
 			if self._check_parsed_network(single_network):
-
-				# Skip objects with unknown country codes - to avoid log spam for invalid or too small
-				# networks, this check is - kinda ugly - done at this point
-				if validcountries and inetnum.get("country") not in validcountries:
-					log.warning("Skipping network with bogus country '%s': %s" % \
-						(inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
+				# Skip objects with unknown country codes - to avoid log spam, this is only logged for networks that passed the check above...
+				if validcountries and invalidcountries:
+					log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
+						(invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
 					break
 
-				# Everything is fine here, run INSERT statement...
-				self.db.execute("INSERT INTO _rirdata(network, country) \
-					VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
-					"%s" % single_network, inetnum.get("country"),
+				self.db.execute("INSERT INTO _rirdata(network, country, original_countries) \
+					VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
+					"%s" % single_network, inetnum.get("country")[0], inetnum.get("country"),
 				)
 
 	def _parse_org_block(self, block):
@@ -729,10 +740,10 @@ class CLI(object):
 		if not self._check_parsed_network(network):
 			return
 
-		self.db.execute("INSERT INTO networks(network, country) \
-			VALUES(%s, %s) ON CONFLICT (network) DO \
+		self.db.execute("INSERT INTO networks(network, country, original_countries) \
+			VALUES(%s, %s, %s) ON CONFLICT (network) DO \
 			UPDATE SET country = excluded.country",
-			"%s" % network, country,
+			"%s" % network, country, [country],
 		)
 
 	def handle_update_announcements(self, ns):
-- 
2.26.2

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-05-21  9:24 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-14 16:55 [PATCH] location-importer.in: track original countries as well Peter Müller
2021-05-18 10:38 ` Michael Tremer
2021-05-21  9:23   ` Peter Müller
2021-05-21  9:24     ` Michael Tremer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox