public inbox for location@lists.ipfire.org
 help / color / mirror / Atom feed
* [PATCH] location-importer.in: track original countries as well
@ 2021-05-14 16:55 Peter Müller
  2021-05-18 10:38 ` Michael Tremer
  0 siblings, 1 reply; 4+ messages in thread
From: Peter Müller @ 2021-05-14 16:55 UTC (permalink / raw)
  To: location

[-- Attachment #1: Type: text/plain, Size: 6147 bytes --]

This helps us to determine how many network objects have more than one
country set, and what their original country code set looked like.

Signed-off-by: Peter Müller <peter.mueller(a)ipfire.org>
---
 src/python/location-importer.in | 53 ++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/src/python/location-importer.in b/src/python/location-importer.in
index e5f55af..c7162cf 100644
--- a/src/python/location-importer.in
+++ b/src/python/location-importer.in
@@ -164,7 +164,7 @@ class CLI(object):
 				CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
 
 				-- networks
-				CREATE TABLE IF NOT EXISTS networks(network inet, country text);
+				CREATE TABLE IF NOT EXISTS networks(network inet, country text, original_countries text[]);
 				CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
 				CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
 				CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
@@ -377,7 +377,7 @@ class CLI(object):
 					ON COMMIT DROP;
 				CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
 
-				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL)
+				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[])
 					ON COMMIT DROP;
 				CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
 				CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
@@ -407,8 +407,8 @@ class CLI(object):
 			for family in (row.family for row in families):
 				smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
 
-				self.db.execute("INSERT INTO networks(network, country) \
-					SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
+				self.db.execute("INSERT INTO networks(network, country, original_countries) \
+					SELECT network, country, original_countries FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
 
 				# ... determine any other prefixes for this network family, ...
 				prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
@@ -421,7 +421,8 @@ class CLI(object):
 						WITH candidates AS (
 							SELECT
 								_rirdata.network,
-								_rirdata.country
+								_rirdata.country,
+								_rirdata.original_countries
 							FROM
 								_rirdata
 							WHERE
@@ -434,6 +435,7 @@ class CLI(object):
 								DISTINCT ON (c.network)
 								c.network,
 								c.country,
+								c.original_countries,
 								masklen(networks.network),
 								networks.country AS parent_country
 							FROM
@@ -447,10 +449,11 @@ class CLI(object):
 								masklen(networks.network) DESC NULLS LAST
 						)
 						INSERT INTO
-							networks(network, country)
+							networks(network, country, original_countries)
 						SELECT
 							network,
-							country
+							country,
+							original_countries
 						FROM
 							filtered
 						WHERE
@@ -617,28 +620,36 @@ class CLI(object):
 				inetnum[key] = [ipaddress.ip_network(val, strict=False)]
 
 			elif key == "country":
-				inetnum[key] = val.upper()
+				# Catch RIR data objects with more than one country code...
+				if not key in inetnum.keys():
+					inetnum[key] = []
+				else:
+					if val.upper() in inetnum.get("country"):
+						# ... but keep this list distinct...
+						continue
+
+				inetnum[key].append(val.upper())
 
 		# Skip empty objects
 		if not inetnum or not "country" in inetnum:
 			return
 
+		# Prepare skipping objects with unknown country codes...
+		invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
+
 		# Iterate through all networks enumerated from above, check them for plausibility and insert
 		# them into the database, if _check_parsed_network() succeeded
 		for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
 			if self._check_parsed_network(single_network):
-
-				# Skip objects with unknown country codes - to avoid log spam for invalid or too small
-				# networks, this check is - kinda ugly - done at this point
-				if validcountries and inetnum.get("country") not in validcountries:
-					log.warning("Skipping network with bogus country '%s': %s" % \
-						(inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
+				# Skip objects with unknown country codes if they are valid to avoid log spam...
+				if validcountries and invalidcountries:
+					log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
+						(invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
 					break
 
-				# Everything is fine here, run INSERT statement...
-				self.db.execute("INSERT INTO _rirdata(network, country) \
-					VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
-					"%s" % single_network, inetnum.get("country"),
+				self.db.execute("INSERT INTO _rirdata(network, country, original_countries) \
+					VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
+					"%s" % single_network, inetnum.get("country")[0], inetnum.get("country"),
 				)
 
 	def _parse_org_block(self, block):
@@ -729,10 +740,10 @@ class CLI(object):
 		if not self._check_parsed_network(network):
 			return
 
-		self.db.execute("INSERT INTO networks(network, country) \
-			VALUES(%s, %s) ON CONFLICT (network) DO \
+		self.db.execute("INSERT INTO networks(network, country, original_countries) \
+			VALUES(%s, %s, %s) ON CONFLICT (network) DO \
 			UPDATE SET country = excluded.country",
-			"%s" % network, country,
+			"%s" % network, country, [country],
 		)
 
 	def handle_update_announcements(self, ns):
-- 
2.26.2

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] location-importer.in: track original countries as well
  2021-05-14 16:55 [PATCH] location-importer.in: track original countries as well Peter Müller
@ 2021-05-18 10:38 ` Michael Tremer
  2021-05-21  9:23   ` Peter Müller
  0 siblings, 1 reply; 4+ messages in thread
From: Michael Tremer @ 2021-05-18 10:38 UTC (permalink / raw)
  To: location

[-- Attachment #1: Type: text/plain, Size: 6754 bytes --]

Hello,

Thank you for submitting the patch. It looks okay to me.

But merging this would break existing databases. Could we not add an ALTER TABLE statement in order to add the original_countries column to the networks table?

-Michael

> On 14 May 2021, at 17:55, Peter Müller <peter.mueller(a)ipfire.org> wrote:
> 
> This helps us to determine how many network objects have more than one
> country set, and what their original country code set looked like.
> 
> Signed-off-by: Peter Müller <peter.mueller(a)ipfire.org>
> ---
> src/python/location-importer.in | 53 ++++++++++++++++++++-------------
> 1 file changed, 32 insertions(+), 21 deletions(-)
> 
> diff --git a/src/python/location-importer.in b/src/python/location-importer.in
> index e5f55af..c7162cf 100644
> --- a/src/python/location-importer.in
> +++ b/src/python/location-importer.in
> @@ -164,7 +164,7 @@ class CLI(object):
> 				CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
> 
> 				-- networks
> -				CREATE TABLE IF NOT EXISTS networks(network inet, country text);
> +				CREATE TABLE IF NOT EXISTS networks(network inet, country text, original_countries text[]);
> 				CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
> 				CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
> 				CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
> @@ -377,7 +377,7 @@ class CLI(object):
> 					ON COMMIT DROP;
> 				CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
> 
> -				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL)
> +				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[])
> 					ON COMMIT DROP;
> 				CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
> 				CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
> @@ -407,8 +407,8 @@ class CLI(object):
> 			for family in (row.family for row in families):
> 				smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
> 
> -				self.db.execute("INSERT INTO networks(network, country) \
> -					SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
> +				self.db.execute("INSERT INTO networks(network, country, original_countries) \
> +					SELECT network, country, original_countries FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
> 
> 				# ... determine any other prefixes for this network family, ...
> 				prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
> @@ -421,7 +421,8 @@ class CLI(object):
> 						WITH candidates AS (
> 							SELECT
> 								_rirdata.network,
> -								_rirdata.country
> +								_rirdata.country,
> +								_rirdata.original_countries
> 							FROM
> 								_rirdata
> 							WHERE
> @@ -434,6 +435,7 @@ class CLI(object):
> 								DISTINCT ON (c.network)
> 								c.network,
> 								c.country,
> +								c.original_countries,
> 								masklen(networks.network),
> 								networks.country AS parent_country
> 							FROM
> @@ -447,10 +449,11 @@ class CLI(object):
> 								masklen(networks.network) DESC NULLS LAST
> 						)
> 						INSERT INTO
> -							networks(network, country)
> +							networks(network, country, original_countries)
> 						SELECT
> 							network,
> -							country
> +							country,
> +							original_countries
> 						FROM
> 							filtered
> 						WHERE
> @@ -617,28 +620,36 @@ class CLI(object):
> 				inetnum[key] = [ipaddress.ip_network(val, strict=False)]
> 
> 			elif key == "country":
> -				inetnum[key] = val.upper()
> +				# Catch RIR data objects with more than one country code...
> +				if not key in inetnum.keys():
> +					inetnum[key] = []
> +				else:
> +					if val.upper() in inetnum.get("country"):
> +						# ... but keep this list distinct...
> +						continue
> +
> +				inetnum[key].append(val.upper())

It would generally be a good idea to call .upper() only once.

> 
> 		# Skip empty objects
> 		if not inetnum or not "country" in inetnum:
> 			return
> 
> +		# Prepare skipping objects with unknown country codes...
> +		invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
> +
> 		# Iterate through all networks enumerated from above, check them for plausibility and insert
> 		# them into the database, if _check_parsed_network() succeeded
> 		for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
> 			if self._check_parsed_network(single_network):
> -
> -				# Skip objects with unknown country codes - to avoid log spam for invalid or too small
> -				# networks, this check is - kinda ugly - done at this point
> -				if validcountries and inetnum.get("country") not in validcountries:
> -					log.warning("Skipping network with bogus country '%s': %s" % \
> -						(inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
> +				# Skip objects with unknown country codes if they are valid to avoid log spam...
> +				if validcountries and invalidcountries:
> +					log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
> +						(invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
> 					break
> 
> -				# Everything is fine here, run INSERT statement...
> -				self.db.execute("INSERT INTO _rirdata(network, country) \
> -					VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
> -					"%s" % single_network, inetnum.get("country"),
> +				self.db.execute("INSERT INTO _rirdata(network, country, original_countries) \
> +					VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
> +					"%s" % single_network, inetnum.get("country")[0], inetnum.get("country"),
> 				)
> 
> 	def _parse_org_block(self, block):
> @@ -729,10 +740,10 @@ class CLI(object):
> 		if not self._check_parsed_network(network):
> 			return
> 
> -		self.db.execute("INSERT INTO networks(network, country) \
> -			VALUES(%s, %s) ON CONFLICT (network) DO \
> +		self.db.execute("INSERT INTO networks(network, country, original_countries) \
> +			VALUES(%s, %s, %s) ON CONFLICT (network) DO \
> 			UPDATE SET country = excluded.country",
> -			"%s" % network, country,
> +			"%s" % network, country, [country],
> 		)
> 
> 	def handle_update_announcements(self, ns):
> -- 
> 2.26.2


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] location-importer.in: track original countries as well
  2021-05-18 10:38 ` Michael Tremer
@ 2021-05-21  9:23   ` Peter Müller
  2021-05-21  9:24     ` Michael Tremer
  0 siblings, 1 reply; 4+ messages in thread
From: Peter Müller @ 2021-05-21  9:23 UTC (permalink / raw)
  To: location

[-- Attachment #1: Type: text/plain, Size: 7358 bytes --]

Hello Michael,

thanks for your reply.

> Hello,
> 
> Thank you for submitting the patch. It looks okay to me.
> 
> But merging this would break existing databases. Could we not add an ALTER TABLE statement in order to add the original_countries column to the networks table?

To ensure I understood you correctly:

(a) Leave the "CREATE TABLE IF NOT EXISTS networks ..." statement untouched.

(b) Add the "original_countries" to our temporary table.

(c) After having finished parsing, ALTER the networks table to add the additional column,
    and fill in the parsed data.

If so, I will hand in a second version of this patch.

Thanks, and best regards,
Peter Müller

> 
> -Michael
> 
>> On 14 May 2021, at 17:55, Peter Müller <peter.mueller(a)ipfire.org> wrote:
>>
>> This helps us to determine how many network objects have more than one
>> country set, and what their original country code set looked like.
>>
>> Signed-off-by: Peter Müller <peter.mueller(a)ipfire.org>
>> ---
>> src/python/location-importer.in | 53 ++++++++++++++++++++-------------
>> 1 file changed, 32 insertions(+), 21 deletions(-)
>>
>> diff --git a/src/python/location-importer.in b/src/python/location-importer.in
>> index e5f55af..c7162cf 100644
>> --- a/src/python/location-importer.in
>> +++ b/src/python/location-importer.in
>> @@ -164,7 +164,7 @@ class CLI(object):
>> 				CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
>>
>> 				-- networks
>> -				CREATE TABLE IF NOT EXISTS networks(network inet, country text);
>> +				CREATE TABLE IF NOT EXISTS networks(network inet, country text, original_countries text[]);
>> 				CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
>> 				CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
>> 				CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
>> @@ -377,7 +377,7 @@ class CLI(object):
>> 					ON COMMIT DROP;
>> 				CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
>>
>> -				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL)
>> +				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[])
>> 					ON COMMIT DROP;
>> 				CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
>> 				CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
>> @@ -407,8 +407,8 @@ class CLI(object):
>> 			for family in (row.family for row in families):
>> 				smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
>>
>> -				self.db.execute("INSERT INTO networks(network, country) \
>> -					SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
>> +				self.db.execute("INSERT INTO networks(network, country, original_countries) \
>> +					SELECT network, country, original_countries FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
>>
>> 				# ... determine any other prefixes for this network family, ...
>> 				prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
>> @@ -421,7 +421,8 @@ class CLI(object):
>> 						WITH candidates AS (
>> 							SELECT
>> 								_rirdata.network,
>> -								_rirdata.country
>> +								_rirdata.country,
>> +								_rirdata.original_countries
>> 							FROM
>> 								_rirdata
>> 							WHERE
>> @@ -434,6 +435,7 @@ class CLI(object):
>> 								DISTINCT ON (c.network)
>> 								c.network,
>> 								c.country,
>> +								c.original_countries,
>> 								masklen(networks.network),
>> 								networks.country AS parent_country
>> 							FROM
>> @@ -447,10 +449,11 @@ class CLI(object):
>> 								masklen(networks.network) DESC NULLS LAST
>> 						)
>> 						INSERT INTO
>> -							networks(network, country)
>> +							networks(network, country, original_countries)
>> 						SELECT
>> 							network,
>> -							country
>> +							country,
>> +							original_countries
>> 						FROM
>> 							filtered
>> 						WHERE
>> @@ -617,28 +620,36 @@ class CLI(object):
>> 				inetnum[key] = [ipaddress.ip_network(val, strict=False)]
>>
>> 			elif key == "country":
>> -				inetnum[key] = val.upper()
>> +				# Catch RIR data objects with more than one country code...
>> +				if not key in inetnum.keys():
>> +					inetnum[key] = []
>> +				else:
>> +					if val.upper() in inetnum.get("country"):
>> +						# ... but keep this list distinct...
>> +						continue
>> +
>> +				inetnum[key].append(val.upper())
> 
> It would generally be a good idea to call .upper() only once.
> 
>>
>> 		# Skip empty objects
>> 		if not inetnum or not "country" in inetnum:
>> 			return
>>
>> +		# Prepare skipping objects with unknown country codes...
>> +		invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
>> +
>> 		# Iterate through all networks enumerated from above, check them for plausibility and insert
>> 		# them into the database, if _check_parsed_network() succeeded
>> 		for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
>> 			if self._check_parsed_network(single_network):
>> -
>> -				# Skip objects with unknown country codes - to avoid log spam for invalid or too small
>> -				# networks, this check is - kinda ugly - done at this point
>> -				if validcountries and inetnum.get("country") not in validcountries:
>> -					log.warning("Skipping network with bogus country '%s': %s" % \
>> -						(inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
>> +				# Skip objects with unknown country codes if they are valid to avoid log spam...
>> +				if validcountries and invalidcountries:
>> +					log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
>> +						(invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
>> 					break
>>
>> -				# Everything is fine here, run INSERT statement...
>> -				self.db.execute("INSERT INTO _rirdata(network, country) \
>> -					VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
>> -					"%s" % single_network, inetnum.get("country"),
>> +				self.db.execute("INSERT INTO _rirdata(network, country, original_countries) \
>> +					VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
>> +					"%s" % single_network, inetnum.get("country")[0], inetnum.get("country"),
>> 				)
>>
>> 	def _parse_org_block(self, block):
>> @@ -729,10 +740,10 @@ class CLI(object):
>> 		if not self._check_parsed_network(network):
>> 			return
>>
>> -		self.db.execute("INSERT INTO networks(network, country) \
>> -			VALUES(%s, %s) ON CONFLICT (network) DO \
>> +		self.db.execute("INSERT INTO networks(network, country, original_countries) \
>> +			VALUES(%s, %s, %s) ON CONFLICT (network) DO \
>> 			UPDATE SET country = excluded.country",
>> -			"%s" % network, country,
>> +			"%s" % network, country, [country],
>> 		)
>>
>> 	def handle_update_announcements(self, ns):
>> -- 
>> 2.26.2
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] location-importer.in: track original countries as well
  2021-05-21  9:23   ` Peter Müller
@ 2021-05-21  9:24     ` Michael Tremer
  0 siblings, 0 replies; 4+ messages in thread
From: Michael Tremer @ 2021-05-21  9:24 UTC (permalink / raw)
  To: location

[-- Attachment #1: Type: text/plain, Size: 7913 bytes --]

Hello,

> On 21 May 2021, at 10:23, Peter Müller <peter.mueller(a)ipfire.org> wrote:
> 
> Hello Michael,
> 
> thanks for your reply.
> 
>> Hello,
>> 
>> Thank you for submitting the patch. It looks okay to me.
>> 
>> But merging this would break existing databases. Could we not add an ALTER TABLE statement in order to add the original_countries column to the networks table?
> 
> To ensure I understood you correctly:
> 
> (a) Leave the "CREATE TABLE IF NOT EXISTS networks ..." statement untouched.

Yes, but immediately after it, add an "ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[]" statement.

> (b) Add the "original_countries" to our temporary table.

Temporary tables do not exist before the import runs (they are created with ON COMMIT DROP), so we can create them exactly the way we need them.

> (c) After having finished parsing, ALTER the networks table to add the additional column,
>    and fill in the parsed data.

Please do this when the database schema is being initialised.

> If so, I will hand in a second version of this patch.

Thank you.

-Michael

> 
> Thanks, and best regards,
> Peter Müller
> 
>> 
>> -Michael
>> 
>>> On 14 May 2021, at 17:55, Peter Müller <peter.mueller(a)ipfire.org> wrote:
>>> 
>>> This helps us to determine how many network objects have more than one
>>> country set, and what their original country code set looked like.
>>> 
>>> Signed-off-by: Peter Müller <peter.mueller(a)ipfire.org>
>>> ---
>>> src/python/location-importer.in | 53 ++++++++++++++++++++-------------
>>> 1 file changed, 32 insertions(+), 21 deletions(-)
>>> 
>>> diff --git a/src/python/location-importer.in b/src/python/location-importer.in
>>> index e5f55af..c7162cf 100644
>>> --- a/src/python/location-importer.in
>>> +++ b/src/python/location-importer.in
>>> @@ -164,7 +164,7 @@ class CLI(object):
>>> 				CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
>>> 
>>> 				-- networks
>>> -				CREATE TABLE IF NOT EXISTS networks(network inet, country text);
>>> +				CREATE TABLE IF NOT EXISTS networks(network inet, country text, original_countries text[]);
>>> 				CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
>>> 				CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
>>> 				CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
>>> @@ -377,7 +377,7 @@ class CLI(object):
>>> 					ON COMMIT DROP;
>>> 				CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
>>> 
>>> -				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL)
>>> +				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[])
>>> 					ON COMMIT DROP;
>>> 				CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
>>> 				CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
>>> @@ -407,8 +407,8 @@ class CLI(object):
>>> 			for family in (row.family for row in families):
>>> 				smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
>>> 
>>> -				self.db.execute("INSERT INTO networks(network, country) \
>>> -					SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
>>> +				self.db.execute("INSERT INTO networks(network, country, original_countries) \
>>> +					SELECT network, country, original_countries FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
>>> 
>>> 				# ... determine any other prefixes for this network family, ...
>>> 				prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
>>> @@ -421,7 +421,8 @@ class CLI(object):
>>> 						WITH candidates AS (
>>> 							SELECT
>>> 								_rirdata.network,
>>> -								_rirdata.country
>>> +								_rirdata.country,
>>> +								_rirdata.original_countries
>>> 							FROM
>>> 								_rirdata
>>> 							WHERE
>>> @@ -434,6 +435,7 @@ class CLI(object):
>>> 								DISTINCT ON (c.network)
>>> 								c.network,
>>> 								c.country,
>>> +								c.original_countries,
>>> 								masklen(networks.network),
>>> 								networks.country AS parent_country
>>> 							FROM
>>> @@ -447,10 +449,11 @@ class CLI(object):
>>> 								masklen(networks.network) DESC NULLS LAST
>>> 						)
>>> 						INSERT INTO
>>> -							networks(network, country)
>>> +							networks(network, country, original_countries)
>>> 						SELECT
>>> 							network,
>>> -							country
>>> +							country,
>>> +							original_countries
>>> 						FROM
>>> 							filtered
>>> 						WHERE
>>> @@ -617,28 +620,36 @@ class CLI(object):
>>> 				inetnum[key] = [ipaddress.ip_network(val, strict=False)]
>>> 
>>> 			elif key == "country":
>>> -				inetnum[key] = val.upper()
>>> +				# Catch RIR data objects with more than one country code...
>>> +				if not key in inetnum.keys():
>>> +					inetnum[key] = []
>>> +				else:
>>> +					if val.upper() in inetnum.get("country"):
>>> +						# ... but keep this list distinct...
>>> +						continue
>>> +
>>> +				inetnum[key].append(val.upper())
>> 
>> It would generally be a good idea to call .upper() only once.
>> 
>>> 
>>> 		# Skip empty objects
>>> 		if not inetnum or not "country" in inetnum:
>>> 			return
>>> 
>>> +		# Prepare skipping objects with unknown country codes...
>>> +		invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
>>> +
>>> 		# Iterate through all networks enumerated from above, check them for plausibility and insert
>>> 		# them into the database, if _check_parsed_network() succeeded
>>> 		for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
>>> 			if self._check_parsed_network(single_network):
>>> -
>>> -				# Skip objects with unknown country codes - to avoid log spam for invalid or too small
>>> -				# networks, this check is - kinda ugly - done at this point
>>> -				if validcountries and inetnum.get("country") not in validcountries:
>>> -					log.warning("Skipping network with bogus country '%s': %s" % \
>>> -						(inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
>>> +				# Skip objects with unknown country codes if they are valid to avoid log spam...
>>> +				if validcountries and invalidcountries:
>>> +					log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
>>> +						(invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
>>> 					break
>>> 
>>> -				# Everything is fine here, run INSERT statement...
>>> -				self.db.execute("INSERT INTO _rirdata(network, country) \
>>> -					VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
>>> -					"%s" % single_network, inetnum.get("country"),
>>> +				self.db.execute("INSERT INTO _rirdata(network, country, original_countries) \
>>> +					VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
>>> +					"%s" % single_network, inetnum.get("country")[0], inetnum.get("country"),
>>> 				)
>>> 
>>> 	def _parse_org_block(self, block):
>>> @@ -729,10 +740,10 @@ class CLI(object):
>>> 		if not self._check_parsed_network(network):
>>> 			return
>>> 
>>> -		self.db.execute("INSERT INTO networks(network, country) \
>>> -			VALUES(%s, %s) ON CONFLICT (network) DO \
>>> +		self.db.execute("INSERT INTO networks(network, country, original_countries) \
>>> +			VALUES(%s, %s, %s) ON CONFLICT (network) DO \
>>> 			UPDATE SET country = excluded.country",
>>> -			"%s" % network, country,
>>> +			"%s" % network, country, [country],
>>> 		)
>>> 
>>> 	def handle_update_announcements(self, ns):
>>> -- 
>>> 2.26.2
>> 


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-05-21  9:24 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-14 16:55 [PATCH] location-importer.in: track original countries as well Peter Müller
2021-05-18 10:38 ` Michael Tremer
2021-05-21  9:23   ` Peter Müller
2021-05-21  9:24     ` Michael Tremer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox