public inbox for location@lists.ipfire.org
 help / color / mirror / Atom feed
* Re: [PATCH v2] location-importer.in: track original countries as well
@ 2021-05-27 10:53 Michael Tremer
  2021-05-30  8:50 ` [PATCH v3] " Peter Müller
  0 siblings, 1 reply; 3+ messages in thread
From: Michael Tremer @ 2021-05-27 10:53 UTC (permalink / raw)
  To: location

[-- Attachment #1: Type: text/plain, Size: 6908 bytes --]

Yes, please.

It currently doesn’t merge cleanly; there are plenty of merge conflicts.

-Michael

> On 26 May 2021, at 19:11, Peter Müller <peter.mueller(a)ipfire.org> wrote:
> 
> Hello Michael,
> 
> this one should be ready for merging as well.
> 
> Or do you want me to rebase it against the current master branch?
> 
> Thanks, and best regards,
> Peter Müller
> 
> 
>> This helps us to determine how many network objects have more than one
>> country set, and what their original country code set looked like.
>> 
>> The second version of this patch uses ALTER TABLE to add the column for
>> original countries, preventing existing SQL setups from breaking.
>> 
>> Signed-off-by: Peter Müller <peter.mueller(a)ipfire.org>
>> ---
>> src/python/location-importer.in | 52 ++++++++++++++++++++-------------
>> 1 file changed, 32 insertions(+), 20 deletions(-)
>> 
>> diff --git a/src/python/location-importer.in b/src/python/location-importer.in
>> index e5f55af..c3f908a 100644
>> --- a/src/python/location-importer.in
>> +++ b/src/python/location-importer.in
>> @@ -165,6 +165,7 @@ class CLI(object):
>> 
>> 				-- networks
>> 				CREATE TABLE IF NOT EXISTS networks(network inet, country text);
>> +				ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
>> 				CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
>> 				CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
>> 				CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
>> @@ -377,7 +378,7 @@ class CLI(object):
>> 					ON COMMIT DROP;
>> 				CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
>> 
>> -				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL)
>> +				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL)
>> 					ON COMMIT DROP;
>> 				CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
>> 				CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
>> @@ -407,8 +408,8 @@ class CLI(object):
>> 			for family in (row.family for row in families):
>> 				smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
>> 
>> -				self.db.execute("INSERT INTO networks(network, country) \
>> -					SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
>> +				self.db.execute("INSERT INTO networks(network, country, original_countries) \
>> +					SELECT network, country, original_countries FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
>> 
>> 				# ... determine any other prefixes for this network family, ...
>> 				prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
>> @@ -421,7 +422,8 @@ class CLI(object):
>> 						WITH candidates AS (
>> 							SELECT
>> 								_rirdata.network,
>> -								_rirdata.country
>> +								_rirdata.country,
>> +								_rirdata.original_countries
>> 							FROM
>> 								_rirdata
>> 							WHERE
>> @@ -434,6 +436,7 @@ class CLI(object):
>> 								DISTINCT ON (c.network)
>> 								c.network,
>> 								c.country,
>> +								c.original_countries,
>> 								masklen(networks.network),
>> 								networks.country AS parent_country
>> 							FROM
>> @@ -447,10 +450,11 @@ class CLI(object):
>> 								masklen(networks.network) DESC NULLS LAST
>> 						)
>> 						INSERT INTO
>> -							networks(network, country)
>> +							networks(network, country, original_countries)
>> 						SELECT
>> 							network,
>> -							country
>> +							country,
>> +							original_countries
>> 						FROM
>> 							filtered
>> 						WHERE
>> @@ -617,28 +621,36 @@ class CLI(object):
>> 				inetnum[key] = [ipaddress.ip_network(val, strict=False)]
>> 
>> 			elif key == "country":
>> -				inetnum[key] = val.upper()
>> +				# Catch RIR data objects with more than one country code...
>> +				if not key in inetnum.keys():
>> +					inetnum[key] = []
>> +				else:
>> +					if val.upper() in inetnum.get("country"):
>> +						# ... but keep this list distinct...
>> +						continue
>> +
>> +				inetnum[key].append(val.upper())
>> 
>> 		# Skip empty objects
>> 		if not inetnum or not "country" in inetnum:
>> 			return
>> 
>> +		# Prepare skipping objects with unknown country codes...
>> +		invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
>> +
>> 		# Iterate through all networks enumerated from above, check them for plausibility and insert
>> 		# them into the database, if _check_parsed_network() succeeded
>> 		for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
>> 			if self._check_parsed_network(single_network):
>> -
>> -				# Skip objects with unknown country codes - to avoid log spam for invalid or too small
>> -				# networks, this check is - kinda ugly - done at this point
>> -				if validcountries and inetnum.get("country") not in validcountries:
>> -					log.warning("Skipping network with bogus country '%s': %s" % \
>> -						(inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
>> +				# Skip objects with unknown country codes if they are valid to avoid log spam...
>> +				if validcountries and invalidcountries:
>> +					log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
>> +						(invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
>> 					break
>> 
>> -				# Everything is fine here, run INSERT statement...
>> -				self.db.execute("INSERT INTO _rirdata(network, country) \
>> -					VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
>> -					"%s" % single_network, inetnum.get("country"),
>> +				self.db.execute("INSERT INTO _rirdata(network, country, original_countries) \
>> +					VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
>> +					"%s" % single_network, inetnum.get("country")[0], inetnum.get("country"),
>> 				)
>> 
>> 	def _parse_org_block(self, block):
>> @@ -729,10 +741,10 @@ class CLI(object):
>> 		if not self._check_parsed_network(network):
>> 			return
>> 
>> -		self.db.execute("INSERT INTO networks(network, country) \
>> -			VALUES(%s, %s) ON CONFLICT (network) DO \
>> +		self.db.execute("INSERT INTO networks(network, country, original_countries) \
>> +			VALUES(%s, %s, %s) ON CONFLICT (network) DO \
>> 			UPDATE SET country = excluded.country",
>> -			"%s" % network, country,
>> +			"%s" % network, country, [country],
>> 		)
>> 
>> 	def handle_update_announcements(self, ns):
>> 


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-06-02 21:01 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <F1C05B70-B37A-4A17-9BDF-A165643CC07A@ipfire.org>
2021-06-02 21:00 ` [PATCH] location-importer.in: track original countries more pythonic Peter Müller
2021-06-02 21:01 ` [PATCH v3] location-importer.in: track original countries as well Peter Müller
2021-05-27 10:53 [PATCH v2] " Michael Tremer
2021-05-30  8:50 ` [PATCH v3] " Peter Müller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox