Hello Michael, thanks for your reply. > Hello, > > Thank you for submitting the patch. It looks okay to me. > > But merging this would break existing databases. Could we not add an ALTER TABLE statement in order to add the original_countries column to the networks table? To ensure I understood you correctly: (a) Leave the "CREATE TABLE IF NOT EXISTS networks ..." statement untouched. (b) Add the "original_countries" column to our temporary table. (c) After having finished parsing, ALTER the networks table to add the additional column, and fill in the parsed data. If so, I will hand in a second version of this patch. Thanks, and best regards, Peter Müller > > -Michael > >> On 14 May 2021, at 17:55, Peter Müller wrote: >> >> This helps us to determine how many network objects have more than one >> country set, and what their original country code set looked like. >> >> Signed-off-by: Peter Müller >> --- >> src/python/location-importer.in | 53 ++++++++++++++++++++------------- >> 1 file changed, 32 insertions(+), 21 deletions(-) >> >> diff --git a/src/python/location-importer.in b/src/python/location-importer.in >> index e5f55af..c7162cf 100644 >> --- a/src/python/location-importer.in >> +++ b/src/python/location-importer.in >> @@ -164,7 +164,7 @@ class CLI(object): >> CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code); >> >> -- networks >> - CREATE TABLE IF NOT EXISTS networks(network inet, country text); >> + CREATE TABLE IF NOT EXISTS networks(network inet, country text, original_countries text[]); >> CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); >> CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network)); >> CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops); >> @@ -377,7 +377,7 @@ class CLI(object): >> ON COMMIT DROP; >> CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle); >> >> - CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, 
country text NOT NULL) >> + CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[]) >> ON COMMIT DROP; >> CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network)); >> CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network); >> @@ -407,8 +407,8 @@ class CLI(object): >> for family in (row.family for row in families): >> smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family) >> >> - self.db.execute("INSERT INTO networks(network, country) \ >> - SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family) >> + self.db.execute("INSERT INTO networks(network, country, original_countries) \ >> + SELECT network, country, original_countries FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family) >> >> # ... determine any other prefixes for this network family, ... >> prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \ >> @@ -421,7 +421,8 @@ class CLI(object): >> WITH candidates AS ( >> SELECT >> _rirdata.network, >> - _rirdata.country >> + _rirdata.country, >> + _rirdata.original_countries >> FROM >> _rirdata >> WHERE >> @@ -434,6 +435,7 @@ class CLI(object): >> DISTINCT ON (c.network) >> c.network, >> c.country, >> + c.original_countries, >> masklen(networks.network), >> networks.country AS parent_country >> FROM >> @@ -447,10 +449,11 @@ class CLI(object): >> masklen(networks.network) DESC NULLS LAST >> ) >> INSERT INTO >> - networks(network, country) >> + networks(network, country, original_countries) >> SELECT >> network, >> - country >> + country, >> + original_countries >> FROM >> filtered >> WHERE >> @@ -617,28 +620,36 @@ class CLI(object): >> inetnum[key] = [ipaddress.ip_network(val, strict=False)] >> >> elif key == "country": >> - inetnum[key] = val.upper() >> + # Catch RIR data objects with more than 
one country code... >> + if not key in inetnum.keys(): >> + inetnum[key] = [] >> + else: >> + if val.upper() in inetnum.get("country"): >> + # ... but keep this list distinct... >> + continue >> + >> + inetnum[key].append(val.upper()) > > It would generally be a good idea to call .upper() only once. > >> >> # Skip empty objects >> if not inetnum or not "country" in inetnum: >> return >> >> + # Prepare skipping objects with unknown country codes... >> + invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries] >> + >> # Iterate through all networks enumerated from above, check them for plausibility and insert >> # them into the database, if _check_parsed_network() succeeded >> for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"): >> if self._check_parsed_network(single_network): >> - >> - # Skip objects with unknown country codes - to avoid log spam for invalid or too small >> - # networks, this check is - kinda ugly - done at this point >> - if validcountries and inetnum.get("country") not in validcountries: >> - log.warning("Skipping network with bogus country '%s': %s" % \ >> - (inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) >> + # Skip objects with unknown country codes if they are valid to avoid log spam... >> + if validcountries and invalidcountries: >> + log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \ >> + (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) >> break >> >> - # Everything is fine here, run INSERT statement... 
>> - self.db.execute("INSERT INTO _rirdata(network, country) \ >> - VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", >> - "%s" % single_network, inetnum.get("country"), >> + self.db.execute("INSERT INTO _rirdata(network, country, original_countries) \ >> + VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", >> + "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), >> ) >> >> def _parse_org_block(self, block): >> @@ -729,10 +740,10 @@ class CLI(object): >> if not self._check_parsed_network(network): >> return >> >> - self.db.execute("INSERT INTO networks(network, country) \ >> - VALUES(%s, %s) ON CONFLICT (network) DO \ >> + self.db.execute("INSERT INTO networks(network, country, original_countries) \ >> + VALUES(%s, %s, %s) ON CONFLICT (network) DO \ >> UPDATE SET country = excluded.country", >> - "%s" % network, country, >> + "%s" % network, country, [country], >> ) >> >> def handle_update_announcements(self, ns): >> -- >> 2.26.2 >