From mboxrd@z Thu Jan 1 00:00:00 1970 From: Michael Tremer To: location@lists.ipfire.org Subject: Re: [PATCH] location-importer.in: track original countries as well Date: Fri, 21 May 2021 10:24:49 +0100 Message-ID: In-Reply-To: MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============7409576504075391096==" List-Id: --===============7409576504075391096== Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Hello, > On 21 May 2021, at 10:23, Peter Müller wrote: > > Hello Michael, > > thanks for your reply. > >> Hello, >> >> Thank you for submitting the patch. It looks okay to me. >> >> But merging this would break existing databases. Could we not add an ALTER TABLE statement in order to add the original_countries column to the networks table? > > To ensure I understood you correctly: > > (a) Leave the "CREATE TABLE IF NOT EXISTS networks ..." statement untouched. Yes, but immediately after add an ALTER TABLE ADD COLUMN IF NOT EXISTS statement. > (b) Add the "original_countries" to our temporary table. Temporary tables do not exist, so we can create them exactly the way we need them. > (c) After having finished parsing, ALTER the networks table to add the additional column, > and fill in the parsed data. Please do this when the database schema is being initialised. > If so, I will hand in a second version of this patch. Thank you. -Michael > > Thanks, and best regards, > Peter Müller > >> >> -Michael >> >>> On 14 May 2021, at 17:55, Peter Müller wrote: >>> >>> This helps us to determine how many network objects have more than one >>> country set, and what their original country code set looked like.
>>>=20 >>> Signed-off-by: Peter M=C3=BCller >>> --- >>> src/python/location-importer.in | 53 ++++++++++++++++++++------------- >>> 1 file changed, 32 insertions(+), 21 deletions(-) >>>=20 >>> diff --git a/src/python/location-importer.in b/src/python/location-import= er.in >>> index e5f55af..c7162cf 100644 >>> --- a/src/python/location-importer.in >>> +++ b/src/python/location-importer.in >>> @@ -164,7 +164,7 @@ class CLI(object): >>> CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries= (country_code); >>>=20 >>> -- networks >>> - CREATE TABLE IF NOT EXISTS networks(network inet, country text); >>> + CREATE TABLE IF NOT EXISTS networks(network inet, country text, orig= inal_countries text[]); >>> CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(networ= k); >>> CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(fa= mily(network)); >>> CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(net= work inet_ops); >>> @@ -377,7 +377,7 @@ class CLI(object): >>> ON COMMIT DROP; >>> CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle); >>>=20 >>> - CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text = NOT NULL) >>> + CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text = NOT NULL, original_countries text[]) >>> ON COMMIT DROP; >>> CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network),= masklen(network)); >>> CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network); >>> @@ -407,8 +407,8 @@ class CLI(object): >>> for family in (row.family for row in families): >>> smallest =3D self.db.get("SELECT MIN(masklen(network)) AS prefix FROM= _rirdata WHERE family(network) =3D %s", family) >>>=20 >>> - self.db.execute("INSERT INTO networks(network, country) \ >>> - SELECT network, country FROM _rirdata WHERE masklen(network) =3D %s= AND family(network) =3D %s", smallest.prefix, family) >>> + self.db.execute("INSERT INTO networks(network, country, original_cou= 
ntries) \ >>> + SELECT network, country, original_countries FROM _rirdata WHERE mas= klen(network) =3D %s AND family(network) =3D %s", smallest.prefix, family) >>>=20 >>> # ... determine any other prefixes for this network family, ... >>> prefixes =3D self.db.query("SELECT DISTINCT masklen(network) AS prefi= x FROM _rirdata \ >>> @@ -421,7 +421,8 @@ class CLI(object): >>> WITH candidates AS ( >>> SELECT >>> _rirdata.network, >>> - _rirdata.country >>> + _rirdata.country, >>> + _rirdata.original_countries >>> FROM >>> _rirdata >>> WHERE >>> @@ -434,6 +435,7 @@ class CLI(object): >>> DISTINCT ON (c.network) >>> c.network, >>> c.country, >>> + c.original_countries, >>> masklen(networks.network), >>> networks.country AS parent_country >>> FROM >>> @@ -447,10 +449,11 @@ class CLI(object): >>> masklen(networks.network) DESC NULLS LAST >>> ) >>> INSERT INTO >>> - networks(network, country) >>> + networks(network, country, original_countries) >>> SELECT >>> network, >>> - country >>> + country, >>> + original_countries >>> FROM >>> filtered >>> WHERE >>> @@ -617,28 +620,36 @@ class CLI(object): >>> inetnum[key] =3D [ipaddress.ip_network(val, strict=3DFalse)] >>>=20 >>> elif key =3D=3D "country": >>> - inetnum[key] =3D val.upper() >>> + # Catch RIR data objects with more than one country code... >>> + if not key in inetnum.keys(): >>> + inetnum[key] =3D [] >>> + else: >>> + if val.upper() in inetnum.get("country"): >>> + # ... but keep this list distinct... >>> + continue >>> + >>> + inetnum[key].append(val.upper()) >>=20 >> It would generally be a good idea to call .upper() only once. >>=20 >>>=20 >>> # Skip empty objects >>> if not inetnum or not "country" in inetnum: >>> return >>>=20 >>> + # Prepare skipping objects with unknown country codes... 
>>> + invalidcountries =3D [singlecountry for singlecountry in inetnum.get("= country") if singlecountry not in validcountries] >>> + >>> # Iterate through all networks enumerated from above, check them for pl= ausibility and insert >>> # them into the database, if _check_parsed_network() succeeded >>> for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"): >>> if self._check_parsed_network(single_network): >>> - >>> - # Skip objects with unknown country codes - to avoid log spam for in= valid or too small >>> - # networks, this check is - kinda ugly - done at this point >>> - if validcountries and inetnum.get("country") not in validcountries: >>> - log.warning("Skipping network with bogus country '%s': %s" % \ >>> - (inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("i= netnum"))) >>> + # Skip objects with unknown country codes if they are valid to avoid= log spam... >>> + if validcountries and invalidcountries: >>> + log.warning("Skipping network with bogus countr(y|ies) %s (original= countries: %s): %s" % \ >>> + (invalidcountries, inetnum.get("country"), inetnum.get("inet6num")= or inetnum.get("inetnum"))) >>> break >>>=20 >>> - # Everything is fine here, run INSERT statement... 
>>> - self.db.execute("INSERT INTO _rirdata(network, country) \ >>> - VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country =3D excl= uded.country", >>> - "%s" % single_network, inetnum.get("country"), >>> + self.db.execute("INSERT INTO _rirdata(network, country, original_cou= ntries) \ >>> + VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country =3D = excluded.country", >>> + "%s" % single_network, inetnum.get("country")[0], inetnum.get("coun= try"), >>> ) >>>=20 >>> def _parse_org_block(self, block): >>> @@ -729,10 +740,10 @@ class CLI(object): >>> if not self._check_parsed_network(network): >>> return >>>=20 >>> - self.db.execute("INSERT INTO networks(network, country) \ >>> - VALUES(%s, %s) ON CONFLICT (network) DO \ >>> + self.db.execute("INSERT INTO networks(network, country, original_count= ries) \ >>> + VALUES(%s, %s, %s) ON CONFLICT (network) DO \ >>> UPDATE SET country =3D excluded.country", >>> - "%s" % network, country, >>> + "%s" % network, country, [country], >>> ) >>>=20 >>> def handle_update_announcements(self, ns): >>> --=20 >>> 2.26.2 >>=20 --===============7409576504075391096==--