From mboxrd@z Thu Jan 1 00:00:00 1970 From: Michael Tremer To: location@lists.ipfire.org Subject: Re: [PATCH] location-importer.in: track original countries as well Date: Tue, 18 May 2021 11:38:24 +0100 Message-ID: In-Reply-To: MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============6617513828337044688==" List-Id: --===============6617513828337044688== Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Hello, Thank you for submitting the patch. It looks okay to me. But merging this would break existing databases. Could we not add an ALTER TABLE statement in order to add the original_countries column to the networks table? -Michael > On 14 May 2021, at 17:55, Peter M=C3=BCller wrote: >=20 > This helps us to determine how many network objects have more than one > country set, and what their original country code set looked like. >=20 > Signed-off-by: Peter M=C3=BCller > --- > src/python/location-importer.in | 53 ++++++++++++++++++++------------- > 1 file changed, 32 insertions(+), 21 deletions(-) >=20 > diff --git a/src/python/location-importer.in b/src/python/location-importer.in > index e5f55af..c7162cf 100644 > --- a/src/python/location-importer.in > +++ b/src/python/location-importer.in > @@ -164,7 +164,7 @@ class CLI(object): > CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code); >=20 > -- networks > - CREATE TABLE IF NOT EXISTS networks(network inet, country text); > + CREATE TABLE IF NOT EXISTS networks(network inet, country text, original_countries text[]); > CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); > CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network)); > CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops); > @@ -377,7 +377,7 @@ class CLI(object): > ON COMMIT DROP; > CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle); >=20 > - CREATE TEMPORARY TABLE 
_rirdata(network inet NOT NULL, country text NOT NULL) > + CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[]) > ON COMMIT DROP; > CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network)); > CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network); > @@ -407,8 +407,8 @@ class CLI(object): > for family in (row.family for row in families): > smallest =3D self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) =3D %s", family) >=20 > - self.db.execute("INSERT INTO networks(network, country) \ > - SELECT network, country FROM _rirdata WHERE masklen(network) =3D %s AND family(network) =3D %s", smallest.prefix, family) > + self.db.execute("INSERT INTO networks(network, country, original_countries) \ > + SELECT network, country, original_countries FROM _rirdata WHERE masklen(network) =3D %s AND family(network) =3D %s", smallest.prefix, family) >=20 > # ... determine any other prefixes for this network family, ... 
> prefixes =3D self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \ > @@ -421,7 +421,8 @@ class CLI(object): > WITH candidates AS ( > SELECT > _rirdata.network, > - _rirdata.country > + _rirdata.country, > + _rirdata.original_countries > FROM > _rirdata > WHERE > @@ -434,6 +435,7 @@ class CLI(object): > DISTINCT ON (c.network) > c.network, > c.country, > + c.original_countries, > masklen(networks.network), > networks.country AS parent_country > FROM > @@ -447,10 +449,11 @@ class CLI(object): > masklen(networks.network) DESC NULLS LAST > ) > INSERT INTO > - networks(network, country) > + networks(network, country, original_countries) > SELECT > network, > - country > + country, > + original_countries > FROM > filtered > WHERE > @@ -617,28 +620,36 @@ class CLI(object): > inetnum[key] =3D [ipaddress.ip_network(val, strict=3DFalse)] >=20 > elif key =3D=3D "country": > - inetnum[key] =3D val.upper() > + # Catch RIR data objects with more than one country code... > + if not key in inetnum.keys(): > + inetnum[key] =3D [] > + else: > + if val.upper() in inetnum.get("country"): > + # ... but keep this list distinct... > + continue > + > + inetnum[key].append(val.upper()) It would generally be a good idea to call .upper() only once. >=20 > # Skip empty objects > if not inetnum or not "country" in inetnum: > return >=20 > + # Prepare skipping objects with unknown country codes... 
> + invalidcountries =3D [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries] > + > # Iterate through all networks enumerated from above, check them for plausibility and insert > # them into the database, if _check_parsed_network() succeeded > for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"): > if self._check_parsed_network(single_network): > - > - # Skip objects with unknown country codes - to avoid log spam for invalid or too small > - # networks, this check is - kinda ugly - done at this point > - if validcountries and inetnum.get("country") not in validcountries: > - log.warning("Skipping network with bogus country '%s': %s" % \ > - (inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) > + # Skip objects with unknown country codes if they are valid to avoid log spam... > + if validcountries and invalidcountries: > + log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \ > + (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) > break >=20 > - # Everything is fine here, run INSERT statement... 
> - self.db.execute("INSERT INTO _rirdata(network, country) \ > - VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country =3D excluded.country", > - "%s" % single_network, inetnum.get("country"), > + self.db.execute("INSERT INTO _rirdata(network, country, original_countries) \ > + VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country =3D excluded.country", > + "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), > ) >=20 > def _parse_org_block(self, block): > @@ -729,10 +740,10 @@ class CLI(object): > if not self._check_parsed_network(network): > return >=20 > - self.db.execute("INSERT INTO networks(network, country) \ > - VALUES(%s, %s) ON CONFLICT (network) DO \ > + self.db.execute("INSERT INTO networks(network, country, original_countries) \ > + VALUES(%s, %s, %s) ON CONFLICT (network) DO \ > UPDATE SET country =3D excluded.country", > - "%s" % network, country, > + "%s" % network, country, [country], > ) >=20 > def handle_update_announcements(self, ns): > --=20 > 2.26.2 --===============6617513828337044688==--