From mboxrd@z Thu Jan 1 00:00:00 1970 From: Peter =?utf-8?q?M=C3=BCller?= To: location@lists.ipfire.org Subject: Re: [PATCH v3] location-importer.in: track original countries as well Date: Wed, 02 Jun 2021 23:01:32 +0200 Message-ID: <305db54f-751f-8ba5-b15a-0f747c93d1a1@ipfire.org> In-Reply-To: MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============2655281421140064649==" List-Id: --===============2655281421140064649== Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Hello Michael, thanks for your reply and the annotations. Patch #4390 (https://patchwork.ipfire.org/patch/4390/) should fix them. :-) Thanks, and best regards, Peter M=C3=BCller > Hello, >=20 >> On 30 May 2021, at 09:50, Peter M=C3=BCller w= rote: >> >> This helps us to determine how many network objects have more than one >> country set, and what their original country code set looked like. >> >> The third version of this patch uses ALTER TABLE to add the column for >> original countries, preventing existing SQL setups from breaking, and is >> correctly based against the current "master" branch. 
>> >> Signed-off-by: Peter M=C3=BCller >> --- >> src/python/location-importer.in | 47 +++++++++++++++++++++------------ >> 1 file changed, 30 insertions(+), 17 deletions(-) >> >> diff --git a/src/python/location-importer.in b/src/python/location-importe= r.in >> index f796652..a3c16bc 100644 >> --- a/src/python/location-importer.in >> +++ b/src/python/location-importer.in >> @@ -166,6 +166,7 @@ class CLI(object): >> >> -- networks >> CREATE TABLE IF NOT EXISTS networks(network inet, country text); >> + ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text= []; >> ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text NOT NULL; >> CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network= ); >> CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(fam= ily(network)); >> @@ -379,7 +380,7 @@ class CLI(object): >> ON COMMIT DROP; >> CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle); >> >> - CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text N= OT NULL, source text NOT NULL) >> + CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text N= OT NULL, original_countries text[] NOT NULL, source text NOT NULL) >> ON COMMIT DROP; >> CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), = masklen(network)); >> CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network); >> @@ -410,8 +411,8 @@ class CLI(object): >> for family in (row.family for row in families): >> smallest =3D self.db.get("SELECT MIN(masklen(network)) AS prefix FROM = _rirdata WHERE family(network) =3D %s", family) >> >> - self.db.execute("INSERT INTO networks(network, country, source) \ >> - SELECT network, country, source FROM _rirdata WHERE masklen(network)= =3D %s AND family(network) =3D %s", smallest.prefix, family) >> + self.db.execute("INSERT INTO networks(network, country, original_coun= tries, source) \ >> + SELECT network, country, original_countries, source FROM _rirdata WH= ERE masklen(network) 
=3D %s AND family(network) =3D %s", smallest.prefix, fam= ily) >> >> # ... determine any other prefixes for this network family, ... >> prefixes =3D self.db.query("SELECT DISTINCT masklen(network) AS prefix= FROM _rirdata \ >> @@ -425,6 +426,7 @@ class CLI(object): >> SELECT >> _rirdata.network, >> _rirdata.country, >> + _rirdata.original_countries, >> _rirdata.source >> FROM >> _rirdata >> @@ -438,6 +440,7 @@ class CLI(object): >> DISTINCT ON (c.network) >> c.network, >> c.country, >> + c.original_countries, >> c.source, >> masklen(networks.network), >> networks.country AS parent_country >> @@ -452,10 +455,11 @@ class CLI(object): >> masklen(networks.network) DESC NULLS LAST >> ) >> INSERT INTO >> - networks(network, country, source) >> + networks(network, country, original_countries, source) >> SELECT >> network, >> country, >> + original_countries, >> source >> FROM >> filtered >> @@ -624,28 +628,37 @@ class CLI(object): >> inetnum[key] =3D [ipaddress.ip_network(val, strict=3DFalse)] >> >> elif key =3D=3D "country": >> - inetnum[key] =3D val.upper() >> + # Catch RIR data objects with more than one country code... >> + if not key in inetnum.keys(): >=20 > It would be more Pythonic to write =E2=80=9Cif not key in inetnum:=E2=80=9D. >=20 > I believe your implementation creates a list which has to be searched where= as mine performs a dictionary lookup which should be much faster. It wouldn= =E2=80=99t matter much when you only have a handful of elements to check, b= ut since we perform this code tens of thousands of times, every little bit co= unts. >=20 >> + inetnum[key] =3D [] >> + else: >> + if val.upper() in inetnum.get("country"): >> + # ... but keep this list distinct... >> + continue >> + >> + inetnum[key].append(val.upper()) >=20 > You call .upper() a second time when you could just store the value and use= that instead. 
>=20 >> # Skip empty objects >> if not inetnum or not "country" in inetnum: >> return >> >> + # Prepare skipping objects with unknown country codes... >> + invalidcountries =3D [singlecountry for singlecountry in inetnum.get("c= ountry") if singlecountry not in validcountries] >> + >> # Iterate through all networks enumerated from above, check them for pla= usibility and insert >> # them into the database, if _check_parsed_network() succeeded >> for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"): >> if self._check_parsed_network(single_network): >> >> - # Skip objects with unknown country codes - to avoid log spam for inv= alid or too small >> - # networks, this check is - kinda ugly - done at this point >> - if validcountries and inetnum.get("country") not in validcountries: >> - log.warning("Skipping network with bogus country '%s': %s" % \ >> - (inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("in= etnum"))) >> - break >> + # Skip objects with unknown country codes if they are valid to avoid = log spam... >> + if validcountries and invalidcountries: >> + log.warning("Skipping network with bogus countr(y|ies) %s (original = countries: %s): %s" % \ >> + (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") = or inetnum.get("inetnum"))) >> >> # Everything is fine here, run INSERT statement... 
>> - self.db.execute("INSERT INTO _rirdata(network, country, source) \ >> - VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country =3D e= xcluded.country", >> - "%s" % single_network, inetnum.get("country"), source_key, >> + self.db.execute("INSERT INTO _rirdata(network, country, original_coun= tries, source) \ >> + VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = =3D excluded.country", >> + "%s" % single_network, inetnum.get("country")[0], inetnum.get("count= ry"), source_key, >> ) >> >> def _parse_org_block(self, block, source_key): >> @@ -736,10 +749,10 @@ class CLI(object): >> if not self._check_parsed_network(network): >> return >> >> - self.db.execute("INSERT INTO networks(network, country, source) \ >> - VALUES(%s, %s, %s) ON CONFLICT (network) DO \ >> + self.db.execute("INSERT INTO networks(network, country, original_countr= ies, source) \ >> + VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \ >> UPDATE SET country =3D excluded.country", >> - "%s" % network, country, source_key, >> + "%s" % network, country, [country], source_key, >> ) >> >> def handle_update_announcements(self, ns): >> --=20 >> 2.26.2 >> >=20 > Would you submit a second patch that fixes those minor things and I will me= rge both? >=20 > -Michael >=20 --===============2655281421140064649==--