From mboxrd@z Thu Jan 1 00:00:00 1970 From: Peter =?utf-8?q?M=C3=BCller?= To: location@lists.ipfire.org Subject: Re: [PATCH] location-importer.in: skip networks with unknown country codes Date: Thu, 04 Feb 2021 18:32:45 +0100 Message-ID: In-Reply-To: MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============6563443753753487502==" List-Id: --===============6563443753753487502== Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable *cough* :-) > Hello Michael, >=20 > if I got this right, this patch still awaits acceptance/rejection, which is = why I just > wanted to bring it up again. :-) >=20 > Thanks, and best regards, > Peter M=C3=BCller >=20 >> There is no sense in parsing and storing networks whose country codes >> cannot be found in the ISO-3166-x country code table. This avoids side >> effects in applications using the location database, and introduces >> another sanity check to compensate for bogus RIR data. >> >> On location02, this affects some networks from APNIC (country code: ZZ) >> as well as a bunch of smaller allocations within the RIPE region still >> tagged to CS or YU (Yugoslavia). To my surprise, no network tagged as SU >> (Soviet Union) was found - while the NIC for .su TLD is still >> operational. :-) >> >> Fixes: #12510 >> >> Signed-off-by: Peter M=C3=BCller >> --- >> src/python/location-importer.in | 42 ++++++++++++++++++++++----------- >> 1 file changed, 28 insertions(+), 14 deletions(-) >> >> diff --git a/src/python/location-importer.in b/src/python/location-importe= r.in >> index 864eab1..89b556a 100644 >> --- a/src/python/location-importer.in >> +++ b/src/python/location-importer.in >> @@ -388,10 +388,17 @@ class CLI(object): >> TRUNCATE TABLE networks; >> """) >> =20 >> + # Fetch all valid country codes to check parsed networks against... 
>> + rows =3D self.db.query("SELECT * FROM countries ORDER BY country_code") >> + validcountries =3D [] >> + >> + for row in rows: >> + validcountries.append(row.country_code) >> + >> for source in location.importer.WHOIS_SOURCES: >> with downloader.request(source, return_blocks=3DTrue) as f: >> for block in f: >> - self._parse_block(block) >> + self._parse_block(block, validcountries) >> =20 >> # Process all parsed networks from every RIR we happen to have access = to, >> # insert the largest network chunks into the networks table immediatel= y... >> @@ -467,7 +474,7 @@ class CLI(object): >> # Download data >> with downloader.request(source) as f: >> for line in f: >> - self._parse_line(line) >> + self._parse_line(line, validcountries) >> =20 >> def _check_parsed_network(self, network): >> """ >> @@ -532,7 +539,7 @@ class CLI(object): >> # be suitable for libloc consumption... >> return True >> =20 >> - def _parse_block(self, block): >> + def _parse_block(self, block, validcountries =3D None): >> # Get first line to find out what type of block this is >> line =3D block[0] >> =20 >> @@ -542,7 +549,7 @@ class CLI(object): >> =20 >> # inetnum >> if line.startswith("inet6num:") or line.startswith("inetnum:"): >> - return self._parse_inetnum_block(block) >> + return self._parse_inetnum_block(block, validcountries) >> =20 >> # organisation >> elif line.startswith("organisation:"): >> @@ -573,7 +580,7 @@ class CLI(object): >> autnum.get("asn"), autnum.get("org"), >> ) >> =20 >> - def _parse_inetnum_block(self, block): >> + def _parse_inetnum_block(self, block, validcountries =3D None): >> log.debug("Parsing inetnum block:") >> =20 >> inetnum =3D {} >> @@ -624,17 +631,17 @@ class CLI(object): >> if not inetnum or not "country" in inetnum: >> return >> =20 >> - # Skip objects with bogus country code 'ZZ' >> - if inetnum.get("country") =3D=3D "ZZ": >> - log.warning("Skipping network with bogus country 'ZZ': %s" % \ >> - (inetnum.get("inet6num") or inetnum.get("inetnum"))) 
>> - return >> - >> network =3D ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get= ("inetnum"), strict=3DFalse) >> =20 >> if not self._check_parsed_network(network): >> return >> =20 >> + # Skip objects with unknown country codes >> + if validcountries and inetnum.get("country") not in validcountries: >> + log.warning("Skipping network with bogus country '%s': %s" % \ >> + (inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inet= num"))) >> + return >> + >> self.db.execute("INSERT INTO _rirdata(network, country) \ >> VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country =3D exclude= d.country", >> "%s" % network, inetnum.get("country"), >> @@ -659,7 +666,7 @@ class CLI(object): >> org.get("organisation"), org.get("org-name"), >> ) >> =20 >> - def _parse_line(self, line): >> + def _parse_line(self, line, validcountries =3D None): >> # Skip version line >> if line.startswith("2"): >> return >> @@ -674,8 +681,15 @@ class CLI(object): >> log.warning("Could not parse line: %s" % line) >> return >> =20 >> - # Skip any lines that are for stats only >> - if country_code =3D=3D "*": >> + # Skip any lines that are for stats only or do not have a country >> + # code at all (avoids log spam below) >> + if not country_code or country_code =3D=3D '*': >> + return >> + >> + # Skip objects with unknown country codes >> + if validcountries and country_code not in validcountries: >> + log.warning("Skipping line with bogus country '%s': %s" % \ >> + (country_code, line)) >> return >> =20 >> if type in ("ipv6", "ipv4"): >> --===============6563443753753487502==--