From mboxrd@z Thu Jan 1 00:00:00 1970 From: Peter =?utf-8?q?M=C3=BCller?= To: location@lists.ipfire.org Subject: Re: [PATCH] location-importer.in: skip networks with unknown country codes Date: Tue, 26 Jan 2021 16:34:40 +0100 Message-ID: In-Reply-To: <20201030143510.6514-1-peter.mueller@ipfire.org> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============5631689742480446480==" List-Id: --===============5631689742480446480== Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Hello Michael, if I got this right, this patch still awaits acceptance/rejection, which is wh= y I just wanted to bring it up again. :-) Thanks, and best regards, Peter M=C3=BCller > There is no sense in parsing and storing networks whose country codes > cannot be found in the ISO-3166-x country code table. This avoids side > effects in applications using the location database, and introduces > another sanity check to compensate for bogus RIR data. >=20 > On location02, this affects some networks from APNIC (country code: ZZ) > as well as a bunch of smaller allocations within the RIPE region still > tagged to CS or YU (Yugoslavia). To my surprise, no network tagged as SU > (Soviet Union) was found - while the NIC for .su TLD is still > operational. :-) >=20 > Fixes: #12510 >=20 > Signed-off-by: Peter M=C3=BCller > --- > src/python/location-importer.in | 42 ++++++++++++++++++++++----------- > 1 file changed, 28 insertions(+), 14 deletions(-) >=20 > diff --git a/src/python/location-importer.in b/src/python/location-importer= .in > index 864eab1..89b556a 100644 > --- a/src/python/location-importer.in > +++ b/src/python/location-importer.in > @@ -388,10 +388,17 @@ class CLI(object): > TRUNCATE TABLE networks; > """) > =20 > + # Fetch all valid country codes to check parsed networks against... 
> + rows =3D self.db.query("SELECT * FROM countries ORDER BY country_code") > + validcountries =3D [] > + > + for row in rows: > + validcountries.append(row.country_code) > + > for source in location.importer.WHOIS_SOURCES: > with downloader.request(source, return_blocks=3DTrue) as f: > for block in f: > - self._parse_block(block) > + self._parse_block(block, validcountries) > =20 > # Process all parsed networks from every RIR we happen to have access t= o, > # insert the largest network chunks into the networks table immediately= ... > @@ -467,7 +474,7 @@ class CLI(object): > # Download data > with downloader.request(source) as f: > for line in f: > - self._parse_line(line) > + self._parse_line(line, validcountries) > =20 > def _check_parsed_network(self, network): > """ > @@ -532,7 +539,7 @@ class CLI(object): > # be suitable for libloc consumption... > return True > =20 > - def _parse_block(self, block): > + def _parse_block(self, block, validcountries =3D None): > # Get first line to find out what type of block this is > line =3D block[0] > =20 > @@ -542,7 +549,7 @@ class CLI(object): > =20 > # inetnum > if line.startswith("inet6num:") or line.startswith("inetnum:"): > - return self._parse_inetnum_block(block) > + return self._parse_inetnum_block(block, validcountries) > =20 > # organisation > elif line.startswith("organisation:"): > @@ -573,7 +580,7 @@ class CLI(object): > autnum.get("asn"), autnum.get("org"), > ) > =20 > - def _parse_inetnum_block(self, block): > + def _parse_inetnum_block(self, block, validcountries =3D None): > log.debug("Parsing inetnum block:") > =20 > inetnum =3D {} > @@ -624,17 +631,17 @@ class CLI(object): > if not inetnum or not "country" in inetnum: > return > =20 > - # Skip objects with bogus country code 'ZZ' > - if inetnum.get("country") =3D=3D "ZZ": > - log.warning("Skipping network with bogus country 'ZZ': %s" % \ > - (inetnum.get("inet6num") or inetnum.get("inetnum"))) > - return > - > network =3D 
ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get(= "inetnum"), strict=3DFalse) > =20 > if not self._check_parsed_network(network): > return > =20 > + # Skip objects with unknown country codes > + if validcountries and inetnum.get("country") not in validcountries: > + log.warning("Skipping network with bogus country '%s': %s" % \ > + (inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetn= um"))) > + return > + > self.db.execute("INSERT INTO _rirdata(network, country) \ > VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country =3D excluded= .country", > "%s" % network, inetnum.get("country"), > @@ -659,7 +666,7 @@ class CLI(object): > org.get("organisation"), org.get("org-name"), > ) > =20 > - def _parse_line(self, line): > + def _parse_line(self, line, validcountries =3D None): > # Skip version line > if line.startswith("2"): > return > @@ -674,8 +681,15 @@ class CLI(object): > log.warning("Could not parse line: %s" % line) > return > =20 > - # Skip any lines that are for stats only > - if country_code =3D=3D "*": > + # Skip any lines that are for stats only or do not have a country > + # code at all (avoids log spam below) > + if not country_code or country_code =3D=3D '*': > + return > + > + # Skip objects with unknown country codes > + if validcountries and country_code not in validcountries: > + log.warning("Skipping line with bogus country '%s': %s" % \ > + (country_code, line)) > return > =20 > if type in ("ipv6", "ipv4"): >=20 --===============5631689742480446480==--