*cough* :-) > Hello Michael, > > if I got this right, this patch still waits acceptance/rejection, which is why I just > wanted to bring it up again. :-) > > Thanks, and best regards, > Peter Müller > >> There is no sense in parsing and storting networks whose country codes >> cannot be found in the ISO-3166-x country code table. This avoids side >> effects in applications using the location database, and introduces >> another sanity check to compensate bogus RIR data. >> >> On location02, this affects some networks from APNIC (country code: ZZ) >> as well as a bunch of smaller allocations within the RIPE region still >> tagged to CS or YU (Yugoslavia). To my surprise, no network tagged as SU >> (Soviet Union) was found - while the NIC for .su TLD is still >> operational. :-) >> >> Fixes: #12510 >> >> Signed-off-by: Peter Müller >> --- >> src/python/location-importer.in | 42 ++++++++++++++++++++++----------- >> 1 file changed, 28 insertions(+), 14 deletions(-) >> >> diff --git a/src/python/location-importer.in b/src/python/location-importer.in >> index 864eab1..89b556a 100644 >> --- a/src/python/location-importer.in >> +++ b/src/python/location-importer.in >> @@ -388,10 +388,17 @@ class CLI(object): >> TRUNCATE TABLE networks; >> """) >> >> + # Fetch all valid country codes to check parsed networks aganist... >> + rows = self.db.query("SELECT * FROM countries ORDER BY country_code") >> + validcountries = [] >> + >> + for row in rows: >> + validcountries.append(row.country_code) >> + >> for source in location.importer.WHOIS_SOURCES: >> with downloader.request(source, return_blocks=True) as f: >> for block in f: >> - self._parse_block(block) >> + self._parse_block(block, validcountries) >> >> # Process all parsed networks from every RIR we happen to have access to, >> # insert the largest network chunks into the networks table immediately... >> @@ -467,7 +474,7 @@ class CLI(object): >> # Download data >> with downloader.request(source) as f: >> for line in f: >> - self._parse_line(line) >> + self._parse_line(line, validcountries) >> >> def _check_parsed_network(self, network): >> """ >> @@ -532,7 +539,7 @@ class CLI(object): >> # be suitable for libloc consumption... >> return True >> >> - def _parse_block(self, block): >> + def _parse_block(self, block, validcountries = None): >> # Get first line to find out what type of block this is >> line = block[0] >> >> @@ -542,7 +549,7 @@ class CLI(object): >> >> # inetnum >> if line.startswith("inet6num:") or line.startswith("inetnum:"): >> - return self._parse_inetnum_block(block) >> + return self._parse_inetnum_block(block, validcountries) >> >> # organisation >> elif line.startswith("organisation:"): >> @@ -573,7 +580,7 @@ class CLI(object): >> autnum.get("asn"), autnum.get("org"), >> ) >> >> - def _parse_inetnum_block(self, block): >> + def _parse_inetnum_block(self, block, validcountries = None): >> log.debug("Parsing inetnum block:") >> >> inetnum = {} >> @@ -624,17 +631,17 @@ class CLI(object): >> if not inetnum or not "country" in inetnum: >> return >> >> - # Skip objects with bogus country code 'ZZ' >> - if inetnum.get("country") == "ZZ": >> - log.warning("Skipping network with bogus country 'ZZ': %s" % \ >> - (inetnum.get("inet6num") or inetnum.get("inetnum"))) >> - return >> - >> network = ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("inetnum"), strict=False) >> >> if not self._check_parsed_network(network): >> return >> >> + # Skip objects with unknown country codes >> + if validcountries and inetnum.get("country") not in validcountries: >> + log.warning("Skipping network with bogus country '%s': %s" % \ >> + (inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) >> + return >> + >> self.db.execute("INSERT INTO _rirdata(network, country) \ >> VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", >> "%s" % network, inetnum.get("country"), >> @@ -659,7 +666,7 @@ class CLI(object): >> org.get("organisation"), org.get("org-name"), >> ) >> >> - def _parse_line(self, line): >> + def _parse_line(self, line, validcountries = None): >> # Skip version line >> if line.startswith("2"): >> return >> @@ -674,8 +681,15 @@ class CLI(object): >> log.warning("Could not parse line: %s" % line) >> return >> >> - # Skip any lines that are for stats only >> - if country_code == "*": >> + # Skip any lines that are for stats only or do not have a country >> + # code at all (avoids log spam below) >> + if not country_code or country_code == '*': >> + return >> + >> + # Skip objects with unknown country codes >> + if validcountries and country_code not in validcountries: >> + log.warning("Skipping line with bogus country '%s': %s" % \ >> + (country_code, line)) >> return >> >> if type in ("ipv6", "ipv4"): >>