From: "Peter Müller" <peter.mueller@ipfire.org>
To: location@lists.ipfire.org
Subject: Re: [PATCH] location-importer.in: skip networks with unknown country codes
Date: Thu, 04 Feb 2021 18:32:45 +0100 [thread overview]
Message-ID: <ac5b2be3-f5d7-f3f2-a53b-ddf3b93a5fe1@ipfire.org> (raw)
In-Reply-To: <f12a909a-503a-14e6-149a-ad526d8d6d12@ipfire.org>
[-- Attachment #1: Type: text/plain, Size: 5209 bytes --]
*cough* :-)
> Hello Michael,
>
> if I got this right, this patch still waits acceptance/rejection, which is why I just
> wanted to bring it up again. :-)
>
> Thanks, and best regards,
> Peter Müller
>
>> There is no sense in parsing and storting networks whose country codes
>> cannot be found in the ISO-3166-x country code table. This avoids side
>> effects in applications using the location database, and introduces
>> another sanity check to compensate bogus RIR data.
>>
>> On location02, this affects some networks from APNIC (country code: ZZ)
>> as well as a bunch of smaller allocations within the RIPE region still
>> tagged to CS or YU (Yugoslavia). To my surprise, no network tagged as SU
>> (Soviet Union) was found - while the NIC for .su TLD is still
>> operational. :-)
>>
>> Fixes: #12510
>>
>> Signed-off-by: Peter Müller <peter.mueller(a)ipfire.org>
>> ---
>> src/python/location-importer.in | 42 ++++++++++++++++++++++-----------
>> 1 file changed, 28 insertions(+), 14 deletions(-)
>>
>> diff --git a/src/python/location-importer.in b/src/python/location-importer.in
>> index 864eab1..89b556a 100644
>> --- a/src/python/location-importer.in
>> +++ b/src/python/location-importer.in
>> @@ -388,10 +388,17 @@ class CLI(object):
>> TRUNCATE TABLE networks;
>> """)
>>
>> + # Fetch all valid country codes to check parsed networks aganist...
>> + rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
>> + validcountries = []
>> +
>> + for row in rows:
>> + validcountries.append(row.country_code)
>> +
>> for source in location.importer.WHOIS_SOURCES:
>> with downloader.request(source, return_blocks=True) as f:
>> for block in f:
>> - self._parse_block(block)
>> + self._parse_block(block, validcountries)
>>
>> # Process all parsed networks from every RIR we happen to have access to,
>> # insert the largest network chunks into the networks table immediately...
>> @@ -467,7 +474,7 @@ class CLI(object):
>> # Download data
>> with downloader.request(source) as f:
>> for line in f:
>> - self._parse_line(line)
>> + self._parse_line(line, validcountries)
>>
>> def _check_parsed_network(self, network):
>> """
>> @@ -532,7 +539,7 @@ class CLI(object):
>> # be suitable for libloc consumption...
>> return True
>>
>> - def _parse_block(self, block):
>> + def _parse_block(self, block, validcountries = None):
>> # Get first line to find out what type of block this is
>> line = block[0]
>>
>> @@ -542,7 +549,7 @@ class CLI(object):
>>
>> # inetnum
>> if line.startswith("inet6num:") or line.startswith("inetnum:"):
>> - return self._parse_inetnum_block(block)
>> + return self._parse_inetnum_block(block, validcountries)
>>
>> # organisation
>> elif line.startswith("organisation:"):
>> @@ -573,7 +580,7 @@ class CLI(object):
>> autnum.get("asn"), autnum.get("org"),
>> )
>>
>> - def _parse_inetnum_block(self, block):
>> + def _parse_inetnum_block(self, block, validcountries = None):
>> log.debug("Parsing inetnum block:")
>>
>> inetnum = {}
>> @@ -624,17 +631,17 @@ class CLI(object):
>> if not inetnum or not "country" in inetnum:
>> return
>>
>> - # Skip objects with bogus country code 'ZZ'
>> - if inetnum.get("country") == "ZZ":
>> - log.warning("Skipping network with bogus country 'ZZ': %s" % \
>> - (inetnum.get("inet6num") or inetnum.get("inetnum")))
>> - return
>> -
>> network = ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("inetnum"), strict=False)
>>
>> if not self._check_parsed_network(network):
>> return
>>
>> + # Skip objects with unknown country codes
>> + if validcountries and inetnum.get("country") not in validcountries:
>> + log.warning("Skipping network with bogus country '%s': %s" % \
>> + (inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
>> + return
>> +
>> self.db.execute("INSERT INTO _rirdata(network, country) \
>> VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
>> "%s" % network, inetnum.get("country"),
>> @@ -659,7 +666,7 @@ class CLI(object):
>> org.get("organisation"), org.get("org-name"),
>> )
>>
>> - def _parse_line(self, line):
>> + def _parse_line(self, line, validcountries = None):
>> # Skip version line
>> if line.startswith("2"):
>> return
>> @@ -674,8 +681,15 @@ class CLI(object):
>> log.warning("Could not parse line: %s" % line)
>> return
>>
>> - # Skip any lines that are for stats only
>> - if country_code == "*":
>> + # Skip any lines that are for stats only or do not have a country
>> + # code at all (avoids log spam below)
>> + if not country_code or country_code == '*':
>> + return
>> +
>> + # Skip objects with unknown country codes
>> + if validcountries and country_code not in validcountries:
>> + log.warning("Skipping line with bogus country '%s': %s" % \
>> + (country_code, line))
>> return
>>
>> if type in ("ipv6", "ipv4"):
>>
prev parent reply other threads:[~2021-02-04 17:32 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-30 14:35 Peter Müller
2021-01-26 15:34 ` Peter Müller
2021-02-04 17:32 ` Peter Müller [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ac5b2be3-f5d7-f3f2-a53b-ddf3b93a5fe1@ipfire.org \
--to=peter.mueller@ipfire.org \
--cc=location@lists.ipfire.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox