From: Michael Tremer <michael.tremer@ipfire.org>
To: location@lists.ipfire.org
Subject: Re: [PATCH v2] location-importer.in: track original countries as well
Date: Thu, 27 May 2021 11:53:20 +0100 [thread overview]
Message-ID: <ACD3F0E0-3C73-403E-B055-567035F15A7C@ipfire.org> (raw)
In-Reply-To: <eea6b0a0-0590-3534-863c-3082e8937273@ipfire.org>
[-- Attachment #1: Type: text/plain, Size: 6908 bytes --]
Yes, please.
It currently doesn’t merge cleanly; there are plenty of merge conflicts.
-Michael
> On 26 May 2021, at 19:11, Peter Müller <peter.mueller(a)ipfire.org> wrote:
>
> Hello Michael,
>
> this one should be ready for merging as well.
>
> Or do you want me to rebase it against current master branch?
>
> Thanks, and best regards,
> Peter Müller
>
>
>> This helps us to determine how many network objects have more than one
>> country set, and what their original country code set looked like.
>>
>> The second version of this patch uses ALTER TABLE to add the column for
>> original countries, preventing existing SQL setups from breaking.
>>
>> Signed-off-by: Peter Müller <peter.mueller(a)ipfire.org>
>> ---
>> src/python/location-importer.in | 52 ++++++++++++++++++++-------------
>> 1 file changed, 32 insertions(+), 20 deletions(-)
>>
>> diff --git a/src/python/location-importer.in b/src/python/location-importer.in
>> index e5f55af..c3f908a 100644
>> --- a/src/python/location-importer.in
>> +++ b/src/python/location-importer.in
>> @@ -165,6 +165,7 @@ class CLI(object):
>>
>> -- networks
>> CREATE TABLE IF NOT EXISTS networks(network inet, country text);
>> + ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
>> CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
>> CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
>> CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
>> @@ -377,7 +378,7 @@ class CLI(object):
>> ON COMMIT DROP;
>> CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
>>
>> - CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL)
>> + CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL)
>> ON COMMIT DROP;
>> CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
>> CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
>> @@ -407,8 +408,8 @@ class CLI(object):
>> for family in (row.family for row in families):
>> smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
>>
>> - self.db.execute("INSERT INTO networks(network, country) \
>> - SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
>> + self.db.execute("INSERT INTO networks(network, country, original_countries) \
>> + SELECT network, country, original_countries FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
>>
>> # ... determine any other prefixes for this network family, ...
>> prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
>> @@ -421,7 +422,8 @@ class CLI(object):
>> WITH candidates AS (
>> SELECT
>> _rirdata.network,
>> - _rirdata.country
>> + _rirdata.country,
>> + _rirdata.original_countries
>> FROM
>> _rirdata
>> WHERE
>> @@ -434,6 +436,7 @@ class CLI(object):
>> DISTINCT ON (c.network)
>> c.network,
>> c.country,
>> + c.original_countries,
>> masklen(networks.network),
>> networks.country AS parent_country
>> FROM
>> @@ -447,10 +450,11 @@ class CLI(object):
>> masklen(networks.network) DESC NULLS LAST
>> )
>> INSERT INTO
>> - networks(network, country)
>> + networks(network, country, original_countries)
>> SELECT
>> network,
>> - country
>> + country,
>> + original_countries
>> FROM
>> filtered
>> WHERE
>> @@ -617,28 +621,36 @@ class CLI(object):
>> inetnum[key] = [ipaddress.ip_network(val, strict=False)]
>>
>> elif key == "country":
>> - inetnum[key] = val.upper()
>> + # Catch RIR data objects with more than one country code...
>> + if not key in inetnum.keys():
>> + inetnum[key] = []
>> + else:
>> + if val.upper() in inetnum.get("country"):
>> + # ... but keep this list distinct...
>> + continue
>> +
>> + inetnum[key].append(val.upper())
>>
>> # Skip empty objects
>> if not inetnum or not "country" in inetnum:
>> return
>>
>> + # Prepare skipping objects with unknown country codes...
>> + invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
>> +
>> # Iterate through all networks enumerated from above, check them for plausibility and insert
>> # them into the database, if _check_parsed_network() succeeded
>> for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
>> if self._check_parsed_network(single_network):
>> -
>> - # Skip objects with unknown country codes - to avoid log spam for invalid or too small
>> - # networks, this check is - kinda ugly - done at this point
>> - if validcountries and inetnum.get("country") not in validcountries:
>> - log.warning("Skipping network with bogus country '%s': %s" % \
>> - (inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
>> + # Skip objects with unknown country codes if they are valid to avoid log spam...
>> + if validcountries and invalidcountries:
>> + log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
>> + (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
>> break
>>
>> - # Everything is fine here, run INSERT statement...
>> - self.db.execute("INSERT INTO _rirdata(network, country) \
>> - VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
>> - "%s" % single_network, inetnum.get("country"),
>> + self.db.execute("INSERT INTO _rirdata(network, country, original_countries) \
>> + VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
>> + "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"),
>> )
>>
>> def _parse_org_block(self, block):
>> @@ -729,10 +741,10 @@ class CLI(object):
>> if not self._check_parsed_network(network):
>> return
>>
>> - self.db.execute("INSERT INTO networks(network, country) \
>> - VALUES(%s, %s) ON CONFLICT (network) DO \
>> + self.db.execute("INSERT INTO networks(network, country, original_countries) \
>> + VALUES(%s, %s, %s) ON CONFLICT (network) DO \
>> UPDATE SET country = excluded.country",
>> - "%s" % network, country,
>> + "%s" % network, country, [country],
>> )
>>
>> def handle_update_announcements(self, ns):
>>
next prev parent reply other threads:[~2021-05-27 10:53 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-05-22 12:57 Peter Müller
2021-05-26 18:11 ` Peter Müller
2021-05-27 10:53 ` Michael Tremer [this message]
2021-05-30 8:50 ` [PATCH v3] " Peter Müller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ACD3F0E0-3C73-403E-B055-567035F15A7C@ipfire.org \
--to=michael.tremer@ipfire.org \
--cc=location@lists.ipfire.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.