This helps us to determine how many network objects have more than one country set, and what their original country code set looked like.
The second version of this patch uses ALTER TABLE to add the column for original countries, preventing existing SQL setups from breaking.
Signed-off-by: Peter Müller peter.mueller@ipfire.org --- src/python/location-importer.in | 52 ++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 20 deletions(-)
diff --git a/src/python/location-importer.in b/src/python/location-importer.in index e5f55af..c3f908a 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -165,6 +165,7 @@ class CLI(object):
-- networks CREATE TABLE IF NOT EXISTS networks(network inet, country text); + ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[]; CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network)); CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops); @@ -377,7 +378,7 @@ class CLI(object): ON COMMIT DROP; CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
- CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL) + CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL) ON COMMIT DROP; CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network)); CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network); @@ -407,8 +408,8 @@ class CLI(object): for family in (row.family for row in families): smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
- self.db.execute("INSERT INTO networks(network, country) \ - SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family) + self.db.execute("INSERT INTO networks(network, country, original_countries) \ + SELECT network, country, original_countries FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
# ... determine any other prefixes for this network family, ... prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \ @@ -421,7 +422,8 @@ class CLI(object): WITH candidates AS ( SELECT _rirdata.network, - _rirdata.country + _rirdata.country, + _rirdata.original_countries FROM _rirdata WHERE @@ -434,6 +436,7 @@ class CLI(object): DISTINCT ON (c.network) c.network, c.country, + c.original_countries, masklen(networks.network), networks.country AS parent_country FROM @@ -447,10 +450,11 @@ class CLI(object): masklen(networks.network) DESC NULLS LAST ) INSERT INTO - networks(network, country) + networks(network, country, original_countries) SELECT network, - country + country, + original_countries FROM filtered WHERE @@ -617,28 +621,36 @@ class CLI(object): inetnum[key] = [ipaddress.ip_network(val, strict=False)]
elif key == "country": - inetnum[key] = val.upper() + # Catch RIR data objects with more than one country code... + if not key in inetnum.keys(): + inetnum[key] = [] + else: + if val.upper() in inetnum.get("country"): + # ... but keep this list distinct... + continue + + inetnum[key].append(val.upper())
# Skip empty objects if not inetnum or not "country" in inetnum: return
+ # Prepare skipping objects with unknown country codes... + invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries] + # Iterate through all networks enumerated from above, check them for plausibility and insert # them into the database, if _check_parsed_network() succeeded for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"): if self._check_parsed_network(single_network): - - # Skip objects with unknown country codes - to avoid log spam for invalid or too small - # networks, this check is - kinda ugly - done at this point - if validcountries and inetnum.get("country") not in validcountries: - log.warning("Skipping network with bogus country '%s': %s" % \ - (inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) + # Skip objects with unknown country codes if they are valid to avoid log spam... + if validcountries and invalidcountries: + log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \ + (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) break
- # Everything is fine here, run INSERT statement... - self.db.execute("INSERT INTO _rirdata(network, country) \ - VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", - "%s" % single_network, inetnum.get("country"), + self.db.execute("INSERT INTO _rirdata(network, country, original_countries) \ + VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", + "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), )
def _parse_org_block(self, block): @@ -729,10 +741,10 @@ class CLI(object): if not self._check_parsed_network(network): return
- self.db.execute("INSERT INTO networks(network, country) \ - VALUES(%s, %s) ON CONFLICT (network) DO \ + self.db.execute("INSERT INTO networks(network, country, original_countries) \ + VALUES(%s, %s, %s) ON CONFLICT (network) DO \ UPDATE SET country = excluded.country", - "%s" % network, country, + "%s" % network, country, [country], )
def handle_update_announcements(self, ns):
Hello Michael,
this one should be ready for merging as well.
Or do you want me to rebase it against current master branch?
Thanks, and best regards, Peter Müller
This helps us to determine how many network objects have more than one country set, and what their original country code set looked like.
The second version of this patch uses ALTER TABLE to add the column for original countries, preventing existing SQL setups from breaking.
Signed-off-by: Peter Müller peter.mueller@ipfire.org
src/python/location-importer.in | 52 ++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 20 deletions(-)
diff --git a/src/python/location-importer.in b/src/python/location-importer.in index e5f55af..c3f908a 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -165,6 +165,7 @@ class CLI(object):
-- networks CREATE TABLE IF NOT EXISTS networks(network inet, country text);
ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[]; CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network)); CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
@@ -377,7 +378,7 @@ class CLI(object): ON COMMIT DROP; CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL)
CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL) ON COMMIT DROP; CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network)); CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
@@ -407,8 +408,8 @@ class CLI(object): for family in (row.family for row in families): smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
self.db.execute("INSERT INTO networks(network, country) \
SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
self.db.execute("INSERT INTO networks(network, country, original_countries) \
SELECT network, country, original_countries FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family) # ... determine any other prefixes for this network family, ... prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
@@ -421,7 +422,8 @@ class CLI(object): WITH candidates AS ( SELECT _rirdata.network,
_rirdata.country
_rirdata.country,
_rirdata.original_countries FROM _rirdata WHERE
@@ -434,6 +436,7 @@ class CLI(object): DISTINCT ON (c.network) c.network, c.country,
c.original_countries, masklen(networks.network), networks.country AS parent_country FROM
@@ -447,10 +450,11 @@ class CLI(object): masklen(networks.network) DESC NULLS LAST ) INSERT INTO
networks(network, country)
networks(network, country, original_countries) SELECT network,
country
country,
original_countries FROM filtered WHERE
@@ -617,28 +621,36 @@ class CLI(object): inetnum[key] = [ipaddress.ip_network(val, strict=False)]
elif key == "country":
inetnum[key] = val.upper()
# Catch RIR data objects with more than one country code...
if not key in inetnum.keys():
inetnum[key] = []
else:
if val.upper() in inetnum.get("country"):
# ... but keep this list distinct...
continue
inetnum[key].append(val.upper())
# Skip empty objects if not inetnum or not "country" in inetnum: return
# Prepare skipping objects with unknown country codes...
invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
# Iterate through all networks enumerated from above, check them for plausibility and insert # them into the database, if _check_parsed_network() succeeded for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"): if self._check_parsed_network(single_network):
# Skip objects with unknown country codes - to avoid log spam for invalid or too small
# networks, this check is - kinda ugly - done at this point
if validcountries and inetnum.get("country") not in validcountries:
log.warning("Skipping network with bogus country '%s': %s" % \
(inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
# Skip objects with unknown country codes if they are valid to avoid log spam...
if validcountries and invalidcountries:
log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
(invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) break
# Everything is fine here, run INSERT statement...
self.db.execute("INSERT INTO _rirdata(network, country) \
VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
"%s" % single_network, inetnum.get("country"),
self.db.execute("INSERT INTO _rirdata(network, country, original_countries) \
VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
"%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), )
def _parse_org_block(self, block):
@@ -729,10 +741,10 @@ class CLI(object): if not self._check_parsed_network(network): return
self.db.execute("INSERT INTO networks(network, country) \
VALUES(%s, %s) ON CONFLICT (network) DO \
self.db.execute("INSERT INTO networks(network, country, original_countries) \
VALUES(%s, %s, %s) ON CONFLICT (network) DO \ UPDATE SET country = excluded.country",
"%s" % network, country,
"%s" % network, country, [country],
)
def handle_update_announcements(self, ns):
Yes, please.
It currently doesn’t merge and there are plenty of merge issues.
-Michael
On 26 May 2021, at 19:11, Peter Müller peter.mueller@ipfire.org wrote:
Hello Michael,
this one should be ready for merging as well.
Or do you want me to rebase it against current master branch?
Thanks, and best regards, Peter Müller
This helps us to determine how many network objects have more than one country set, and what their original country code set looked like.
The second version of this patch uses ALTER TABLE to add the column for original countries, preventing existing SQL setups from breaking.
Signed-off-by: Peter Müller peter.mueller@ipfire.org
src/python/location-importer.in | 52 ++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 20 deletions(-)
diff --git a/src/python/location-importer.in b/src/python/location-importer.in index e5f55af..c3f908a 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -165,6 +165,7 @@ class CLI(object):
-- networks CREATE TABLE IF NOT EXISTS networks(network inet, country text);
ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[]; CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network)); CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
@@ -377,7 +378,7 @@ class CLI(object): ON COMMIT DROP; CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL)
CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL) ON COMMIT DROP; CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network)); CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
@@ -407,8 +408,8 @@ class CLI(object): for family in (row.family for row in families): smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
self.db.execute("INSERT INTO networks(network, country) \
SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
self.db.execute("INSERT INTO networks(network, country, original_countries) \
SELECT network, country, original_countries FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family) # ... determine any other prefixes for this network family, ... prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
@@ -421,7 +422,8 @@ class CLI(object): WITH candidates AS ( SELECT _rirdata.network,
_rirdata.country
_rirdata.country,
_rirdata.original_countries FROM _rirdata WHERE
@@ -434,6 +436,7 @@ class CLI(object): DISTINCT ON (c.network) c.network, c.country,
c.original_countries, masklen(networks.network), networks.country AS parent_country FROM
@@ -447,10 +450,11 @@ class CLI(object): masklen(networks.network) DESC NULLS LAST ) INSERT INTO
networks(network, country)
networks(network, country, original_countries) SELECT network,
country
country,
original_countries FROM filtered WHERE
@@ -617,28 +621,36 @@ class CLI(object): inetnum[key] = [ipaddress.ip_network(val, strict=False)]
elif key == "country":
inetnum[key] = val.upper()
# Catch RIR data objects with more than one country code...
if not key in inetnum.keys():
inetnum[key] = []
else:
if val.upper() in inetnum.get("country"):
# ... but keep this list distinct...
continue
inetnum[key].append(val.upper())
# Skip empty objects if not inetnum or not "country" in inetnum: return
# Prepare skipping objects with unknown country codes...
invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
# Iterate through all networks enumerated from above, check them for plausibility and insert # them into the database, if _check_parsed_network() succeeded for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"): if self._check_parsed_network(single_network):
# Skip objects with unknown country codes - to avoid log spam for invalid or too small
# networks, this check is - kinda ugly - done at this point
if validcountries and inetnum.get("country") not in validcountries:
log.warning("Skipping network with bogus country '%s': %s" % \
(inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
# Skip objects with unknown country codes if they are valid to avoid log spam...
if validcountries and invalidcountries:
log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
(invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) break
# Everything is fine here, run INSERT statement...
self.db.execute("INSERT INTO _rirdata(network, country) \
VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
"%s" % single_network, inetnum.get("country"),
self.db.execute("INSERT INTO _rirdata(network, country, original_countries) \
VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
"%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), )
def _parse_org_block(self, block):
@@ -729,10 +741,10 @@ class CLI(object): if not self._check_parsed_network(network): return
self.db.execute("INSERT INTO networks(network, country) \
VALUES(%s, %s) ON CONFLICT (network) DO \
self.db.execute("INSERT INTO networks(network, country, original_countries) \
VALUES(%s, %s, %s) ON CONFLICT (network) DO \ UPDATE SET country = excluded.country",
"%s" % network, country,
"%s" % network, country, [country],
)
def handle_update_announcements(self, ns):
This helps us to determine how many network objects have more than one country set, and what their original country code set looked like.
The third version of this patch uses ALTER TABLE to add the column for original countries, preventing existing SQL setups from breaking, and is correctly based against the current "master" branch.
Signed-off-by: Peter Müller peter.mueller@ipfire.org --- src/python/location-importer.in | 47 +++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 17 deletions(-)
diff --git a/src/python/location-importer.in b/src/python/location-importer.in index f796652..a3c16bc 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -166,6 +166,7 @@ class CLI(object):
-- networks CREATE TABLE IF NOT EXISTS networks(network inet, country text); + ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[]; ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text NOT NULL; CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network)); @@ -379,7 +380,7 @@ class CLI(object): ON COMMIT DROP; CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
- CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, source text NOT NULL) + CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL) ON COMMIT DROP; CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network)); CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network); @@ -410,8 +411,8 @@ class CLI(object): for family in (row.family for row in families): smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
- self.db.execute("INSERT INTO networks(network, country, source) \ - SELECT network, country, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family) + self.db.execute("INSERT INTO networks(network, country, original_countries, source) \ + SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
# ... determine any other prefixes for this network family, ... prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \ @@ -425,6 +426,7 @@ class CLI(object): SELECT _rirdata.network, _rirdata.country, + _rirdata.original_countries, _rirdata.source FROM _rirdata @@ -438,6 +440,7 @@ class CLI(object): DISTINCT ON (c.network) c.network, c.country, + c.original_countries, c.source, masklen(networks.network), networks.country AS parent_country @@ -452,10 +455,11 @@ class CLI(object): masklen(networks.network) DESC NULLS LAST ) INSERT INTO - networks(network, country, source) + networks(network, country, original_countries, source) SELECT network, country, + original_countries, source FROM filtered @@ -624,28 +628,37 @@ class CLI(object): inetnum[key] = [ipaddress.ip_network(val, strict=False)]
elif key == "country": - inetnum[key] = val.upper() + # Catch RIR data objects with more than one country code... + if not key in inetnum.keys(): + inetnum[key] = [] + else: + if val.upper() in inetnum.get("country"): + # ... but keep this list distinct... + continue + + inetnum[key].append(val.upper())
# Skip empty objects if not inetnum or not "country" in inetnum: return
+ # Prepare skipping objects with unknown country codes... + invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries] + # Iterate through all networks enumerated from above, check them for plausibility and insert # them into the database, if _check_parsed_network() succeeded for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"): if self._check_parsed_network(single_network):
- # Skip objects with unknown country codes - to avoid log spam for invalid or too small - # networks, this check is - kinda ugly - done at this point - if validcountries and inetnum.get("country") not in validcountries: - log.warning("Skipping network with bogus country '%s': %s" % \ - (inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) - break + # Skip objects with unknown country codes if they are valid to avoid log spam... + if validcountries and invalidcountries: + log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \ + (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
# Everything is fine here, run INSERT statement... - self.db.execute("INSERT INTO _rirdata(network, country, source) \ - VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", - "%s" % single_network, inetnum.get("country"), source_key, + self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \ + VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", + "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key, )
def _parse_org_block(self, block, source_key): @@ -736,10 +749,10 @@ class CLI(object): if not self._check_parsed_network(network): return
- self.db.execute("INSERT INTO networks(network, country, source) \ - VALUES(%s, %s, %s) ON CONFLICT (network) DO \ + self.db.execute("INSERT INTO networks(network, country, original_countries, source) \ + VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \ UPDATE SET country = excluded.country", - "%s" % network, country, source_key, + "%s" % network, country, [country], source_key, )
def handle_update_announcements(self, ns):
Hello,
On 30 May 2021, at 09:50, Peter Müller peter.mueller@ipfire.org wrote:
This helps us to determine how many network objects have more than one country set, and what their original country code set looked like.
The third version of this patch uses ALTER TABLE to add the column for original countries, preventing existing SQL setups from breaking, and is correctly based against the current "master" branch.
Signed-off-by: Peter Müller peter.mueller@ipfire.org
src/python/location-importer.in | 47 +++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 17 deletions(-)
diff --git a/src/python/location-importer.in b/src/python/location-importer.in index f796652..a3c16bc 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -166,6 +166,7 @@ class CLI(object):
-- networks CREATE TABLE IF NOT EXISTS networks(network inet, country text);
ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[]; ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text NOT NULL; CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
@@ -379,7 +380,7 @@ class CLI(object): ON COMMIT DROP; CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, source text NOT NULL)
CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL) ON COMMIT DROP; CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network)); CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
@@ -410,8 +411,8 @@ class CLI(object): for family in (row.family for row in families): smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
self.db.execute("INSERT INTO networks(network, country, source) \
SELECT network, country, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family) # ... determine any other prefixes for this network family, ... prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
@@ -425,6 +426,7 @@ class CLI(object): SELECT _rirdata.network, _rirdata.country,
_rirdata.original_countries, _rirdata.source FROM _rirdata
@@ -438,6 +440,7 @@ class CLI(object): DISTINCT ON (c.network) c.network, c.country,
c.original_countries, c.source, masklen(networks.network), networks.country AS parent_country
@@ -452,10 +455,11 @@ class CLI(object): masklen(networks.network) DESC NULLS LAST ) INSERT INTO
networks(network, country, source)
networks(network, country, original_countries, source) SELECT network, country,
original_countries, source FROM filtered
@@ -624,28 +628,37 @@ class CLI(object): inetnum[key] = [ipaddress.ip_network(val, strict=False)]
elif key == "country":
inetnum[key] = val.upper()
# Catch RIR data objects with more than one country code...
if not key in inetnum.keys():
It would be more Pythonic to write “if not key in inetnum:”.
I believe your implementation creates a list which has to be searched whereas mine performs a dictionary lookup which should be much faster. It wouldn’t matter much when you only have a hand full of elements to check, but since we perform this code tens of thousands of times, every little bit counts.
inetnum[key] = []
else:
if val.upper() in inetnum.get("country"):
# ... but keep this list distinct...
continue
inetnum[key].append(val.upper())
You call .upper() a second time when you could just store the value and use that instead.
# Skip empty objects if not inetnum or not "country" in inetnum: return
# Prepare skipping objects with unknown country codes...
invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
- # Iterate through all networks enumerated from above, check them for plausibility and insert # them into the database, if _check_parsed_network() succeeded for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"): if self._check_parsed_network(single_network):
# Skip objects with unknown country codes - to avoid log spam for invalid or too small
# networks, this check is - kinda ugly - done at this point
if validcountries and inetnum.get("country") not in validcountries:
log.warning("Skipping network with bogus country '%s': %s" % \
(inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
break
# Skip objects with unknown country codes if they are valid to avoid log spam...
if validcountries and invalidcountries:
log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
(invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) # Everything is fine here, run INSERT statement...
self.db.execute("INSERT INTO _rirdata(network, country, source) \
VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
"%s" % single_network, inetnum.get("country"), source_key,
self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
"%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key, )
def _parse_org_block(self, block, source_key):
@@ -736,10 +749,10 @@ class CLI(object): if not self._check_parsed_network(network): return
self.db.execute("INSERT INTO networks(network, country, source) \
VALUES(%s, %s, %s) ON CONFLICT (network) DO \
self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \ UPDATE SET country = excluded.country",
"%s" % network, country, source_key,
"%s" % network, country, [country], source_key,
)
def handle_update_announcements(self, ns):
-- 2.26.2
Would you submit a second patch that fixes those minor things and I will merge both?
-Michael
https://lists.ipfire.org/pipermail/location/2021-May/000377.html
Reported-by: Michael Tremer michael.tremer@ipfire.org Signed-off-by: Peter Müller peter.mueller@ipfire.org --- src/python/location-importer.in | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/src/python/location-importer.in b/src/python/location-importer.in index a3c16bc..3e1f859 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -628,15 +628,17 @@ class CLI(object): inetnum[key] = [ipaddress.ip_network(val, strict=False)]
elif key == "country": + val = val.upper() + # Catch RIR data objects with more than one country code... - if not key in inetnum.keys(): + if not key in inetnum: inetnum[key] = [] else: - if val.upper() in inetnum.get("country"): + if val in inetnum.get("country"): # ... but keep this list distinct... continue
- inetnum[key].append(val.upper()) + inetnum[key].append(val)
# Skip empty objects if not inetnum or not "country" in inetnum:
Hello Michael,
thanks for your reply and the annotations.
Patch #4390 (https://patchwork.ipfire.org/patch/4390/) should fix them. :-)
Thanks, and best regards, Peter Müller
Hello,
On 30 May 2021, at 09:50, Peter Müller peter.mueller@ipfire.org wrote:
This helps us to determine how many network objects have more than one country set, and what their original country code set looked like.
The third version of this patch uses ALTER TABLE to add the column for original countries, preventing existing SQL setups from breaking, and is correctly based against the current "master" branch.
Signed-off-by: Peter Müller peter.mueller@ipfire.org
src/python/location-importer.in | 47 +++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 17 deletions(-)
diff --git a/src/python/location-importer.in b/src/python/location-importer.in index f796652..a3c16bc 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -166,6 +166,7 @@ class CLI(object):
-- networks CREATE TABLE IF NOT EXISTS networks(network inet, country text);
ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[]; ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text NOT NULL; CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
@@ -379,7 +380,7 @@ class CLI(object): ON COMMIT DROP; CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, source text NOT NULL)
CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL) ON COMMIT DROP; CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network)); CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
@@ -410,8 +411,8 @@ class CLI(object): for family in (row.family for row in families): smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
self.db.execute("INSERT INTO networks(network, country, source) \
SELECT network, country, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family) # ... determine any other prefixes for this network family, ... prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
@@ -425,6 +426,7 @@ class CLI(object): SELECT _rirdata.network, _rirdata.country,
_rirdata.original_countries, _rirdata.source FROM _rirdata
@@ -438,6 +440,7 @@ class CLI(object): DISTINCT ON (c.network) c.network, c.country,
c.original_countries, c.source, masklen(networks.network), networks.country AS parent_country
@@ -452,10 +455,11 @@ class CLI(object): masklen(networks.network) DESC NULLS LAST ) INSERT INTO
networks(network, country, source)
networks(network, country, original_countries, source) SELECT network, country,
original_countries, source FROM filtered
@@ -624,28 +628,37 @@ class CLI(object): inetnum[key] = [ipaddress.ip_network(val, strict=False)]
elif key == "country":
inetnum[key] = val.upper()
# Catch RIR data objects with more than one country code...
if not key in inetnum.keys():
It would be more Pythonic to write “if not key in inetnum:”.
I believe your implementation creates a list which has to be searched whereas mine performs a dictionary lookup which should be much faster. It wouldn’t matter much when you only have a hand full of elements to check, but since we perform this code tens of thousands of times, every little bit counts.
inetnum[key] = []
else:
if val.upper() in inetnum.get("country"):
# ... but keep this list distinct...
continue
inetnum[key].append(val.upper())
You call .upper() a second time when you could just store the value and use that instead.
# Skip empty objects if not inetnum or not "country" in inetnum: return
# Prepare skipping objects with unknown country codes...
invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
- # Iterate through all networks enumerated from above, check them for plausibility and insert # them into the database, if _check_parsed_network() succeeded for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"): if self._check_parsed_network(single_network):
# Skip objects with unknown country codes - to avoid log spam for invalid or too small
# networks, this check is - kinda ugly - done at this point
if validcountries and inetnum.get("country") not in validcountries:
log.warning("Skipping network with bogus country '%s': %s" % \
(inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
break
# Skip objects with unknown country codes if they are valid to avoid log spam...
if validcountries and invalidcountries:
log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
(invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) # Everything is fine here, run INSERT statement...
self.db.execute("INSERT INTO _rirdata(network, country, source) \
VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
"%s" % single_network, inetnum.get("country"), source_key,
self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
"%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key, )
def _parse_org_block(self, block, source_key):
@@ -736,10 +749,10 @@ class CLI(object): if not self._check_parsed_network(network): return
self.db.execute("INSERT INTO networks(network, country, source) \
VALUES(%s, %s, %s) ON CONFLICT (network) DO \
self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \ UPDATE SET country = excluded.country",
"%s" % network, country, source_key,
"%s" % network, country, [country], source_key,
)
def handle_update_announcements(self, ns):
-- 2.26.2
Would you submit a second patch that fixes those minor things and I will merge both?
-Michael