This allows us to trace back concrete changes or anomalies to their RIR source, without having to parse everything again. Further, it enables adding 3rd party sources such as IP feeds from Amazon, without losing track of the changes introduced by them.
The second version of this patchset uses ALTER TABLE to add the source columns, avoiding breaking existing SQL setups.
Signed-off-by: Peter Müller peter.mueller@ipfire.org --- src/python/location-importer.in | 91 ++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 42 deletions(-)
diff --git a/src/python/location-importer.in b/src/python/location-importer.in index e5f55af..f796652 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -156,6 +156,7 @@ class CLI(object):
-- autnums CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL); + ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text NOT NULL; CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
-- countries @@ -165,6 +166,7 @@ class CLI(object):
-- networks CREATE TABLE IF NOT EXISTS networks(network inet, country text); + ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text NOT NULL; CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network)); CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops); @@ -369,15 +371,15 @@ class CLI(object): with self.db.transaction(): # Create some temporary tables to store parsed data self.db.execute(""" - CREATE TEMPORARY TABLE _autnums(number integer, organization text) + CREATE TEMPORARY TABLE _autnums(number integer, organization text, source text NOT NULL) ON COMMIT DROP; CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
- CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL) + CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL, source text NOT NULL) ON COMMIT DROP; CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
- CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL) + CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, source text NOT NULL) ON COMMIT DROP; CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network)); CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network); @@ -395,10 +397,11 @@ class CLI(object): for row in rows: validcountries.append(row.country_code)
- for source in location.importer.WHOIS_SOURCES: - with downloader.request(source, return_blocks=True) as f: - for block in f: - self._parse_block(block, validcountries) + for source_key in location.importer.WHOIS_SOURCES: + for single_url in location.importer.WHOIS_SOURCES[source_key]: + with downloader.request(single_url, return_blocks=True) as f: + for block in f: + self._parse_block(block, source_key, validcountries)
# Process all parsed networks from every RIR we happen to have access to, # insert the largest network chunks into the networks table immediately... @@ -407,8 +410,8 @@ class CLI(object): for family in (row.family for row in families): smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
- self.db.execute("INSERT INTO networks(network, country) \ - SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family) + self.db.execute("INSERT INTO networks(network, country, source) \ + SELECT network, country, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
# ... determine any other prefixes for this network family, ... prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \ @@ -421,7 +424,8 @@ class CLI(object): WITH candidates AS ( SELECT _rirdata.network, - _rirdata.country + _rirdata.country, + _rirdata.source FROM _rirdata WHERE @@ -434,6 +438,7 @@ class CLI(object): DISTINCT ON (c.network) c.network, c.country, + c.source, masklen(networks.network), networks.country AS parent_country FROM @@ -447,10 +452,11 @@ class CLI(object): masklen(networks.network) DESC NULLS LAST ) INSERT INTO - networks(network, country) + networks(network, country, source) SELECT network, - country + country, + source FROM filtered WHERE @@ -462,19 +468,20 @@ class CLI(object): )
self.db.execute(""" - INSERT INTO autnums(number, name) - SELECT _autnums.number, _organizations.name FROM _autnums + INSERT INTO autnums(number, name, source) + SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums JOIN _organizations ON _autnums.organization = _organizations.handle ON CONFLICT (number) DO UPDATE SET name = excluded.name; """)
# Download all extended sources - for source in location.importer.EXTENDED_SOURCES: - with self.db.transaction(): - # Download data - with downloader.request(source) as f: - for line in f: - self._parse_line(line, validcountries) + for source_key in location.importer.EXTENDED_SOURCES: + for single_url in location.importer.EXTENDED_SOURCES[source_key]: + with self.db.transaction(): + # Download data + with downloader.request(single_url) as f: + for line in f: + self._parse_line(line, source_key, validcountries)
def _check_parsed_network(self, network): """ @@ -539,23 +546,23 @@ class CLI(object): # be suitable for libloc consumption... return True
- def _parse_block(self, block, validcountries = None): + def _parse_block(self, block, source_key, validcountries = None): # Get first line to find out what type of block this is line = block[0]
# aut-num if line.startswith("aut-num:"): - return self._parse_autnum_block(block) + return self._parse_autnum_block(block, source_key)
# inetnum if line.startswith("inet6num:") or line.startswith("inetnum:"): - return self._parse_inetnum_block(block, validcountries) + return self._parse_inetnum_block(block, source_key, validcountries)
# organisation elif line.startswith("organisation:"): - return self._parse_org_block(block) + return self._parse_org_block(block, source_key)
- def _parse_autnum_block(self, block): + def _parse_autnum_block(self, block, source_key): autnum = {} for line in block: # Split line @@ -574,13 +581,13 @@ class CLI(object): return
# Insert into database - self.db.execute("INSERT INTO _autnums(number, organization) \ - VALUES(%s, %s) ON CONFLICT (number) DO UPDATE SET \ + self.db.execute("INSERT INTO _autnums(number, organization, source) \ + VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \ organization = excluded.organization", - autnum.get("asn"), autnum.get("org"), + autnum.get("asn"), autnum.get("org"), source_key, )
- def _parse_inetnum_block(self, block, validcountries = None): + def _parse_inetnum_block(self, block, source_key, validcountries = None): log.debug("Parsing inetnum block:")
inetnum = {} @@ -636,12 +643,12 @@ class CLI(object): break
# Everything is fine here, run INSERT statement... - self.db.execute("INSERT INTO _rirdata(network, country) \ - VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", - "%s" % single_network, inetnum.get("country"), + self.db.execute("INSERT INTO _rirdata(network, country, source) \ + VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", + "%s" % single_network, inetnum.get("country"), source_key, )
- def _parse_org_block(self, block): + def _parse_org_block(self, block, source_key): org = {} for line in block: # Split line @@ -656,13 +663,13 @@ class CLI(object): if not org: return
- self.db.execute("INSERT INTO _organizations(handle, name) \ - VALUES(%s, %s) ON CONFLICT (handle) DO \ + self.db.execute("INSERT INTO _organizations(handle, name, source) \ + VALUES(%s, %s, %s) ON CONFLICT (handle) DO \ UPDATE SET name = excluded.name", - org.get("organisation"), org.get("org-name"), + org.get("organisation"), org.get("org-name"), source_key, )
- def _parse_line(self, line, validcountries = None): + def _parse_line(self, line, source_key, validcountries = None): # Skip version line if line.startswith("2"): return @@ -689,9 +696,9 @@ class CLI(object): return
if type in ("ipv6", "ipv4"): - return self._parse_ip_line(country_code, type, line) + return self._parse_ip_line(country_code, type, line, source_key)
- def _parse_ip_line(self, country, type, line): + def _parse_ip_line(self, country, type, line, source_key): try: address, prefix, date, status, organization = line.split("|") except ValueError: @@ -729,10 +736,10 @@ class CLI(object): if not self._check_parsed_network(network): return
- self.db.execute("INSERT INTO networks(network, country) \ - VALUES(%s, %s) ON CONFLICT (network) DO \ + self.db.execute("INSERT INTO networks(network, country, source) \ + VALUES(%s, %s, %s) ON CONFLICT (network) DO \ UPDATE SET country = excluded.country", - "%s" % network, country, + "%s" % network, country, source_key, )
def handle_update_announcements(self, ns):
Signed-off-by: Peter Müller peter.mueller@ipfire.org --- src/python/importer.py | 66 +++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 24 deletions(-)
diff --git a/src/python/importer.py b/src/python/importer.py index 5f46bc3..4c8406c 100644 --- a/src/python/importer.py +++ b/src/python/importer.py @@ -25,50 +25,68 @@ import urllib.request log = logging.getLogger("location.importer") log.propagate = 1
-WHOIS_SOURCES = ( +WHOIS_SOURCES = { # African Network Information Centre - "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz", + "AFRINIC": [ + "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz" + ],
# Asia Pacific Network Information Centre - "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz", - "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz", - #"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz", - #"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz", - "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz", - "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz", + "APNIC": [ + "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz", + "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz", + #"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz", + #"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz", + "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz", + "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz" + ],
# American Registry for Internet Numbers # XXX there is nothing useful for us in here - #"https://ftp.arin.net/pub/rr/arin.db", + # ARIN: [ + # "https://ftp.arin.net/pub/rr/arin.db" + # ],
# Latin America and Caribbean Network Information Centre # XXX ???
# Réseaux IP Européens - "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz", - "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz", - #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz", - #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz", - "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz", - "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz", -) - -EXTENDED_SOURCES = ( + "RIPE": [ + "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz", + "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz", + #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz", + #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz", + "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz", + "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz" + ], +} + +EXTENDED_SOURCES = { # African Network Information Centre - #"https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest", + # "AFRINIC": [ + # "https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest" + # ],
# Asia Pacific Network Information Centre - #"https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest", + # "APNIC": [ + # "https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest" + # ],
# American Registry for Internet Numbers - "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest", + "ARIN": [ + "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest" + ],
# Latin America and Caribbean Network Information Centre - "https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest", + "LACNIC": [ + "https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest" + ],
# Réseaux IP Européens - #"https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest", -) + # "RIPE": [ + # "https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest" + # ], +}
class Downloader(object): def __init__(self):
Hallo,
This probably should have been in the first patch because it won’t work without this.
Best, -Michael
On 22 May 2021, at 21:33, Peter Müller peter.mueller@ipfire.org wrote:
Signed-off-by: Peter Müller peter.mueller@ipfire.org
src/python/importer.py | 66 +++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 24 deletions(-)
diff --git a/src/python/importer.py b/src/python/importer.py index 5f46bc3..4c8406c 100644 --- a/src/python/importer.py +++ b/src/python/importer.py @@ -25,50 +25,68 @@ import urllib.request log = logging.getLogger("location.importer") log.propagate = 1
-WHOIS_SOURCES = ( +WHOIS_SOURCES = { # African Network Information Centre
"AFRINIC": [
"https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"
],
# Asia Pacific Network Information Centre
- "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
- #"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
- #"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz",
"APNIC": [
"https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
#"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
#"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"
],
# American Registry for Internet Numbers # XXX there is nothing useful for us in here
# ARIN: [
# ],
# Latin America and Caribbean Network Information Centre # XXX ???
# Réseaux IP Européens
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
- #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
- #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz",
-)
-EXTENDED_SOURCES = (
- "RIPE": [
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"
],
+}
+EXTENDED_SOURCES = { # African Network Information Centre
# "AFRINIC": [
# "https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest"
# ],
# Asia Pacific Network Information Centre
# "APNIC": [
# "https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest"
# ],
# American Registry for Internet Numbers
"ARIN": [
"https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"
],
# Latin America and Caribbean Network Information Centre
"LACNIC": [
"https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"
],
# Réseaux IP Européens
-)
- # "RIPE": [
- # "https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest"
- # ],
+}
class Downloader(object): def __init__(self): -- 2.20.1
Hello Michael,
thanks for your reply.
I am not sure if I understood you correctly. What am I missing in which patch(set)?
Thanks, and best regards, Peter Müller
Hallo,
This probably should have been in the first patch because it won’t work without this.
Best, -Michael
On 22 May 2021, at 21:33, Peter Müller peter.mueller@ipfire.org wrote:
Signed-off-by: Peter Müller peter.mueller@ipfire.org
src/python/importer.py | 66 +++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 24 deletions(-)
diff --git a/src/python/importer.py b/src/python/importer.py index 5f46bc3..4c8406c 100644 --- a/src/python/importer.py +++ b/src/python/importer.py @@ -25,50 +25,68 @@ import urllib.request log = logging.getLogger("location.importer") log.propagate = 1
-WHOIS_SOURCES = ( +WHOIS_SOURCES = { # African Network Information Centre
"AFRINIC": [
"https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"
],
# Asia Pacific Network Information Centre
- "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
- #"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
- #"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz",
"APNIC": [
"https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
#"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
#"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"
],
# American Registry for Internet Numbers # XXX there is nothing useful for us in here
# ARIN: [
# ],
# Latin America and Caribbean Network Information Centre # XXX ???
# Réseaux IP Européens
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
- #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
- #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz",
-)
-EXTENDED_SOURCES = (
- "RIPE": [
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"
],
+}
+EXTENDED_SOURCES = { # African Network Information Centre
# "AFRINIC": [
# "https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest"
# ],
# Asia Pacific Network Information Centre
# "APNIC": [
# "https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest"
# ],
# American Registry for Internet Numbers
"ARIN": [
"https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"
],
# Latin America and Caribbean Network Information Centre
"LACNIC": [
"https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"
],
# Réseaux IP Européens
-)
- # "RIPE": [
- # "https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest"
- # ],
+}
class Downloader(object): def __init__(self): -- 2.20.1
Nothing. It is merged.
I am just saying that you split one change into two, but merging only one patch breaks the code - and that shouldn’t happen.
-Michael
On 26 May 2021, at 19:11, Peter Müller peter.mueller@ipfire.org wrote:
Hello Michael,
thanks for your reply.
I am not sure if I understood you correctly. What am I missing in which patch(set)?
Thanks, and best regards, Peter Müller
Hallo,
This probably should have been in the first patch because it won’t work without this.
Best, -Michael
On 22 May 2021, at 21:33, Peter Müller peter.mueller@ipfire.org wrote:
Signed-off-by: Peter Müller peter.mueller@ipfire.org
src/python/importer.py | 66 +++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 24 deletions(-)
diff --git a/src/python/importer.py b/src/python/importer.py index 5f46bc3..4c8406c 100644 --- a/src/python/importer.py +++ b/src/python/importer.py @@ -25,50 +25,68 @@ import urllib.request log = logging.getLogger("location.importer") log.propagate = 1
-WHOIS_SOURCES = ( +WHOIS_SOURCES = { # African Network Information Centre
"AFRINIC": [
"https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"
],
# Asia Pacific Network Information Centre
- "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
- #"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
- #"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz",
"APNIC": [
"https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
#"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
#"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"
],
# American Registry for Internet Numbers # XXX there is nothing useful for us in here
# ARIN: [
# ],
# Latin America and Caribbean Network Information Centre # XXX ???
# Réseaux IP Européens
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
- #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
- #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz",
-)
-EXTENDED_SOURCES = (
- "RIPE": [
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"
],
+}
+EXTENDED_SOURCES = { # African Network Information Centre
# "AFRINIC": [
# "https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest"
# ],
# Asia Pacific Network Information Centre
# "APNIC": [
# "https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest"
# ],
# American Registry for Internet Numbers
"ARIN": [
"https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"
],
# Latin America and Caribbean Network Information Centre
"LACNIC": [
"https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"
],
# Réseaux IP Européens
-)
- # "RIPE": [
- # "https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest"
- # ],
+}
class Downloader(object): def __init__(self): -- 2.20.1
Hello Michael,
thanks for your reply.
Actually, I have never looked at patchsets being able to work in part - to me, they always seemed to be atomic. Good to know this is wrong, I guess... :-)
Thanks, and best regards, Peter Müller
Nothing. It is merged.
I am just saying that you split one change into two, but merging only one patch breaks the code - and that shouldn’t happen.
-Michael
On 26 May 2021, at 19:11, Peter Müller peter.mueller@ipfire.org wrote:
Hello Michael,
thanks for your reply.
I am not sure if I understood you correctly. What am I missing in which patch(set)?
Thanks, and best regards, Peter Müller
Hallo,
This probably should have been in the first patch because it won’t work without this.
Best, -Michael
On 22 May 2021, at 21:33, Peter Müller peter.mueller@ipfire.org wrote:
Signed-off-by: Peter Müller peter.mueller@ipfire.org
src/python/importer.py | 66 +++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 24 deletions(-)
diff --git a/src/python/importer.py b/src/python/importer.py index 5f46bc3..4c8406c 100644 --- a/src/python/importer.py +++ b/src/python/importer.py @@ -25,50 +25,68 @@ import urllib.request log = logging.getLogger("location.importer") log.propagate = 1
-WHOIS_SOURCES = ( +WHOIS_SOURCES = { # African Network Information Centre
"AFRINIC": [
"https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"
],
# Asia Pacific Network Information Centre
- "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
- #"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
- #"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz",
"APNIC": [
"https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
#"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
#"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"
],
# American Registry for Internet Numbers # XXX there is nothing useful for us in here
# ARIN: [
# ],
# Latin America and Caribbean Network Information Centre # XXX ???
# Réseaux IP Européens
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
- #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
- #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz",
-)
-EXTENDED_SOURCES = (
- "RIPE": [
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"
],
+}
+EXTENDED_SOURCES = { # African Network Information Centre
# "AFRINIC": [
# "https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest"
# ],
# Asia Pacific Network Information Centre
# "APNIC": [
# "https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest"
# ],
# American Registry for Internet Numbers
"ARIN": [
"https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"
],
# Latin America and Caribbean Network Information Centre
"LACNIC": [
"https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"
],
# Réseaux IP Européens
-)
- # "RIPE": [
- # "https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest"
- # ],
+}
class Downloader(object): def __init__(self): -- 2.20.1
Hello,
On 30 May 2021, at 09:08, Peter Müller peter.mueller@ipfire.org wrote:
Hello Michael,
thanks for your reply.
Actually, I have never looked at patchsets being able to work in part - to me, they always seemed to be atomic. Good to know this is wrong, I guess... :-)
Sometimes it is difficult to do this, but it would be good to have them at least compile and run so that “git bisect” can be used.
-Michael
Thanks, and best regards, Peter Müller
Nothing. It is merged.
I am just saying that you split one change into two, but merging only one patch breaks the code - and that shouldn’t happen.
-Michael
On 26 May 2021, at 19:11, Peter Müller peter.mueller@ipfire.org wrote:
Hello Michael,
thanks for your reply.
I am not sure if I understood you correctly. What am I missing in which patch(set)?
Thanks, and best regards, Peter Müller
Hallo,
This probably should have been in the first patch because it won’t work without this.
Best, -Michael
On 22 May 2021, at 21:33, Peter Müller peter.mueller@ipfire.org wrote:
Signed-off-by: Peter Müller peter.mueller@ipfire.org
src/python/importer.py | 66 +++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 24 deletions(-)
diff --git a/src/python/importer.py b/src/python/importer.py index 5f46bc3..4c8406c 100644 --- a/src/python/importer.py +++ b/src/python/importer.py @@ -25,50 +25,68 @@ import urllib.request log = logging.getLogger("location.importer") log.propagate = 1
-WHOIS_SOURCES = ( +WHOIS_SOURCES = { # African Network Information Centre
"AFRINIC": [
"https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"
],
# Asia Pacific Network Information Centre
- "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
- #"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
- #"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
- "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz",
"APNIC": [
"https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
#"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
#"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
"https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"
],
# American Registry for Internet Numbers # XXX there is nothing useful for us in here
# ARIN: [
# ],
# Latin America and Caribbean Network Information Centre # XXX ???
# Réseaux IP Européens
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
- #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
- #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz",
-)
-EXTENDED_SOURCES = (
- "RIPE": [
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
"https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"
],
+}
+EXTENDED_SOURCES = { # African Network Information Centre
# "AFRINIC": [
# "https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest"
# ],
# Asia Pacific Network Information Centre
# "APNIC": [
# "https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest"
# ],
# American Registry for Internet Numbers
"ARIN": [
"https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"
],
# Latin America and Caribbean Network Information Centre
"LACNIC": [
"https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"
],
# Réseaux IP Européens
-)
- # "RIPE": [
- # "https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest"
- # ],
+}
class Downloader(object): def __init__(self): -- 2.20.1
Hello,
On 22 May 2021, at 21:33, Peter Müller peter.mueller@ipfire.org wrote:
This allows us to trace back concrete changes or anomalies to their RIR source, without having to parse everything again. Further, it enables adding 3rd party sources such as IP feeds from Amazon, without losing track of the changes introduced by them.
The second version of this patchset uses ALTER TABLE to add the source columns, avoiding breaking existing SQL setups.
Signed-off-by: Peter Müller peter.mueller@ipfire.org
src/python/location-importer.in | 91 ++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 42 deletions(-)
diff --git a/src/python/location-importer.in b/src/python/location-importer.in index e5f55af..f796652 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -156,6 +156,7 @@ class CLI(object):
-- autnums CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text NOT NULL; CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number); -- countries
@@ -165,6 +166,7 @@ class CLI(object):
-- networks CREATE TABLE IF NOT EXISTS networks(network inet, country text);
ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text NOT NULL; CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network)); CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
@@ -369,15 +371,15 @@ class CLI(object): with self.db.transaction(): # Create some temporary tables to store parsed data self.db.execute("""
CREATE TEMPORARY TABLE _autnums(number integer, organization text)
CREATE TEMPORARY TABLE _autnums(number integer, organization text, source text NOT NULL) ON COMMIT DROP; CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL)
CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL, source text NOT NULL) ON COMMIT DROP; CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL)
CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, source text NOT NULL) ON COMMIT DROP; CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network)); CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
@@ -395,10 +397,11 @@ class CLI(object): for row in rows: validcountries.append(row.country_code)
for source in location.importer.WHOIS_SOURCES:
with downloader.request(source, return_blocks=True) as f:
for block in f:
self._parse_block(block, validcountries)
for source_key in location.importer.WHOIS_SOURCES:
source_key is probably not the most intuitive name for this variable, but since the code works, I won’t complain.
for single_url in location.importer.WHOIS_SOURCES[source_key]:
with downloader.request(single_url, return_blocks=True) as f:
for block in f:
self._parse_block(block, source_key, validcountries) # Process all parsed networks from every RIR we happen to have access to, # insert the largest network chunks into the networks table immediately...
@@ -407,8 +410,8 @@ class CLI(object): for family in (row.family for row in families): smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
self.db.execute("INSERT INTO networks(network, country) \
SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
self.db.execute("INSERT INTO networks(network, country, source) \
SELECT network, country, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family) # ... determine any other prefixes for this network family, ... prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
@@ -421,7 +424,8 @@ class CLI(object): WITH candidates AS ( SELECT _rirdata.network,
_rirdata.country
_rirdata.country,
_rirdata.source FROM _rirdata WHERE
@@ -434,6 +438,7 @@ class CLI(object): DISTINCT ON (c.network) c.network, c.country,
c.source, masklen(networks.network), networks.country AS parent_country FROM
@@ -447,10 +452,11 @@ class CLI(object): masklen(networks.network) DESC NULLS LAST ) INSERT INTO
networks(network, country)
networks(network, country, source) SELECT network,
country
country,
source FROM filtered WHERE
@@ -462,19 +468,20 @@ class CLI(object): )
self.db.execute("""
INSERT INTO autnums(number, name)
SELECT _autnums.number, _organizations.name FROM _autnums
INSERT INTO autnums(number, name, source)
SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums JOIN _organizations ON _autnums.organization = _organizations.handle ON CONFLICT (number) DO UPDATE SET name = excluded.name; """)
# Download all extended sources
for source in location.importer.EXTENDED_SOURCES:
with self.db.transaction():
# Download data
with downloader.request(source) as f:
for line in f:
self._parse_line(line, validcountries)
for source_key in location.importer.EXTENDED_SOURCES:
for single_url in location.importer.EXTENDED_SOURCES[source_key]:
with self.db.transaction():
# Download data
with downloader.request(single_url) as f:
for line in f:
self._parse_line(line, source_key, validcountries)
def _check_parsed_network(self, network): """
@@ -539,23 +546,23 @@ class CLI(object): # be suitable for libloc consumption... return True
- def _parse_block(self, block, validcountries = None):
def _parse_block(self, block, source_key, validcountries = None): # Get first line to find out what type of block this is line = block[0]
# aut-num if line.startswith("aut-num:"):
return self._parse_autnum_block(block)
return self._parse_autnum_block(block, source_key)
# inetnum if line.startswith("inet6num:") or line.startswith("inetnum:"):
return self._parse_inetnum_block(block, validcountries)
return self._parse_inetnum_block(block, source_key, validcountries)
# organisation elif line.startswith("organisation:"):
return self._parse_org_block(block)
return self._parse_org_block(block, source_key)
- def _parse_autnum_block(self, block):
- def _parse_autnum_block(self, block, source_key): autnum = {} for line in block: # Split line
@@ -574,13 +581,13 @@ class CLI(object): return
# Insert into database
self.db.execute("INSERT INTO _autnums(number, organization) \
VALUES(%s, %s) ON CONFLICT (number) DO UPDATE SET \
self.db.execute("INSERT INTO _autnums(number, organization, source) \
VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \ organization = excluded.organization",
autnum.get("asn"), autnum.get("org"),
)autnum.get("asn"), autnum.get("org"), source_key,
- def _parse_inetnum_block(self, block, validcountries = None):
def _parse_inetnum_block(self, block, source_key, validcountries = None): log.debug("Parsing inetnum block:")
inetnum = {}
@@ -636,12 +643,12 @@ class CLI(object): break
# Everything is fine here, run INSERT statement...
self.db.execute("INSERT INTO _rirdata(network, country) \
VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
"%s" % single_network, inetnum.get("country"),
self.db.execute("INSERT INTO _rirdata(network, country, source) \
VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
"%s" % single_network, inetnum.get("country"), source_key, )
- def _parse_org_block(self, block):
- def _parse_org_block(self, block, source_key): org = {} for line in block: # Split line
@@ -656,13 +663,13 @@ class CLI(object): if not org: return
self.db.execute("INSERT INTO _organizations(handle, name) \
VALUES(%s, %s) ON CONFLICT (handle) DO \
self.db.execute("INSERT INTO _organizations(handle, name, source) \
VALUES(%s, %s, %s) ON CONFLICT (handle) DO \ UPDATE SET name = excluded.name",
org.get("organisation"), org.get("org-name"),
)org.get("organisation"), org.get("org-name"), source_key,
- def _parse_line(self, line, validcountries = None):
- def _parse_line(self, line, source_key, validcountries = None): # Skip version line if line.startswith("2"): return
@@ -689,9 +696,9 @@ class CLI(object): return
if type in ("ipv6", "ipv4"):
return self._parse_ip_line(country_code, type, line)
return self._parse_ip_line(country_code, type, line, source_key)
- def _parse_ip_line(self, country, type, line):
- def _parse_ip_line(self, country, type, line, source_key): try: address, prefix, date, status, organization = line.split("|") except ValueError:
@@ -729,10 +736,10 @@ class CLI(object): if not self._check_parsed_network(network): return
self.db.execute("INSERT INTO networks(network, country) \
VALUES(%s, %s) ON CONFLICT (network) DO \
self.db.execute("INSERT INTO networks(network, country, source) \
VALUES(%s, %s, %s) ON CONFLICT (network) DO \ UPDATE SET country = excluded.country",
"%s" % network, country,
"%s" % network, country, source_key,
)
def handle_update_announcements(self, ns):
-- 2.20.1