From: "Peter Müller" <peter.mueller@ipfire.org>
To: location@lists.ipfire.org
Subject: [PATCH 2/8] Revert "Revert "location-importer.in: only import relevant data from AFRINIC, APNIC and RIPE""
Date: Wed, 21 Oct 2020 14:47:37 +0000 [thread overview]
Message-ID: <20201021144743.18083-2-peter.mueller@ipfire.org> (raw)
In-Reply-To: <20201021144743.18083-1-peter.mueller@ipfire.org>
[-- Attachment #1: Type: text/plain, Size: 5385 bytes --]
This reverts commit 13f67f285856e8eabfeff2daf1be3aeaa36a82cc.
Signed-off-by: Peter Müller <peter.mueller@ipfire.org>
---
src/python/location-importer.in | 89 ++++++++++++++++++++++++++++++++-
1 file changed, 87 insertions(+), 2 deletions(-)
diff --git a/src/python/location-importer.in b/src/python/location-importer.in
index b220eaf..d249a35 100644
--- a/src/python/location-importer.in
+++ b/src/python/location-importer.in
@@ -165,6 +165,7 @@ class CLI(object):
-- networks
CREATE TABLE IF NOT EXISTS networks(network inet, country text);
CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
+ CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
-- overrides
@@ -365,6 +366,16 @@ class CLI(object):
CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL)
ON COMMIT DROP;
CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
+
+ CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL)
+ ON COMMIT DROP;
+ CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
+ CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
+ """)
+
+ # Remove all previously imported content
+ self.db.execute("""
+ TRUNCATE TABLE networks;
""")
for source in location.importer.WHOIS_SOURCES:
@@ -372,6 +383,67 @@ class CLI(object):
for block in f:
self._parse_block(block)
+ # Process all parsed networks from every RIR we happen to have access to,
+ # insert the largest network chunks into the networks table immediately...
+ families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")
+
+ for family in (row.family for row in families):
+ smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
+
+ self.db.execute("INSERT INTO networks(network, country) \
+ SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
+
+ # ... determine any other prefixes for this network family, ...
+ prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
+ WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family)
+
+ # ... and insert networks with this prefix in case they provide additional
+ # information (i.e. they are a subnet of a larger chunk with a different country)
+ for prefix in (row.prefix for row in prefixes):
+ self.db.execute("""
+ WITH candidates AS (
+ SELECT
+ _rirdata.network,
+ _rirdata.country
+ FROM
+ _rirdata
+ WHERE
+ family(_rirdata.network) = %s
+ AND
+ masklen(_rirdata.network) = %s
+ ),
+ filtered AS (
+ SELECT
+ DISTINCT ON (c.network)
+ c.network,
+ c.country,
+ masklen(networks.network),
+ networks.country AS parent_country
+ FROM
+ candidates c
+ LEFT JOIN
+ networks
+ ON
+ c.network << networks.network
+ ORDER BY
+ c.network,
+ masklen(networks.network) DESC NULLS LAST
+ )
+ INSERT INTO
+ networks(network, country)
+ SELECT
+ network,
+ country
+ FROM
+ filtered
+ WHERE
+ parent_country IS NULL
+ OR
+ country <> parent_country
+ ON CONFLICT DO NOTHING""",
+ family, prefix,
+ )
+
self.db.execute("""
INSERT INTO autnums(number, name)
SELECT _autnums.number, _organizations.name FROM _autnums
@@ -472,17 +544,30 @@ class CLI(object):
inetnum[key] = val.upper()
# Skip empty objects
- if not inetnum:
+ if not inetnum or not "country" in inetnum:
return
network = ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("inetnum"), strict=False)
+ # Bail out in case we have processed a network covering the entire IP range, which
+ # is necessary to work around faulty (?) IPv6 network processing
+ if network.prefixlen == 0:
+ logging.warning("Skipping network covering the entire IP adress range: %s" % network)
+ return
+
+ # Bail out in case we have processed a network whose prefix length indicates it is
+ # not globally routable (we have decided not to process them at the moment, as they
+ # significantly enlarge our database without providing very helpful additional information)
+ if (network.prefixlen > 24 and network.version == 4) or (network.prefixlen > 48 and network.version == 6):
+ logging.info("Skipping network too small to be publicly announced: %s" % network)
+ return
+
# Bail out in case we have processed a non-public IP network
if network.is_private:
logging.warning("Skipping non-globally routable network: %s" % network)
return
- self.db.execute("INSERT INTO networks(network, country) \
+ self.db.execute("INSERT INTO _rirdata(network, country) \
VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
"%s" % network, inetnum.get("country"),
)
--
2.20.1
next prev parent reply other threads:[~2020-10-21 14:47 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-21 14:47 [PATCH 1/8] Revert "Revert "Revert "Revert "importer: Import raw sources for inetnum's again"""" Peter Müller
2020-10-21 14:47 ` Peter Müller [this message]
2020-10-21 14:47 ` [PATCH 3/8] export.py: fix exporting IP networks for crappy xt_geoip module Peter Müller
2020-10-21 14:47 ` [PATCH 4/8] location-importer.in: filter bogus IP networks for both Whois and extended sources Peter Müller
2020-10-21 14:47 ` [PATCH 5/8] importer.py: fetch LACNIC data via HTTPS Peter Müller
2020-10-21 14:47 ` [PATCH 6/8] location-importer.in: omit historic/orphaned RIR data Peter Müller
2020-10-21 14:47 ` [PATCH 7/8] location-importer.in: Create gist index for announcement table as well Peter Müller
2020-10-21 14:47 ` [PATCH 8/8] location-importer.in: avoid log spam for too small networks Peter Müller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201021144743.18083-2-peter.mueller@ipfire.org \
--to=peter.mueller@ipfire.org \
--cc=location@lists.ipfire.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox