From mboxrd@z Thu Jan 1 00:00:00 1970 From: Peter =?utf-8?q?M=C3=BCller?= To: location@lists.ipfire.org Subject: Re: [PATCH 1/2] location-importer.in: keep track of sources for networks, ASNs, and organisations Date: Fri, 21 May 2021 12:00:50 +0200 Message-ID: <97efc87c-44fd-836e-3ad9-e70a37a197a1@ipfire.org> In-Reply-To: <20210515115705.9794-1-peter.mueller@ipfire.org> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============0105119851707363332==" List-Id: --===============0105119851707363332== Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Hello *, as this would break existing SQL tables as well, I will rework it and hand in a second version of this patch. Thanks, and best regards, Peter M=C3=BCller > This allows us to trace back concrete changes or anomalies to their RIR > source, without having to parse everything again. Further, it enables > adding 3rd party sources such as IP feeds from Amazon, without losing > track of the changes introduced by them. >=20 > Depending on the individual systems, it might be necessary to DROP the > tables for autnums and networks first. 
>=20 > Signed-off-by: Peter M=C3=BCller > --- > src/python/location-importer.in | 93 +++++++++++++++++---------------- > 1 file changed, 49 insertions(+), 44 deletions(-) >=20 > diff --git a/src/python/location-importer.in b/src/python/location-importer= .in > index e5f55af..fd2bde1 100644 > --- a/src/python/location-importer.in > +++ b/src/python/location-importer.in > @@ -155,7 +155,7 @@ class CLI(object): > CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING= GIST(network inet_ops); > =20 > -- autnums > - CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL); > + CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL, = source text NOT NULL); > CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number); > =20 > -- countries > @@ -164,7 +164,7 @@ class CLI(object): > CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(= country_code); > =20 > -- networks > - CREATE TABLE IF NOT EXISTS networks(network inet, country text); > + CREATE TABLE IF NOT EXISTS networks(network inet, country text, source= text NOT NULL); > CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network= ); > CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(fam= ily(network)); > CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(netw= ork inet_ops); > @@ -369,15 +369,15 @@ class CLI(object): > with self.db.transaction(): > # Create some temporary tables to store parsed data > self.db.execute(""" > - CREATE TEMPORARY TABLE _autnums(number integer, organization text) > + CREATE TEMPORARY TABLE _autnums(number integer, organization text, sou= rce text NOT NULL) > ON COMMIT DROP; > CREATE UNIQUE INDEX _autnums_number ON _autnums(number); > =20 > - CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL) > + CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL,= source text NOT NULL) > ON COMMIT DROP; > CREATE UNIQUE INDEX _organizations_handle ON 
_organizations(handle); > =20 > - CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NO= T NULL) > + CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NO= T NULL, source text NOT NULL) > ON COMMIT DROP; > CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), = masklen(network)); > CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network); > @@ -395,10 +395,11 @@ class CLI(object): > for row in rows: > validcountries.append(row.country_code) > =20 > - for source in location.importer.WHOIS_SOURCES: > - with downloader.request(source, return_blocks=3DTrue) as f: > - for block in f: > - self._parse_block(block, validcountries) > + for source_key in location.importer.WHOIS_SOURCES: > + for single_url in location.importer.WHOIS_SOURCES[source_key]: > + with downloader.request(single_url, return_blocks=3DTrue) as f: > + for block in f: > + self._parse_block(block, source_key, validcountries) > =20 > # Process all parsed networks from every RIR we happen to have access t= o, > # insert the largest network chunks into the networks table immediately= ... > @@ -407,8 +408,8 @@ class CLI(object): > for family in (row.family for row in families): > smallest =3D self.db.get("SELECT MIN(masklen(network)) AS prefix FROM = _rirdata WHERE family(network) =3D %s", family) > =20 > - self.db.execute("INSERT INTO networks(network, country) \ > - SELECT network, country FROM _rirdata WHERE masklen(network) =3D %s A= ND family(network) =3D %s", smallest.prefix, family) > + self.db.execute("INSERT INTO networks(network, country, source) \ > + SELECT network, country, source FROM _rirdata WHERE masklen(network) = =3D %s AND family(network) =3D %s", smallest.prefix, family) > =20 > # ... determine any other prefixes for this network family, ... 
> prefixes =3D self.db.query("SELECT DISTINCT masklen(network) AS prefix= FROM _rirdata \ > @@ -421,7 +422,8 @@ class CLI(object): > WITH candidates AS ( > SELECT > _rirdata.network, > - _rirdata.country > + _rirdata.country, > + _rirdata.source > FROM > _rirdata > WHERE > @@ -434,6 +436,7 @@ class CLI(object): > DISTINCT ON (c.network) > c.network, > c.country, > + c.source, > masklen(networks.network), > networks.country AS parent_country > FROM > @@ -447,10 +450,11 @@ class CLI(object): > masklen(networks.network) DESC NULLS LAST > ) > INSERT INTO > - networks(network, country) > + networks(network, country, source) > SELECT > network, > - country > + country, > + source > FROM > filtered > WHERE > @@ -462,19 +466,20 @@ class CLI(object): > ) > =20 > self.db.execute(""" > - INSERT INTO autnums(number, name) > - SELECT _autnums.number, _organizations.name FROM _autnums > + INSERT INTO autnums(number, name, source) > + SELECT _autnums.number, _organizations.name, _organizations.source FR= OM _autnums > JOIN _organizations ON _autnums.organization =3D _organizations.hand= le > ON CONFLICT (number) DO UPDATE SET name =3D excluded.name; > """) > =20 > # Download all extended sources > - for source in location.importer.EXTENDED_SOURCES: > - with self.db.transaction(): > - # Download data > - with downloader.request(source) as f: > - for line in f: > - self._parse_line(line, validcountries) > + for source_key in location.importer.EXTENDED_SOURCES: > + for single_url in location.importer.EXTENDED_SOURCES[source_key]: > + with self.db.transaction(): > + # Download data > + with downloader.request(single_url) as f: > + for line in f: > + self._parse_line(line, source_key, validcountries) > =20 > def _check_parsed_network(self, network): > """ > @@ -539,23 +544,23 @@ class CLI(object): > # be suitable for libloc consumption... 
> return True > =20 > - def _parse_block(self, block, validcountries =3D None): > + def _parse_block(self, block, source_key, validcountries =3D None): > # Get first line to find out what type of block this is > line =3D block[0] > =20 > # aut-num > if line.startswith("aut-num:"): > - return self._parse_autnum_block(block) > + return self._parse_autnum_block(block, source_key) > =20 > # inetnum > if line.startswith("inet6num:") or line.startswith("inetnum:"): > - return self._parse_inetnum_block(block, validcountries) > + return self._parse_inetnum_block(block, source_key, validcountries) > =20 > # organisation > elif line.startswith("organisation:"): > - return self._parse_org_block(block) > + return self._parse_org_block(block, source_key) > =20 > - def _parse_autnum_block(self, block): > + def _parse_autnum_block(self, block, source_key): > autnum =3D {} > for line in block: > # Split line > @@ -574,13 +579,13 @@ class CLI(object): > return > =20 > # Insert into database > - self.db.execute("INSERT INTO _autnums(number, organization) \ > - VALUES(%s, %s) ON CONFLICT (number) DO UPDATE SET \ > + self.db.execute("INSERT INTO _autnums(number, organization, source) \ > + VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \ > organization =3D excluded.organization", > - autnum.get("asn"), autnum.get("org"), > + autnum.get("asn"), autnum.get("org"), source_key, > ) > =20 > - def _parse_inetnum_block(self, block, validcountries =3D None): > + def _parse_inetnum_block(self, block, source_key, validcountries =3D None= ): > log.debug("Parsing inetnum block:") > =20 > inetnum =3D {} > @@ -636,12 +641,12 @@ class CLI(object): > break > =20 > # Everything is fine here, run INSERT statement... 
> - self.db.execute("INSERT INTO _rirdata(network, country) \ > - VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country =3D exclud= ed.country", > - "%s" % single_network, inetnum.get("country"), > + self.db.execute("INSERT INTO _rirdata(network, country, source) \ > + VALUES(%s, %s, %s) ON CONFLICT (network) DO UPDATE SET country =3D ex= cluded.country", > + "%s" % single_network, inetnum.get("country"), source_key, > ) > =20 > - def _parse_org_block(self, block): > + def _parse_org_block(self, block, source_key): > org =3D {} > for line in block: > # Split line > @@ -656,13 +661,13 @@ class CLI(object): > if not org: > return > =20 > - self.db.execute("INSERT INTO _organizations(handle, name) \ > - VALUES(%s, %s) ON CONFLICT (handle) DO \ > + self.db.execute("INSERT INTO _organizations(handle, name, source) \ > + VALUES(%s, %s, %s) ON CONFLICT (handle) DO \ > UPDATE SET name =3D excluded.name", > - org.get("organisation"), org.get("org-name"), > + org.get("organisation"), org.get("org-name"), source_key, > ) > =20 > - def _parse_line(self, line, validcountries =3D None): > + def _parse_line(self, line, source_key, validcountries =3D None): > # Skip version line > if line.startswith("2"): > return > @@ -689,9 +694,9 @@ class CLI(object): > return > =20 > if type in ("ipv6", "ipv4"): > - return self._parse_ip_line(country_code, type, line) > + return self._parse_ip_line(country_code, type, line, source_key) > =20 > - def _parse_ip_line(self, country, type, line): > + def _parse_ip_line(self, country, type, line, source_key): > try: > address, prefix, date, status, organization =3D line.split("|") > except ValueError: > @@ -729,10 +734,10 @@ class CLI(object): > if not self._check_parsed_network(network): > return > =20 > - self.db.execute("INSERT INTO networks(network, country) \ > - VALUES(%s, %s) ON CONFLICT (network) DO \ > + self.db.execute("INSERT INTO networks(network, country, source) \ > + VALUES(%s, %s, %s) ON CONFLICT (network) DO \ > UPDATE SET 
country =3D excluded.country", > - "%s" % network, country, > + "%s" % network, country, source_key, > ) > =20 > def handle_update_announcements(self, ns): >=20 --===============0105119851707363332==--