This patch replaces our previous source for AS names in ARIN's realms with another file provided by ARIN that contains human-readable names for organizations ASNs have been allocated to.
Please note that a
TRUNCATE autnums;
is necessary on machines previously running the old version of location-importer, in order to make use of this changed data source.
Signed-off-by: Peter Müller peter.mueller@ipfire.org --- src/scripts/location-importer.in | 47 ++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 20 deletions(-)
diff --git a/src/scripts/location-importer.in b/src/scripts/location-importer.in index 28a4f6c..96b3a20 100644 --- a/src/scripts/location-importer.in +++ b/src/scripts/location-importer.in @@ -3,7 +3,7 @@ # # # libloc - A library to determine the location of someone on the Internet # # # -# Copyright (C) 2020-2022 IPFire Development Team info@ipfire.org # +# Copyright (C) 2020-2023 IPFire Development Team info@ipfire.org # # # # This library is free software; you can redistribute it and/or # # modify it under the terms of the GNU Lesser General Public # @@ -19,6 +19,7 @@
import argparse import concurrent.futures +import csv import http.client import ipaddress import json @@ -1033,36 +1034,42 @@ class CLI(object): def _import_as_names_from_arin(self): downloader = location.importer.Downloader()
- # XXX: Download AS names file from ARIN (note that these names appear to be quite - # technical, not intended for human consumption, as description fields in - # organisation handles for other RIRs are - however, this is what we have got, - # and in some cases, it might be still better than nothing) - for line in downloader.request_lines("https://ftp.arin.net/info/asn.txt"): - # Valid lines start with a space, followed by the number of the Autonomous System ... - if not line.startswith(" "): + # Download AS names file from ARIN and load it into CSV parser + for line in downloader.request_lines("https://ftp.arin.net/pub/resource_registry_service/asns.csv"): + + # Valid lines start with a " ... + if not line.startswith("""): continue
# Split line and check if there is a valid ASN in it... - asn, name = line.split()[0:2] + for row in csv.reader([line]): + orgname = row[0] + orghandle = row[1] + firstasn = row[3] + lastasn = row[4]
try: - asn = int(asn) + firstasn = int(firstasn.strip(""")) + lastasn = int(lastasn.strip(""")) except ValueError: - log.debug("Skipping ARIN AS names line not containing an integer for ASN") + log.debug("Skipping ARIN AS names line not containing valid integers for ASN") continue
# Filter invalid ASNs... - if not self._check_parsed_asn(asn): + if not self._check_parsed_asn(firstasn): continue
- # Skip any AS name that appears to be a placeholder for a different RIR or entity... - if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|LACNIC|RIPE|IANA)(?:\d?$|-)", name): + if firstasn > lastasn: + continue + + # Filter any bulk AS assignments, since these are present for other RIRs where + # we get better data from elsewhere. + if not firstasn == lastasn: continue
- # Bail out in case the AS name contains anything we do not expect here... - if re.search(r"[^a-zA-Z0-9-_]", name): - log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \ - (asn, name)) + # Skip any AS name that appears to be a placeholder for a different RIR or entity... + if re.match(r"^(AFRINIC|APNIC|LACNIC|RIPE)$", orghandle.strip(""")): + continue
# Things look good here, run INSERT statement and skip this one if we already have # a (better?) name for this Autonomous System... @@ -1073,8 +1080,8 @@ class CLI(object): source ) VALUES (%s, %s, %s) ON CONFLICT (number) DO NOTHING""", - asn, - name, + firstasn, + orgname.strip("""), "ARIN", )
Hello Peter,
On 10 Dec 2023, at 19:37, Peter Müller peter.mueller@ipfire.org wrote:
This patch replaces our previous source for AS names in ARIN's realms with another file provided by ARIN that contains human-readable names for organizations ASNs have been allocated to.
Please note that a
TRUNCATE autnums;
is necessary on machines previously running the old version of location-importer, in order to make use of this changed data source.
It would be great if we could avoid any manual interaction in the future...
Signed-off-by: Peter Müller peter.mueller@ipfire.org
src/scripts/location-importer.in | 47 ++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 20 deletions(-)
diff --git a/src/scripts/location-importer.in b/src/scripts/location-importer.in index 28a4f6c..96b3a20 100644 --- a/src/scripts/location-importer.in +++ b/src/scripts/location-importer.in @@ -3,7 +3,7 @@ # # # libloc - A library to determine the location of someone on the Internet # # # -# Copyright (C) 2020-2022 IPFire Development Team info@ipfire.org # +# Copyright (C) 2020-2023 IPFire Development Team info@ipfire.org # # # # This library is free software; you can redistribute it and/or # # modify it under the terms of the GNU Lesser General Public # @@ -19,6 +19,7 @@
import argparse import concurrent.futures +import csv import http.client import ipaddress import json @@ -1033,36 +1034,42 @@ class CLI(object): def _import_as_names_from_arin(self): downloader = location.importer.Downloader()
- # XXX: Download AS names file from ARIN (note that these names appear to be quite
- # technical, not intended for human consumption, as description fields in
- # organisation handles for other RIRs are - however, this is what we have got,
- # and in some cases, it might be still better than nothing)
- for line in downloader.request_lines("https://ftp.arin.net/info/asn.txt"):
- # Valid lines start with a space, followed by the number of the Autonomous System ...
- if not line.startswith(" "):
- # Download AS names file from ARIN and load it into CSV parser
- for line in downloader.request_lines("https://ftp.arin.net/pub/resource_registry_service/asns.csv"):
- # Valid lines start with a " ...
- if not line.startswith("""):
continue
I would prefer trying to parse first, because it could still be valid CSV even without the quotes.
# Split line and check if there is a valid ASN in it...
- asn, name = line.split()[0:2]
- for row in csv.reader([line]):
- orgname = row[0]
- orghandle = row[1]
- firstasn = row[3]
- lastasn = row[4]
This works, but since I like to give lessons how to write code in a more readable Python fashion, I would have written this as follow:
orgname, orghandle, _, firstasn, lastasn = row
In case you really cannot rely on the number of fields, wrap this around a try/except block that catches any lines that are shorter. We should expect that any additional fields are being added at any time.
try:
- asn = int(asn)
- firstasn = int(firstasn.strip("""))
- lastasn = int(lastasn.strip("""))
If you have to strip the quotes, you probably are using the CSV parser with an incorrect configuration.
You might need to register a new dialect like so:
csv.register_dialect(“arin", delimiter=“,", quoting=csv.QUOTE_ALL, quotechar="\”")
You can then initialise the reader like so:
reader = csv.DictReader(f, dialect=“arin”)
If you are feeling really fancy and want to avoid parsing fields in a complicated way, you can even populate the fieldnames parameter.
except ValueError:
- log.debug("Skipping ARIN AS names line not containing an integer for ASN")
- log.debug("Skipping ARIN AS names line not containing valid integers for ASN")
continue
# Filter invalid ASNs...
- if not self._check_parsed_asn(asn):
- if not self._check_parsed_asn(firstasn):
continue
- # Skip any AS name that appears to be a placeholder for a different RIR or entity...
- if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|LACNIC|RIPE|IANA)(?:\d?$|-)", name):
- if firstasn > lastasn:
- continue
- # Filter any bulk AS assignments, since these are present for other RIRs where
- # we get better data from elsewhere.
- if not firstasn == lastasn:
continue
- # Bail out in case the AS name contains anything we do not expect here...
- if re.search(r"[^a-zA-Z0-9-_]", name):
- log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
- (asn, name))
- # Skip any AS name that appears to be a placeholder for a different RIR or entity...
- if re.match(r"^(AFRINIC|APNIC|LACNIC|RIPE)$", orghandle.strip(""")):
- continue
This looks all okay.
# Things look good here, run INSERT statement and skip this one if we already have # a (better?) name for this Autonomous System... @@ -1073,8 +1080,8 @@ class CLI(object): source ) VALUES (%s, %s, %s) ON CONFLICT (number) DO NOTHING""",
- asn,
- name,
- firstasn,
- orgname.strip("""),
With the fixed CSV parser, you will not need to strip here. This also has the downside of removing any quotes in the middle of the string. Not sure whether that is a likely case, but nevertheless, we should use the data that we receive unchanged unless we have a strong reason to change anything here.
Best, -Michael
"ARIN", )
-- 2.35.3
Hello Peter,
I have largely rewritten this patch based on the CSV parser:
https://git.ipfire.org/?p=location/libloc.git;a=commitdiff;h=18a72eac51076b2...
-Michael
On 13 Dec 2023, at 11:45, Michael Tremer michael.tremer@ipfire.org wrote:
Hello Peter,
On 10 Dec 2023, at 19:37, Peter Müller peter.mueller@ipfire.org wrote:
This patch replaces our previous source for AS names in ARIN's realms with another file provided by ARIN that contains human-readable names for organizations ASNs have been allocated to.
Please note that a
TRUNCATE autnums;
is necessary on machines previously running the old version of location-importer, in order to make use of this changed data source.
It would be great if we could avoid any manual interaction in the future...
Signed-off-by: Peter Müller peter.mueller@ipfire.org
src/scripts/location-importer.in | 47 ++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 20 deletions(-)
diff --git a/src/scripts/location-importer.in b/src/scripts/location-importer.in index 28a4f6c..96b3a20 100644 --- a/src/scripts/location-importer.in +++ b/src/scripts/location-importer.in @@ -3,7 +3,7 @@ # # # libloc - A library to determine the location of someone on the Internet # # # -# Copyright (C) 2020-2022 IPFire Development Team info@ipfire.org # +# Copyright (C) 2020-2023 IPFire Development Team info@ipfire.org # # # # This library is free software; you can redistribute it and/or # # modify it under the terms of the GNU Lesser General Public # @@ -19,6 +19,7 @@
import argparse import concurrent.futures +import csv import http.client import ipaddress import json @@ -1033,36 +1034,42 @@ class CLI(object): def _import_as_names_from_arin(self): downloader = location.importer.Downloader()
- # XXX: Download AS names file from ARIN (note that these names appear to be quite
- # technical, not intended for human consumption, as description fields in
- # organisation handles for other RIRs are - however, this is what we have got,
- # and in some cases, it might be still better than nothing)
- for line in downloader.request_lines("https://ftp.arin.net/info/asn.txt"):
- # Valid lines start with a space, followed by the number of the Autonomous System ...
- if not line.startswith(" "):
- # Download AS names file from ARIN and load it into CSV parser
- for line in downloader.request_lines("https://ftp.arin.net/pub/resource_registry_service/asns.csv"):
- # Valid lines start with a " ...
- if not line.startswith("""):
continue
I would prefer trying to parse first, because it could still be valid CSV even without the quotes.
# Split line and check if there is a valid ASN in it...
- asn, name = line.split()[0:2]
- for row in csv.reader([line]):
- orgname = row[0]
- orghandle = row[1]
- firstasn = row[3]
- lastasn = row[4]
This works, but since I like to give lessons how to write code in a more readable Python fashion, I would have written this as follow:
orgname, orghandle, _, firstasn, lastasn = row
In case you really cannot rely on the number of fields, wrap this around a try/except block that catches any lines that are shorter. We should expect that any additional fields are being added at any time.
try:
- asn = int(asn)
- firstasn = int(firstasn.strip("""))
- lastasn = int(lastasn.strip("""))
If you have to strip the quotes, you probably are using the CSV parser with an incorrect configuration.
You might need to register a new dialect like so:
csv.register_dialect(“arin", delimiter=“,", quoting=csv.QUOTE_ALL, quotechar="\”")
You can then initialise the reader like so:
reader = csv.DictReader(f, dialect=“arin”)
If you are feeling really fancy and want to avoid parsing fields in a complicated way, you can even populate the fieldnames parameter.
except ValueError:
- log.debug("Skipping ARIN AS names line not containing an integer for ASN")
- log.debug("Skipping ARIN AS names line not containing valid integers for ASN")
continue
# Filter invalid ASNs...
- if not self._check_parsed_asn(asn):
- if not self._check_parsed_asn(firstasn):
continue
- # Skip any AS name that appears to be a placeholder for a different RIR or entity...
- if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|LACNIC|RIPE|IANA)(?:\d?$|-)", name):
- if firstasn > lastasn:
- continue
- # Filter any bulk AS assignments, since these are present for other RIRs where
- # we get better data from elsewhere.
- if not firstasn == lastasn:
continue
- # Bail out in case the AS name contains anything we do not expect here...
- if re.search(r"[^a-zA-Z0-9-_]", name):
- log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
- (asn, name))
- # Skip any AS name that appears to be a placeholder for a different RIR or entity...
- if re.match(r"^(AFRINIC|APNIC|LACNIC|RIPE)$", orghandle.strip(""")):
- continue
This looks all okay.
# Things look good here, run INSERT statement and skip this one if we already have # a (better?) name for this Autonomous System... @@ -1073,8 +1080,8 @@ class CLI(object): source ) VALUES (%s, %s, %s) ON CONFLICT (number) DO NOTHING""",
- asn,
- name,
- firstasn,
- orgname.strip("""),
With the fixed CSV parser, you will not need to strip here. This also has the downside of removing any quotes in the middle of the string. Not sure whether that is a likely case, but nevertheless, we should use the data that we receive unchanged unless we have a strong reason to change anything here.
Best, -Michael
"ARIN", )
-- 2.35.3