* [PATCH 1/3] general-functions.pl: Add LWP-based flexible downloader function.
@ 2025-03-22 14:57 Stefan Schantl
2025-03-22 14:57 ` [PATCH 2/3] general-functions.pl: Use new downloader for FetchPublicIp function Stefan Schantl
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Stefan Schantl @ 2025-03-22 14:57 UTC (permalink / raw)
To: development; +Cc: Stefan Schantl
This function can be used to grab content and/or store it into files.
Signed-off-by: Stefan Schantl <stefan.schantl@ipfire.org>
---
config/cfgroot/general-functions.pl | 262 ++++++++++++++++++++++++++++
1 file changed, 262 insertions(+)
diff --git a/config/cfgroot/general-functions.pl b/config/cfgroot/general-functions.pl
index 8ba6e3f79..cb8df69c6 100644
--- a/config/cfgroot/general-functions.pl
+++ b/config/cfgroot/general-functions.pl
@@ -20,6 +20,23 @@ use IO::Socket;
use Net::SSLeay;
use Net::IPv4Addr qw(:all);
+# Load module to move files.
+use File::Copy;
+
+# Load module to get file stats.
+use File::stat;
+
+# Load module to deal with temporary files.
+use File::Temp;
+
+# Load module to deal with the date formats used by the HTTP protocol.
+use HTTP::Date;
+
+# Load the libwwwperl User Agent module.
+use LWP::UserAgent;
+
+$|=1; # line buffering
+
$General::version = 'VERSION';
$General::swroot = 'CONFIG_ROOT';
$General::noipprefix = 'noipg-';
@@ -1346,6 +1363,251 @@ sub generateProtoTransHash () {
return %protocols;
}
+# Function to grab a given URL content or to download and store it on disk.
+#
+# The function requires a configuration hash to be passed.
+#
+# The following options (hash keys) are supported:
+#
+# URL -> The URL to the content or file. REQUIRED!
+# FILE -> The filename as fullpath where the content/file should be stored on disk.
+# ETAGSFILE -> A filename again as fullpath where Etags should be stored and read.
+# ETAGPREFIX -> In case a custom etag name should be used, otherwise it defaults to the given URL.
+# MAXSIZE -> In bytes until the downloader will abort downloading. (example: 10_485_760 for 10MB)
+#
+# If a file is given an If-Modified-Since header will be generated from the last modified timestamp
+# of an already stored file. In case an Etag file is specified an If-None-Match header will be added to
+# the request - Both can be used at the same time.
+#
+# In case no FILE option has been passed to the function, the content of the requested URL will be returned.
+#
+# Return codes (if FILE is used):
+#
+# nothing - On success
+# no url - If no URL has been specified.
+# not modified - In case the server responds with "Not Modified" (304)
+# $error - The error message taken from the response, if the requested URL cannot be accessed.
+# incomplete download - In case the size of the local file does not match the remote content_length.
+#
+sub downloader (%) {
+ my (%args) = @_;
+
+ # Remap args hash and convert all keys into upper case format.
+ %args = map { uc $_ => $args{$_} } keys %args;
+
+ # The amount of download attempts before giving up and
+ # logging an error.
+ my $max_dl_attempts = 3;
+
+ # Temporary directory to download the files.
+ my $tmp_dl_directory = "/var/tmp";
+
+ # Assign hash values.
+ my $url = $args{"URL"} if (exists($args{"URL"}));
+ my $file = $args{"FILE"} if (exists($args{"FILE"}));
+ my $etags_file = $args{"ETAGSFILE"} if (exists($args{"ETAGSFILE"}));
+ my $etagprefix = $url;
+ $etagprefix = $args{"ETAGPREFIX"} if (exists($args{"ETAGPREFIX"}));
+ my $max_size = $args{"MAXSIZE"} if (exists($args{"MAXSIZE"}));
+
+ # Abort with error "no url", if no URL has been given.
+ return "no url" unless ($url);
+
+ my %etags = ();
+ my $tmpfile;
+
+ # Read-in proxysettings.
+ my %proxysettings=();
+ &readhash("${General::swroot}/proxy/settings", \%proxysettings);
+
+ # Create a user agent instance.
+ #
+ # Request SSL hostname verification and specify path
+ # to the CA file.
+ my $ua = LWP::UserAgent->new(
+ ssl_opts => {
+ SSL_ca_file => '/etc/ssl/cert.pem',
+ verify_hostname => 1,
+ },
+ );
+
+ # Set timeout to 10 seconds.
+ $ua->timeout(10);
+
+ # Assign maximum download size if set.
+ $ua->max_size($max_size) if ($max_size);
+
+ # Generate UserAgent.
+ my $agent = &MakeUserAgent();
+
+ # Set the generated UserAgent.
+ $ua->agent($agent);
+
+ # Check if an upstream proxy is configured.
+ if ($proxysettings{'UPSTREAM_PROXY'}) {
+ my $proxy_url;
+
+ $proxy_url = "http://";
+
+ # Check if the proxy requires authentication.
+ if (($proxysettings{'UPSTREAM_USER'}) && ($proxysettings{'UPSTREAM_PASSWORD'})) {
+ $proxy_url .= "$proxysettings{'UPSTREAM_USER'}\:$proxysettings{'UPSTREAM_PASSWORD'}\@";
+ }
+
+ # Add proxy server address and port.
+ $proxy_url .= $proxysettings{'UPSTREAM_PROXY'};
+
+ # Append proxy settings.
+ $ua->proxy(['http', 'https'], $proxy_url);
+ }
+
+ # Create a HTTP request element and pass the given URL to it.
+ my $request = HTTP::Request->new(GET => $url);
+
+ # Check if a file to store the output has been provided.
+ if ($file) {
+ # Check if the given file already exits, because it has been downloaded in the past.
+ #
+ # In this case we are requesting the server if the remote file has been changed or not.
+ # This will be done by sending the modification time in a special HTTP header.
+ if (-f $file) {
+ # Call stat on the file.
+ my $stat = stat($file);
+
+ # Obtain the mtime of the existing file.
+ my $mtime = $stat->mtime;
+
+ # Convert the timestamp into right format.
+ my $http_date = time2str($mtime);
+
+ # Add the If-Modified-Since header to the request to ask the server if the
+ # file has been modified.
+ $request->header( 'If-Modified-Since' => "$http_date" );
+ }
+
+ # Generate temporary file name, located in the temporary download directory and with a suffix of ".tmp".
+ # The downloaded file will be stored there until some sanity checks are performed.
+ my $tmp = File::Temp->new( SUFFIX => ".tmp", DIR => "$tmp_dl_directory/", UNLINK => 0 );
+ $tmpfile = $tmp->filename();
+ }
+
+ # Check if a file for etags has been given.
+ if ($etags_file) {
+ # Read-in Etags file for known Etags if the file is present.
+ &readhash("$etags_file", \%etags) if (-f $etags_file);
+
+ # Check if an Etag for the current provider is stored.
+ if ($etags{$etagprefix}) {
+ # Grab the stored tag.
+ my $etag = $etags{$etagprefix};
+
+ # Add an "If-None-Match" header to the request to ask the server if the
+ # file has been modified.
+ $request->header( 'If-None-Match' => $etag );
+ }
+ }
+
+ my $dl_attempt = 1;
+ my $response;
+
+ # Download and retry on failure.
+ while ($dl_attempt <= $max_dl_attempts) {
+ # Perform the request and save the output into the tmpfile if requested.
+ $response = $ua->request($request, $tmpfile);
+
+ # Check if the download was successful.
+ if($response->is_success) {
+ # Break loop.
+ last;
+
+ # Check if the server responds with 304 (Not Modified).
+ } elsif ($response->code == 304) {
+ # Remove temporary file, if one exists.
+ unlink("$tmpfile") if (-e "$tmpfile");
+
+ # Return "not modified".
+ return "not modified";
+
+ # Check if we ran out of download re-tries.
+ } elsif ($dl_attempt eq $max_dl_attempts) {
+ # Obtain error.
+ my $error = $response->content;
+
+ # Remove temporary file, if one exists.
+ unlink("$tmpfile") if (-e "$tmpfile");
+
+ # Return the error message from the response.
+ return "$error";
+ }
+
+ # Remove temporary file, if one exists.
+ unlink("$tmpfile") if (-e "$tmpfile");
+
+ # Increase download attempt counter.
+ $dl_attempt++;
+ }
+
+ # Obtain the connection headers.
+ my $headers = $response->headers;
+
+ # Check if an Etag file has been provided.
+ if ($etags_file) {
+ # Grab the Etag from the response if the server provides one.
+ if ($response->header('Etag')) {
+ # Add the provided Etag to the hash of tags.
+ $etags{$etagprefix} = $response->header('Etag');
+
+ # Write the etags file.
+ &writehash($etags_file, \%etags);
+ }
+ }
+
+ # Check if the response should be stored on disk.
+ if ($file) {
+ # Get the remote size of the content.
+ my $remote_size = $response->header('Content-Length');
+
+ # Perform a stat on the temporary file.
+ my $stat = stat($tmpfile);
+
+ # Grab the size of the stored temporary file.
+ my $local_size = $stat->size;
+
+ # Check if both sizes are equal.
+ if(($remote_size) && ($remote_size ne $local_size)) {
+ # Delete the temporary file.
+ unlink("$tmpfile");
+
+ # Abort and return "incomplete download" as error.
+ return "incomplete download";
+ }
+
+ # Move the temporary file to the desired file, overwriting any
+ # existing one.
+ move("$tmpfile", "$file");
+
+ # Obtain the timestamp from the response header, when the file has been modified the
+ # last time.
+ my $last_modified = $headers->last_modified;
+
+ # Check if we got a last-modified value from the server.
+ if ($last_modified) {
+ # Assign the last-modified timestamp as mtime to the
+ # stored file.
+ utime(time(), "$last_modified", "$file");
+ }
+
+ # Delete temporary file.
+ unlink("$tmpfile");
+
+ # If we got here, everything worked fine. Return nothing.
+ return;
+ } else {
+ # Decode the response content and return it.
+ return $response->decoded_content;
+ }
+}
+
# Cloud Stuff
sub running_in_cloud() {
--
2.47.2
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 2/3] general-functions.pl: Use new downloader for FetchPublicIp function.
2025-03-22 14:57 [PATCH 1/3] general-functions.pl: Add LWP-based flexible downloader function Stefan Schantl
@ 2025-03-22 14:57 ` Stefan Schantl
2025-03-23 12:08 ` Michael Tremer
2025-03-22 14:57 ` [PATCH 3/3] ids-functions.pl: Use new general downloader function Stefan Schantl
2025-03-23 12:08 ` [PATCH 1/3] general-functions.pl: Add LWP-based flexible " Michael Tremer
2 siblings, 1 reply; 9+ messages in thread
From: Stefan Schantl @ 2025-03-22 14:57 UTC (permalink / raw)
To: development; +Cc: Stefan Schantl
This helps to drop the Net::SSLeay module and to remove a lot of legacy
code.
Signed-off-by: Stefan Schantl <stefan.schantl@ipfire.org>
---
config/cfgroot/general-functions.pl | 32 +++++++++++++----------------
1 file changed, 14 insertions(+), 18 deletions(-)
diff --git a/config/cfgroot/general-functions.pl b/config/cfgroot/general-functions.pl
index cb8df69c6..a132cf315 100644
--- a/config/cfgroot/general-functions.pl
+++ b/config/cfgroot/general-functions.pl
@@ -17,7 +17,6 @@ package General;
use strict;
use Socket;
use IO::Socket;
-use Net::SSLeay;
use Net::IPv4Addr qw(:all);
# Load module to move files.
@@ -979,23 +978,20 @@ sub findhasharraykey {
}
sub FetchPublicIp {
- my %proxysettings;
- &General::readhash("${General::swroot}/proxy/settings", \%proxysettings);
- if ($_=$proxysettings{'UPSTREAM_PROXY'}) {
- my ($peer, $peerport) = (/^(?:[a-zA-Z ]+\:\/\/)?(?:[A-Za-z0-9\_\.\-]*?(?:\:[A-Za-z0-9\_\.\-]*?)?\@)?([a-zA-Z0-9\.\_\-]*?)(?:\:([0-9]{1,5}))?(?:\/.*?)?$/);
- Net::SSLeay::set_proxy($peer,$peerport,$proxysettings{'UPSTREAM_USER'},$proxysettings{'UPSTREAM_PASSWORD'} );
- }
- my $user_agent = &MakeUserAgent();
- my ($out, $response) = Net::SSLeay::get_http( 'checkip4.dns.lightningwirelabs.com',
- 80,
- "/",
- Net::SSLeay::make_headers('User-Agent' => $user_agent )
- );
- if ($response =~ m%HTTP/1\.. 200 OK%) {
- $out =~ /Your IP address is: (\d+.\d+.\d+.\d+)/;
- return $1;
- }
- return '';
+ # URL to grab the public IP.
+ my $url = "https://checkip4.dns.lightningwirelabs.com";
+
+ # Call downloader to fetch the public IP.
+ my $response = &downloader("URL" => $url);
+
+ # Extract the address from the response message.
+ if ($response =~ /Your IP address is: (\d+.\d+.\d+.\d+)/) {
+ # Return the address.
+ return $1;
+ }
+
+ # Unable to grab the address - Return nothing.
+ return;
}
#
--
2.47.2
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 3/3] ids-functions.pl: Use new general downloader function.
2025-03-22 14:57 [PATCH 1/3] general-functions.pl: Add LWP-based flexible downloader function Stefan Schantl
2025-03-22 14:57 ` [PATCH 2/3] general-functions.pl: Use new downloader for FetchPublicIp function Stefan Schantl
@ 2025-03-22 14:57 ` Stefan Schantl
2025-03-23 12:08 ` [PATCH 1/3] general-functions.pl: Add LWP-based flexible " Michael Tremer
2 siblings, 0 replies; 9+ messages in thread
From: Stefan Schantl @ 2025-03-22 14:57 UTC (permalink / raw)
To: development; +Cc: Stefan Schantl
Signed-off-by: Stefan Schantl <stefan.schantl@ipfire.org>
---
config/cfgroot/ids-functions.pl | 196 +++-----------------------------
1 file changed, 17 insertions(+), 179 deletions(-)
diff --git a/config/cfgroot/ids-functions.pl b/config/cfgroot/ids-functions.pl
index 399f5cbf8..9f548e2e7 100644
--- a/config/cfgroot/ids-functions.pl
+++ b/config/cfgroot/ids-functions.pl
@@ -130,9 +130,6 @@ my $suricatactrl = "/usr/local/bin/suricatactrl";
# Prefix for each downloaded ruleset.
my $dl_rulesfile_prefix = "idsrules";
-# Temporary directory to download the rules files.
-my $tmp_dl_directory = "/var/tmp";
-
# Temporary directory where the rulesets will be extracted.
my $tmp_directory = "/tmp/ids_tmp";
@@ -299,61 +296,13 @@ sub checkdiskspace () {
#
## This function is responsible for downloading the ruleset for a given provider.
##
-## * At first it initialize the downloader and sets an upstream proxy if configured.
-## * The next step will be to generate the final download url, by obtaining the URL for the desired
-## ruleset and add the settings for the upstream proxy.
-## * Finally the function will grab the rule file or tarball from the server.
-## It tries to reduce the amount of download by using the "If-Modified-Since" HTTP header.
-#
-## Return codes:
-##
-## * "no url" - If no download URL could be gathered for the provider.
-## * "not modified" - In case the already stored rules file is up to date.
-## * "incomplete download" - When the remote file size differs from the downloaded file size.
-## * "$error" - The error message generated from the LWP::User Agent module.
+## It uses the LWP-based downloader function from the general-functions.pl to
+## download the ruleset for a requested provider.
#
sub downloadruleset ($) {
my ($provider) = @_;
- # The amount of download attempts before giving up and
- # logging an error.
- my $max_dl_attempts = 3;
-
- # Read proxysettings.
- my %proxysettings=();
- &General::readhash("${General::swroot}/proxy/settings", \%proxysettings);
-
- # Init the download module.
- #
- # Request SSL hostname verification and specify path
- # to the CA file.
- my $downloader = LWP::UserAgent->new(
- ssl_opts => {
- SSL_ca_file => '/etc/ssl/cert.pem',
- verify_hostname => 1,
- }
- );
-
- # Set timeout to 10 seconds.
- $downloader->timeout(10);
-
- # Check if an upstream proxy is configured.
- if ($proxysettings{'UPSTREAM_PROXY'}) {
- my $proxy_url;
-
- $proxy_url = "http://";
-
- # Check if the proxy requires authentication.
- if (($proxysettings{'UPSTREAM_USER'}) && ($proxysettings{'UPSTREAM_PASSWORD'})) {
- $proxy_url .= "$proxysettings{'UPSTREAM_USER'}\:$proxysettings{'UPSTREAM_PASSWORD'}\@";
- }
-
- # Add proxy server address and port.
- $proxy_url .= $proxysettings{'UPSTREAM_PROXY'};
-
- # Setup proxy settings.
- $downloader->proxy(['http', 'https'], $proxy_url);
- }
+ my %settings = ();
# Grab the download url for the provider.
my $url = $IDS::Ruleset::Providers{$provider}{'dl_url'};
@@ -371,141 +320,30 @@ sub downloadruleset ($) {
# Abort and return "no url", if no url could be determined for the provider.
return "no url" unless ($url);
- # Pass the requested URL to the downloader.
- my $request = HTTP::Request->new(GET => $url);
-
- # Generate temporary file name, located in the tempoary download directory and with a suffix of ".tmp".
- # The downloaded file will be stored there until some sanity checks are performed.
- my $tmp = File::Temp->new( SUFFIX => ".tmp", DIR => "$tmp_dl_directory/", UNLINK => 0 );
- my $tmpfile = $tmp->filename();
+ # Pass the requested URL to the settings hash.
+ $settings{'URL'} = $url;
# Call function to get the final path and filename for the downloaded file.
my $dl_rulesfile = &_get_dl_rulesfile($provider);
- # Check if the rulesfile already exits, because it has been downloaded in the past.
- #
- # In this case we are requesting the server if the remote file has been changed or not.
- # This will be done by sending the modification time in a special HTTP header.
- if (-f $dl_rulesfile) {
- # Call stat on the file.
- my $stat = stat($dl_rulesfile);
-
- # Omit the mtime of the existing file.
- my $mtime = $stat->mtime;
-
- # Convert the timestamp into right format.
- my $http_date = time2str($mtime);
+ # Add the file information to the settings hash.
+ $settings{'FILE'} = $dl_rulesfile;
- # Add the If-Modified-Since header to the request to ask the server if the
- # file has been modified.
- $request->header( 'If-Modified-Since' => "$http_date" );
- }
-
- # Read-in Etags file for known Etags if the file is present.
- my %etags = ();
- &General::readhash("$etags_file", \%etags) if (-f $etags_file);
+ # Add Etag details to the settings hash.
+ $settings{'ETAGSFILE'} = $etags_file;
+ $settings{'ETAGPREFIX'} = $provider;
- # Check if an Etag for the current provider is stored.
- if ($etags{$provider}) {
- # Grab the stored tag.
- my $etag = $etags{$provider};
+ # Call the downloader and pass the settings hash.
+ my $response = &General::downloader(%settings);
- # Add an "If-None-Match header to the request to ask the server if the
- # file has been modified.
- $request->header( 'If-None-Match' => $etag );
+ # Return the response message if the downloader provided one.
+ if ($response) {
+ return $response;
}
- my $dl_attempt = 1;
- my $response;
-
- # Download and retry on failure.
- while ($dl_attempt <= $max_dl_attempts) {
- # Perform the request and save the output into the tmpfile.
- $response = $downloader->request($request, $tmpfile);
-
- # Check if the download was successfull.
- if($response->is_success) {
- # Break loop.
- last;
-
- # Check if the server responds with 304 (Not Modified).
- } elsif ($response->code == 304) {
- # Remove temporary file, if one exists.
- unlink("$tmpfile") if (-e "$tmpfile");
-
- # Return "not modified".
- return "not modified";
-
- # Check if we ran out of download re-tries.
- } elsif ($dl_attempt eq $max_dl_attempts) {
- # Obtain error.
- my $error = $response->content;
-
- # Remove temporary file, if one exists.
- unlink("$tmpfile") if (-e "$tmpfile");
-
- # Return the error message from response..
- return "$error";
- }
-
- # Remove temporary file, if one exists.
- unlink("$tmpfile") if (-e "$tmpfile");
-
- # Increase download attempt counter.
- $dl_attempt++;
- }
-
- # Obtain the connection headers.
- my $headers = $response->headers;
-
- # Get the timestamp from header, when the file has been modified the
- # last time.
- my $last_modified = $headers->last_modified;
-
- # Get the remote size of the downloaded file.
- my $remote_filesize = $headers->content_length;
-
- # Grab the Etag from response it the server provides one.
- if ($response->header('Etag')) {
- # Add the Etag to the etags hash.
- $etags{$provider} = $response->header('Etag');
-
- # Write the etags file.
- &General::writehash($etags_file, \%etags);
- }
-
- # Perform stat on the tmpfile.
- my $stat = stat($tmpfile);
-
- # Grab the local filesize of the downloaded tarball.
- my $local_filesize = $stat->size;
-
- # Check if both file sizes match.
- if (($remote_filesize) && ($remote_filesize ne $local_filesize)) {
- # Delete temporary file.
- unlink("$tmpfile");
-
- # Return "1" - false.
- return "incomplete download";
- }
-
- # Overwrite the may existing rulefile or tarball with the downloaded one.
- move("$tmpfile", "$dl_rulesfile");
-
- # Check if we got a last-modified value from the server.
- if ($last_modified) {
- # Assign the last-modified timestamp as mtime to the
- # rules file.
- utime(time(), "$last_modified", "$dl_rulesfile");
- }
-
- # Delete temporary file.
- unlink("$tmpfile");
-
- # Set correct ownership for the tarball.
- set_ownership("$dl_rulesfile");
+ # Set correct ownership for the downloaded rules file.
+ &set_ownership("$dl_rulesfile");
- # If we got here, everything worked fine. Return nothing.
return;
}
--
2.47.2
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/3] general-functions.pl: Add LWP-based flexible downloader function.
2025-03-22 14:57 [PATCH 1/3] general-functions.pl: Add LWP-based flexible downloader function Stefan Schantl
2025-03-22 14:57 ` [PATCH 2/3] general-functions.pl: Use new downloader for FetchPublicIp function Stefan Schantl
2025-03-22 14:57 ` [PATCH 3/3] ids-functions.pl: Use new general downloader function Stefan Schantl
@ 2025-03-23 12:08 ` Michael Tremer
2025-03-23 19:00 ` Stefan Schantl
2 siblings, 1 reply; 9+ messages in thread
From: Michael Tremer @ 2025-03-23 12:08 UTC (permalink / raw)
To: Stefan Schantl; +Cc: development
Morning Stefan,
Thank you for taking the time and getting this out of your git repository and onto this list :)
As we are already shipping this code and this patch only moves it around, there is probably not much standing in the way of seeing this being merged. I have however a couple of questions that we might want to address in a follow-up patch.
> On 22 Mar 2025, at 14:57, Stefan Schantl <stefan.schantl@ipfire.org> wrote:
>
> This function can be used to grab content and/or store it into files.
>
> Signed-off-by: Stefan Schantl <stefan.schantl@ipfire.org>
> ---
> config/cfgroot/general-functions.pl | 262 ++++++++++++++++++++++++++++
> 1 file changed, 262 insertions(+)
>
> diff --git a/config/cfgroot/general-functions.pl b/config/cfgroot/general-functions.pl
> index 8ba6e3f79..cb8df69c6 100644
> --- a/config/cfgroot/general-functions.pl
> +++ b/config/cfgroot/general-functions.pl
> @@ -20,6 +20,23 @@ use IO::Socket;
> use Net::SSLeay;
> use Net::IPv4Addr qw(:all);
>
> +# Load module to move files.
> +use File::Copy;
> +
> +# Load module to get file stats.
> +use File::stat;
> +
> +# Load module to deal with temporary files.
> +use File::Temp;
> +
> +# Load module to deal with the date formats used by the HTTP protocol.
> +use HTTP::Date;
> +
> +# Load the libwwwperl User Agent module.
> +use LWP::UserAgent;
Recently (let’s say a year ago) I have spent some time on making the web UI faster. Especially on some hardware, it is not very snappy. The reason for the slowness however was not that we are doing much stuff in our code; it was rather that loading modules was taking a long time in Perl. So I removed what was no longer needed and moved some other stuff around which slightly helped (saved around 100ms on the older IPFire Mini Appliance).
Since the downloading functionality is not used *that* much here, should this maybe not live in a separate place so that we only include it when it is needed?
> +
> +$|=1; # line buffering
Why is this needed?
> +
> $General::version = 'VERSION';
> $General::swroot = 'CONFIG_ROOT';
> $General::noipprefix = 'noipg-';
> @@ -1346,6 +1363,251 @@ sub generateProtoTransHash () {
> return %protocols;
> }
>
> +# Function to grab a given URL content or to download and store it on disk.
> +#
> +# The function requires a configuration hash to be passed.
> +#
> +# The following options (hash keys) are supported:
> +#
> +# URL -> The URL to the content or file. REQUIRED!
> +# FILE -> The filename as fullpath where the content/file should be stored on disk.
> +# ETAGSFILE -> A filename again as fullpath where Etags should be stored and read.
> +# ETAGPREFIX -> In case a custom etag name should be used, otherwise it defaults to the given URL.
> +# MAXSIZE -> In bytes until the downloader will abort downloading. (example: 10_485_760 for 10MB)
> +#
> +# If a file is given an If-Modified-Since header will be generated from the last modified timestamp
> +# of an already stored file. In case an Etag file is specified an If-None-Match header will be added to
> +# the request - Both can be used at the same time.
> +#
> +# In case no FILE option has been passed to the function, the content of the requested URL will be returned.
> +#
> +# Return codes (if FILE is used):
> +#
> +# nothing - On success
> +# no url - If no URL has been specified.
> +# not_modified - In case the servers responds with "Not modified" (304)
> +# dl_error - If the requested URL cannot be accessed.
> +# incomplete download - In case the size of the local file does not match the remote content_lenght.
> +#
> +sub downloader (%) {
> + my (%args) = @_;
> +
> + # Remap args hash and convert all keys into upper case format.
> + %args = map { uc $_ => $args{$_} } keys %args;
> +
> + # The amount of download attempts before giving up and
> + # logging an error.
> + my $max_dl_attempts = 3;
> +
> + # Temporary directory to download the files.
> + my $tmp_dl_directory = "/var/tmp";
> +
> + # Assign hash values.
> + my $url = $args{"URL"} if (exists($args{"URL"}));
> + my $file = $args{"FILE"} if (exists($args{"FILE"}));
> + my $etags_file = $args{"ETAGSFILE"} if (exists($args{"ETAGSFILE"}));
> + my $etagprefix = $url;
> + $etagprefix = $args{"ETAGPREFIX"} if (exists($args{"ETAGPREFIX"}));
> + my $max_size = $args{"MAXSIZE"} if (exists($args{"MAXSIZE"}));
I consider this really ugly, but I suppose this is what Perl wants.
> + # Abort with error "no url", if no URL has been given.
> + return "no url" unless ($url);
Could we rather die here with an error? Just returning a string here seems to be too easy to ignore in case something above went wrong.
> +
> + my %etags = ();
> + my $tmpfile;
> +
> + # Read-in proxysettings.
> + my %proxysettings=();
> + &readhash("${General::swroot}/proxy/settings", \%proxysettings);
> +
> + # Create a user agent instance.
> + #
> + # Request SSL hostname verification and specify path
> + # to the CA file.
> + my $ua = LWP::UserAgent->new(
> + ssl_opts => {
> + SSL_ca_file => '/etc/ssl/cert.pem',
> + verify_hostname => 1,
> + },
> + );
> +
> + # Set timeout to 10 seconds.
> + $ua->timeout(10);
A 10 second timeout also sounds rather optimistic considering some slower, overloaded mirrors or those that are quite far away. For any tasks like downloading data in the background we could afford longer.
> +
> + # Assign maximum download size if set.
> + $ua->max_size($max_size) if ($max_size);
> +
> + # Generate UserAgent.
> + my $agent = &MakeUserAgent();
> +
> + # Set the generated UserAgent.
> + $ua->agent($agent);
> +
> + # Check if an upstream proxy is configured.
> + if ($proxysettings{'UPSTREAM_PROXY'}) {
> + my $proxy_url;
> +
> + $proxy_url = "http://";
> +
> + # Check if the proxy requires authentication.
> + if (($proxysettings{'UPSTREAM_USER'}) && ($proxysettings{'UPSTREAM_PASSWORD'})) {
> + $proxy_url .= "$proxysettings{'UPSTREAM_USER'}\:$proxysettings{'UPSTREAM_PASSWORD'}\@";
> + }
> +
> + # Add proxy server address and port.
> + $proxy_url .= $proxysettings{'UPSTREAM_PROXY'};
> +
> + # Append proxy settings.
> + $ua->proxy(['http', 'https'], $proxy_url);
> + }
> +
> + # Create a HTTP request element and pass the given URL to it.
> + my $request = HTTP::Request->new(GET => $url);
> +
> + # Check if a file to store the output has been provided.
> + if ($file) {
> + # Check if the given file already exits, because it has been downloaded in the past.
> + #
> + # In this case we are requesting the server if the remote file has been changed or not.
> + # This will be done by sending the modification time in a special HTTP header.
> + if (-f $file) {
> + # Call stat on the file.
> + my $stat = stat($file);
> +
> + # Omit the mtime of the existing file.
> + my $mtime = $stat->mtime;
> +
> + # Convert the timestamp into right format.
> + my $http_date = time2str($mtime);
> +
> + # Add the If-Modified-Since header to the request to ask the server if the
> + # file has been modified.
> + $request->header( 'If-Modified-Since' => "$http_date" );
> + }
> +
> + # Generate temporary file name, located in the tempoary download directory and with a suffix of ".tmp".
> + # The downloaded file will be stored there until some sanity checks are performed.
> + my $tmp = File::Temp->new( SUFFIX => ".tmp", DIR => "$tmp_dl_directory/", UNLINK => 0 );
> + $tmpfile = $tmp->filename();
> + }
> +
> + # Check if an file for etags has been given.
> + if ($etags_file) {
> + # Read-in Etags file for known Etags if the file is present.
> + &readhash("$etags_file", \%etags) if (-f $etags_file);
> +
> + # Check if an Etag for the current provider is stored.
> + if ($etags{$etagprefix}) {
> + # Grab the stored tag.
> + my $etag = $etags{$etagprefix};
> +
> + # Add an "If-None-Match header to the request to ask the server if the
> + # file has been modified.
> + $request->header( 'If-None-Match' => $etag );
> + }
> + }
> +
> + my $dl_attempt = 1;
> + my $response;
> +
> + # Download and retry on failure.
> + while ($dl_attempt <= $max_dl_attempts) {
> + # Perform the request and save the output into the tmpfile if requested.
> + $response = $ua->request($request, $tmpfile);
> +
> + # Check if the download was successfull.
> + if($response->is_success) {
> + # Break loop.
> + last;
> +
> + # Check if the server responds with 304 (Not Modified).
> + } elsif ($response->code == 304) {
> + # Remove temporary file, if one exists.
> + unlink("$tmpfile") if (-e "$tmpfile");
> +
> + # Return "not modified".
> + return "not modified";
Same here. It feels like strings are not the most ideal return code.
> +
> + # Check if we ran out of download re-tries.
> + } elsif ($dl_attempt eq $max_dl_attempts) {
> + # Obtain error.
> + my $error = $response->content;
> +
> + # Remove temporary file, if one exists.
> + unlink("$tmpfile") if (-e "$tmpfile");
> +
> + # Return the error message from response..
> + return "$error";
> + }
> +
> + # Remove temporary file, if one exists.
> + unlink("$tmpfile") if (-e "$tmpfile");
> +
> + # Increase download attempt counter.
> + $dl_attempt++;
> + }
> +
> + # Obtain the connection headers.
> + my $headers = $response->headers;
> +
> + # Check if an Etag file has been provided.
> + if ($etags_file) {
> + # Grab the Etag from the response if the server provides one.
> + if ($response->header('Etag')) {
> + # Add the provided Etag to the hash of tags.
> + $etags{$etagprefix} = $response->header('Etag');
> +
> + # Write the etags file.
> + &writehash($etags_file, \%etags);
> + }
> + }
> +
> + # Check if the response should be stored on disk.
> + if ($file) {
> + # Get the remote size of the content.
> + my $remote_size = $response->header('Content-Length');
> +
> + # Perform a stat on the temporary file.
> + my $stat = stat($tmpfile);
> +
> + # Grab the size of the stored temporary file.
> + my $local_size = $stat->size;
> +
> + # Check if both sizes are equal.
> + if(($remote_size) && ($remote_size ne $local_size)) {
> + # Delete the temporary file.
> + unlink("$tmpfile");
> +
> + # Abort and return "incomplete download" as error.
> + return "incomplete download";
> + }
> +
> + # Move the temporaray file to the desired file by overwriting a may
> + # existing one.
> + move("$tmpfile", "$file");
> +
> + # Omit the timestamp from response header, when the file has been modified the
> + # last time.
> + my $last_modified = $headers->last_modified;
> +
> + # Check if we got a last-modified value from the server.
> + if ($last_modified) {
> + # Assign the last-modified timestamp as mtime to the
> + # stored file.
> + utime(time(), "$last_modified", "$file");
> + }
> +
> + # Delete temporary file.
> + unlink("$tmpfile");
> +
> + # If we got here, everything worked fine. Return nothing.
> + return;
> + } else {
> + # Decode the response content and return it.
> + return $response->decoded_content;
> + }
> +}
> +
> # Cloud Stuff
>
> sub running_in_cloud() {
> --
> 2.47.2
>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/3] general-functions.pl: Use new downloader for FetchPublicIp function.
2025-03-22 14:57 ` [PATCH 2/3] general-functions.pl: Use new downloader for FetchPublicIp function Stefan Schantl
@ 2025-03-23 12:08 ` Michael Tremer
0 siblings, 0 replies; 9+ messages in thread
From: Michael Tremer @ 2025-03-23 12:08 UTC (permalink / raw)
To: Stefan Schantl; +Cc: development
This is much better and cleaner code than before. Well done!
> On 22 Mar 2025, at 14:57, Stefan Schantl <stefan.schantl@ipfire.org> wrote:
>
> This helps to drop the Net::SSLeay module and to remove a lot of legacy
> code.
>
> Signed-off-by: Stefan Schantl <stefan.schantl@ipfire.org>
> ---
> config/cfgroot/general-functions.pl | 32 +++++++++++++----------------
> 1 file changed, 14 insertions(+), 18 deletions(-)
>
> diff --git a/config/cfgroot/general-functions.pl b/config/cfgroot/general-functions.pl
> index cb8df69c6..a132cf315 100644
> --- a/config/cfgroot/general-functions.pl
> +++ b/config/cfgroot/general-functions.pl
> @@ -17,7 +17,6 @@ package General;
> use strict;
> use Socket;
> use IO::Socket;
> -use Net::SSLeay;
> use Net::IPv4Addr qw(:all);
>
> # Load module to move files.
> @@ -979,23 +978,20 @@ sub findhasharraykey {
> }
>
> sub FetchPublicIp {
> - my %proxysettings;
> - &General::readhash("${General::swroot}/proxy/settings", \%proxysettings);
> - if ($_=$proxysettings{'UPSTREAM_PROXY'}) {
> - my ($peer, $peerport) = (/^(?:[a-zA-Z ]+\:\/\/)?(?:[A-Za-z0-9\_\.\-]*?(?:\:[A-Za-z0-9\_\.\-]*?)?\@)?([a-zA-Z0-9\.\_\-]*?)(?:\:([0-9]{1,5}))?(?:\/.*?)?$/);
> - Net::SSLeay::set_proxy($peer,$peerport,$proxysettings{'UPSTREAM_USER'},$proxysettings{'UPSTREAM_PASSWORD'} );
> - }
> - my $user_agent = &MakeUserAgent();
> - my ($out, $response) = Net::SSLeay::get_http( 'checkip4.dns.lightningwirelabs.com',
> - 80,
> - "/",
> - Net::SSLeay::make_headers('User-Agent' => $user_agent )
> - );
> - if ($response =~ m%HTTP/1\.. 200 OK%) {
> - $out =~ /Your IP address is: (\d+.\d+.\d+.\d+)/;
> - return $1;
> - }
> - return '';
> + # URL to grab the public IP.
> + my $url = "https://checkip4.dns.lightningwirelabs.com";
> +
> + # Call downloader to fetch the public IP.
> + my $response = &downloader("URL" => $url);
> +
> + # Omit the address from the resonse message.
> + if ($response =~ /Your IP address is: (\d+.\d+.\d+.\d+)/) {
> + # Return the address.
> + return $1;
> + }
> +
> + # Unable to grab the address - Return nothing.
> + return;
> }
>
> #
> --
> 2.47.2
>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/3] general-functions.pl: Add LWP-based flexible downloader function.
2025-03-23 12:08 ` [PATCH 1/3] general-functions.pl: Add LWP-based flexible " Michael Tremer
@ 2025-03-23 19:00 ` Stefan Schantl
2025-03-24 10:02 ` Michael Tremer
0 siblings, 1 reply; 9+ messages in thread
From: Stefan Schantl @ 2025-03-23 19:00 UTC (permalink / raw)
To: Michael Tremer; +Cc: development
Hello Michael,
thanks for the review and your detailed feedback.
You are right, this patch mainly moves the downloader logic from the
ipblocklist feature into its own, easy-to-use general function.
> Morning Stefan,
>
> Thank you for taking the time and getting this out of your git
> repository and onto this list :)
>
> As we are already shipping this code and this patch only moves it
> around, there is probably not much standing in the way of seeing this
> being merged. I have however a couple of questions that we might want
> to address in a follow-up patch.
>
> > On 22 Mar 2025, at 14:57, Stefan Schantl
> > <stefan.schantl@ipfire.org> wrote:
> >
> > This function can be used to grab content and/or store it into
> > files.
> >
> > Signed-off-by: Stefan Schantl <stefan.schantl@ipfire.org>
> > ---
> > config/cfgroot/general-functions.pl | 262
> > ++++++++++++++++++++++++++++
> > 1 file changed, 262 insertions(+)
> >
> > diff --git a/config/cfgroot/general-functions.pl
> > b/config/cfgroot/general-functions.pl
> > index 8ba6e3f79..cb8df69c6 100644
> > --- a/config/cfgroot/general-functions.pl
> > +++ b/config/cfgroot/general-functions.pl
> > @@ -20,6 +20,23 @@ use IO::Socket;
> > use Net::SSLeay;
> > use Net::IPv4Addr qw(:all);
> >
> > +# Load module to move files.
> > +use File::Copy;
> > +
> > +# Load module to get file stats.
> > +use File::stat;
> > +
> > +# Load module to deal with temporary files.
> > +use File::Temp;
> > +
> > +# Load module to deal with the date formats used by the HTTP
> > protocol.
> > +use HTTP::Date;
> > +
> > +# Load the libwwwperl User Agent module.
> > +use LWP::UserAgent;
>
> Recently (let’s say a year ago) I have spent some time on making the
> web UI faster. Especially on some hardware, is it not very snappy.
> The reason for the slowness however was not that we are doing much
> stuff in our code; it was rather that loading modules was taking a
> long time in Perl. So I removed what was no longer needed and moved
> some other stuff around which slightly helped (saved around 100ms on
> the older IPFire Mini Appliance).
>
> Since the downloading functionality is not used *that* much here.
> Should this maybe not live in a separate place and we only include it
> when it is needed?
Sounds good, so I'm totally fine with this suggestion. This can be part
of a following patch(set).
>
> > +
> > +$|=1; # line buffering
>
> Why is this needed?
This line does not belong to the patch and is already part of the
general-functions.pl file.
The following can be found in the perl special variables manual about
$|:
$| - If set to nonzero, forces an fflush(3) after every write or print
on the currently selected output channel.
I'm not sure if this line is required in general.
>
> > +
> > $General::version = 'VERSION';
> > $General::swroot = 'CONFIG_ROOT';
> > $General::noipprefix = 'noipg-';
> > @@ -1346,6 +1363,251 @@ sub generateProtoTransHash () {
> > return %protocols;
> > }
> >
> > +# Function to grab a given URL content or to download and store it
> > on disk.
> > +#
> > +# The function requires a configuration hash to be passed.
> > +#
> > +# The following options (hash keys) are supported:
> > +#
> > +# URL -> The URL to the content or file. REQUIRED!
> > +# FILE -> The filename as fullpath where the content/file should
> > be stored on disk.
> > +# ETAGSFILE -> A filename again as fullpath where Etags should be
> > stored and read.
> > +# ETAGPREFIX -> In case a custom etag name should be used,
> > otherwise it defaults to the given URL.
> > +# MAXSIZE -> In bytes until the downloader will abort downloading.
> > (example: 10_485_760 for 10MB)
> > +#
> > +# If a file is given an If-Modified-Since header will be generated
> > from the last modified timestamp
> > +# of an already stored file. In case an Etag file is specified an
> > If-None-Match header will be added to
> > +# the request - Both can be used at the same time.
> > +#
> > +# In case no FILE option has been passed to the function, the
> > content of the requested URL will be returned.
> > +#
> > +# Return codes (if FILE is used):
> > +#
> > +# nothing - On success
> > +# no url - If no URL has been specified.
> > +# not_modified - In case the servers responds with "Not modified"
> > (304)
> > +# dl_error - If the requested URL cannot be accessed.
> > +# incomplete download - In case the size of the local file does
> > not match the remote content_lenght.
> > +#
> > +sub downloader (%) {
> > + my (%args) = @_;
> > +
> > + # Remap args hash and convert all keys into upper case format.
> > + %args = map { uc $_ => $args{$_} } keys %args;
> > +
> > + # The amount of download attempts before giving up and
> > + # logging an error.
> > + my $max_dl_attempts = 3;
> > +
> > + # Temporary directory to download the files.
> > + my $tmp_dl_directory = "/var/tmp";
> > +
> > + # Assign hash values.
> > + my $url = $args{"URL"} if (exists($args{"URL"}));
> > + my $file = $args{"FILE"} if (exists($args{"FILE"}));
> > + my $etags_file = $args{"ETAGSFILE"} if
> > (exists($args{"ETAGSFILE"}));
> > + my $etagprefix = $url;
> > + $etagprefix = $args{"ETAGPREFIX"} if
> > (exists($args{"ETAGPREFIX"}));
> > + my $max_size = $args{"MAXSIZE"} if (exists($args{"MAXSIZE"}));
>
> I consider this really ugly, but I suppose this is what Perl wants.
>
> > + # Abort with error "no url", if no URL has been given.
> > + return "no url" unless ($url);
>
> Could we rather die here with an error? Just returning a string here
> seems to be too easy to ignore in case something above went wrong.
Technically you are right, but let me explain the reason for this kind
of error handling a bit further down.
>
> > +
> > + my %etags = ();
> > + my $tmpfile;
> > +
> > + # Read-in proxysettings.
> > + my %proxysettings=();
> > + &readhash("${General::swroot}/proxy/settings", \%proxysettings);
> > +
> > + # Create a user agent instance.
> > + #
> > + # Request SSL hostname verification and specify path
> > + # to the CA file.
> > + my $ua = LWP::UserAgent->new(
> > + ssl_opts => {
> > + SSL_ca_file => '/etc/ssl/cert.pem',
> > + verify_hostname => 1,
> > + },
> > + );
> > +
> > + # Set timeout to 10 seconds.
> > + $ua->timeout(10);
>
> A 10 second timeout also sounds rather optimistic considering some
> slower, overloaded mirrors or those that are quite far away. For any
> tasks like downloading data in the background we could afford longer.
Accepted, what would be a good default value here?
>
> > +
> > + # Assign maximum download size if set.
> > + $ua->max_size($max_size) if ($max_size);
> > +
> > + # Generate UserAgent.
> > + my $agent = &MakeUserAgent();
> > +
> > + # Set the generated UserAgent.
> > + $ua->agent($agent);
> > +
> > + # Check if an upstream proxy is configured.
> > + if ($proxysettings{'UPSTREAM_PROXY'}) {
> > + my $proxy_url;
> > +
> > + $proxy_url = "http://";
> > +
> > + # Check if the proxy requires authentication.
> > + if (($proxysettings{'UPSTREAM_USER'}) &&
> > ($proxysettings{'UPSTREAM_PASSWORD'})) {
> > + $proxy_url .=
> > "$proxysettings{'UPSTREAM_USER'}\:$proxysettings{'UPSTREAM_PASSWORD
> > '}\@";
> > + }
> > +
> > + # Add proxy server address and port.
> > + $proxy_url .= $proxysettings{'UPSTREAM_PROXY'};
> > +
> > + # Append proxy settings.
> > + $ua->proxy(['http', 'https'], $proxy_url);
> > + }
> > +
> > + # Create a HTTP request element and pass the given URL to it.
> > + my $request = HTTP::Request->new(GET => $url);
> > +
> > + # Check if a file to store the output has been provided.
> > + if ($file) {
> > + # Check if the given file already exits, because it has been
> > downloaded in the past.
> > + #
> > + # In this case we are requesting the server if the remote file
> > has been changed or not.
> > + # This will be done by sending the modification time in a special
> > HTTP header.
> > + if (-f $file) {
> > + # Call stat on the file.
> > + my $stat = stat($file);
> > +
> > + # Omit the mtime of the existing file.
> > + my $mtime = $stat->mtime;
> > +
> > + # Convert the timestamp into right format.
> > + my $http_date = time2str($mtime);
> > +
> > + # Add the If-Modified-Since header to the request to ask the
> > server if the
> > + # file has been modified.
> > + $request->header( 'If-Modified-Since' => "$http_date" );
> > + }
> > +
> > + # Generate temporary file name, located in the tempoary download
> > directory and with a suffix of ".tmp".
> > + # The downloaded file will be stored there until some sanity
> > checks are performed.
> > + my $tmp = File::Temp->new( SUFFIX => ".tmp", DIR =>
> > "$tmp_dl_directory/", UNLINK => 0 );
> > + $tmpfile = $tmp->filename();
> > + }
> > +
> > + # Check if an file for etags has been given.
> > + if ($etags_file) {
> > + # Read-in Etags file for known Etags if the file is present.
> > + &readhash("$etags_file", \%etags) if (-f $etags_file);
> > +
> > + # Check if an Etag for the current provider is stored.
> > + if ($etags{$etagprefix}) {
> > + # Grab the stored tag.
> > + my $etag = $etags{$etagprefix};
> > +
> > + # Add an "If-None-Match header to the request to ask the server
> > if the
> > + # file has been modified.
> > + $request->header( 'If-None-Match' => $etag );
> > + }
> > + }
> > +
> > + my $dl_attempt = 1;
> > + my $response;
> > +
> > + # Download and retry on failure.
> > + while ($dl_attempt <= $max_dl_attempts) {
> > + # Perform the request and save the output into the tmpfile if
> > requested.
> > + $response = $ua->request($request, $tmpfile);
> > +
> > + # Check if the download was successfull.
> > + if($response->is_success) {
> > + # Break loop.
> > + last;
> > +
> > + # Check if the server responds with 304 (Not Modified).
> > + } elsif ($response->code == 304) {
> > + # Remove temporary file, if one exists.
> > + unlink("$tmpfile") if (-e "$tmpfile");
> > +
> > + # Return "not modified".
> > + return "not modified";
>
> Same here. It feels like strings are not the most ideal return code.
As promised above, I'm going to explain why I decided to return various
strings in case of different errors. Calling die() immediately
terminates the perl (main) script.
So for example when processing a list of downloads and such an
event/exception/error appeared in my opinion the main script, which
called the function should decide how to go further.
This is very similar to C where you call the function and take care of
the return code and decide what to do next.
So in this case I have to disagree with you and strongly recommend to
keep the error processing as it is.
Best regards,
-Stefan
>
> > +
> > + # Check if we ran out of download re-tries.
> > + } elsif ($dl_attempt eq $max_dl_attempts) {
> > + # Obtain error.
> > + my $error = $response->content;
> > +
> > + # Remove temporary file, if one exists.
> > + unlink("$tmpfile") if (-e "$tmpfile");
> > +
> > + # Return the error message from response..
> > + return "$error";
> > + }
> > +
> > + # Remove temporary file, if one exists.
> > + unlink("$tmpfile") if (-e "$tmpfile");
> > +
> > + # Increase download attempt counter.
> > + $dl_attempt++;
> > + }
> > +
> > + # Obtain the connection headers.
> > + my $headers = $response->headers;
> > +
> > + # Check if an Etag file has been provided.
> > + if ($etags_file) {
> > + # Grab the Etag from the response if the server provides one.
> > + if ($response->header('Etag')) {
> > + # Add the provided Etag to the hash of tags.
> > + $etags{$etagprefix} = $response->header('Etag');
> > +
> > + # Write the etags file.
> > + &writehash($etags_file, \%etags);
> > + }
> > + }
> > +
> > + # Check if the response should be stored on disk.
> > + if ($file) {
> > + # Get the remote size of the content.
> > + my $remote_size = $response->header('Content-Length');
> > +
> > + # Perform a stat on the temporary file.
> > + my $stat = stat($tmpfile);
> > +
> > + # Grab the size of the stored temporary file.
> > + my $local_size = $stat->size;
> > +
> > + # Check if both sizes are equal.
> > + if(($remote_size) && ($remote_size ne $local_size)) {
> > + # Delete the temporary file.
> > + unlink("$tmpfile");
> > +
> > + # Abort and return "incomplete download" as error.
> > + return "incomplete download";
> > + }
> > +
> > + # Move the temporaray file to the desired file by overwriting a
> > may
> > + # existing one.
> > + move("$tmpfile", "$file");
> > +
> > + # Omit the timestamp from response header, when the file has been
> > modified the
> > + # last time.
> > + my $last_modified = $headers->last_modified;
> > +
> > + # Check if we got a last-modified value from the server.
> > + if ($last_modified) {
> > + # Assign the last-modified timestamp as mtime to the
> > + # stored file.
> > + utime(time(), "$last_modified", "$file");
> > + }
> > +
> > + # Delete temporary file.
> > + unlink("$tmpfile");
> > +
> > + # If we got here, everything worked fine. Return nothing.
> > + return;
> > + } else {
> > + # Decode the response content and return it.
> > + return $response->decoded_content;
> > + }
> > +}
> > +
> > # Cloud Stuff
> >
> > sub running_in_cloud() {
> > --
> > 2.47.2
> >
> >
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/3] general-functions.pl: Add LWP-based flexible downloader function.
2025-03-23 19:00 ` Stefan Schantl
@ 2025-03-24 10:02 ` Michael Tremer
2025-03-26 12:47 ` Stefan Schantl
0 siblings, 1 reply; 9+ messages in thread
From: Michael Tremer @ 2025-03-24 10:02 UTC (permalink / raw)
To: Stefan Schantl; +Cc: development
Hello,
> On 23 Mar 2025, at 19:00, Stefan Schantl <stefan.schantl@ipfire.org> wrote:
>
> Hello Michael,
>
> thanks for the review and your detailed feedback.
>
> You are right this patch mainly moves the downloader logic from the
> ipblocklist feature into an own easy to use general function.
>
>> Morning Stefan,
>>
>> Thank you for taking the time and getting this out of your git
>> repository and onto this list :)
>>
>> As we are already shipping this code and this patch only moves it
>> around, there is probably not much standing in the way of seeing this
>> being merged. I have however a couple of questions that we might want
>> to address in a follow-up patch.
>>
>>> On 22 Mar 2025, at 14:57, Stefan Schantl
>>> <stefan.schantl@ipfire.org> wrote:
>>>
>>> This function can be used to grab content and/or store it into
>>> files.
>>>
>>> Signed-off-by: Stefan Schantl <stefan.schantl@ipfire.org>
>>> ---
>>> config/cfgroot/general-functions.pl | 262
>>> ++++++++++++++++++++++++++++
>>> 1 file changed, 262 insertions(+)
>>>
>>> diff --git a/config/cfgroot/general-functions.pl
>>> b/config/cfgroot/general-functions.pl
>>> index 8ba6e3f79..cb8df69c6 100644
>>> --- a/config/cfgroot/general-functions.pl
>>> +++ b/config/cfgroot/general-functions.pl
>>> @@ -20,6 +20,23 @@ use IO::Socket;
>>> use Net::SSLeay;
>>> use Net::IPv4Addr qw(:all);
>>>
>>> +# Load module to move files.
>>> +use File::Copy;
>>> +
>>> +# Load module to get file stats.
>>> +use File::stat;
>>> +
>>> +# Load module to deal with temporary files.
>>> +use File::Temp;
>>> +
>>> +# Load module to deal with the date formats used by the HTTP
>>> protocol.
>>> +use HTTP::Date;
>>> +
>>> +# Load the libwwwperl User Agent module.
>>> +use LWP::UserAgent;
>>
>> Recently (let’s say a year ago) I have spent some time on making the
>> web UI faster. Especially on some hardware, is it not very snappy.
>> The reason for the slowness however was not that we are doing much
>> stuff in our code; it was rather that loading modules was taking a
>> long time in Perl. So I removed what was no longer needed and moved
>> some other stuff around which slightly helped (saved around 100ms on
>> the older IPFire Mini Appliance).
>>
>> Since the downloading functionality is not used *that* much here.
>> Should this maybe not live in a separate place and we only include it
>> when it is needed?
>
> Sounds good, so I'm totally fine with this suggestion. This can be part
> of a following patch(set).
Well, moving it should not be a separate patch set, because we should not move it twice.
But any of the other fixes should be implemented in a separate patch each.
>>
>>> +
>>> +$|=1; # line buffering
>>
>> Why is this needed?
>
> This line does not belong to the patch and is already part of the
> general-functions.pl file.
>
> The following can be found in the perl special variables manual about
> $|:
>
> $| - If set to nonzero, forces an fflush(3) after every write or print
> on the currently selected output channel.
>
> I'm not sure, if this line is required in general.
I know what it does. I just wanted to know why this has been enabled here.
>
>>
>>> +
>>> $General::version = 'VERSION';
>>> $General::swroot = 'CONFIG_ROOT';
>>> $General::noipprefix = 'noipg-';
>>> @@ -1346,6 +1363,251 @@ sub generateProtoTransHash () {
>>> return %protocols;
>>> }
>>>
>>> +# Function to grab a given URL content or to download and store it
>>> on disk.
>>> +#
>>> +# The function requires a configuration hash to be passed.
>>> +#
>>> +# The following options (hash keys) are supported:
>>> +#
>>> +# URL -> The URL to the content or file. REQUIRED!
>>> +# FILE -> The filename as fullpath where the content/file should
>>> be stored on disk.
>>> +# ETAGSFILE -> A filename again as fullpath where Etags should be
>>> stored and read.
>>> +# ETAGPREFIX -> In case a custom etag name should be used,
>>> otherwise it defaults to the given URL.
>>> +# MAXSIZE -> In bytes until the downloader will abort downloading.
>>> (example: 10_485_760 for 10MB)
>>> +#
>>> +# If a file is given an If-Modified-Since header will be generated
>>> from the last modified timestamp
>>> +# of an already stored file. In case an Etag file is specified an
>>> If-None-Match header will be added to
>>> +# the request - Both can be used at the same time.
>>> +#
>>> +# In case no FILE option has been passed to the function, the
>>> content of the requested URL will be returned.
>>> +#
>>> +# Return codes (if FILE is used):
>>> +#
>>> +# nothing - On success
>>> +# no url - If no URL has been specified.
>>> +# not_modified - In case the servers responds with "Not modified"
>>> (304)
>>> +# dl_error - If the requested URL cannot be accessed.
>>> +# incomplete download - In case the size of the local file does
>>> not match the remote content_lenght.
>>> +#
>>> +sub downloader (%) {
>>> + my (%args) = @_;
>>> +
>>> + # Remap args hash and convert all keys into upper case format.
>>> + %args = map { uc $_ => $args{$_} } keys %args;
>>> +
>>> + # The amount of download attempts before giving up and
>>> + # logging an error.
>>> + my $max_dl_attempts = 3;
>>> +
>>> + # Temporary directory to download the files.
>>> + my $tmp_dl_directory = "/var/tmp";
>>> +
>>> + # Assign hash values.
>>> + my $url = $args{"URL"} if (exists($args{"URL"}));
>>> + my $file = $args{"FILE"} if (exists($args{"FILE"}));
>>> + my $etags_file = $args{"ETAGSFILE"} if
>>> (exists($args{"ETAGSFILE"}));
>>> + my $etagprefix = $url;
>>> + $etagprefix = $args{"ETAGPREFIX"} if
>>> (exists($args{"ETAGPREFIX"}));
>>> + my $max_size = $args{"MAXSIZE"} if (exists($args{"MAXSIZE"}));
>>
>> I consider this really ugly, but I suppose this is what Perl wants.
>>
>>> + # Abort with error "no url", if no URL has been given.
>>> + return "no url" unless ($url);
>>
>> Could we rather die here with an error? Just returning a string here
>> seems to be too easy to ignore in case something above went wrong.
>
> Technically you are right, but let me explain the reason for this kind
> of error handling a bit further down.
>
>>
>>> +
>>> + my %etags = ();
>>> + my $tmpfile;
>>> +
>>> + # Read-in proxysettings.
>>> + my %proxysettings=();
>>> + &readhash("${General::swroot}/proxy/settings", \%proxysettings);
>>> +
>>> + # Create a user agent instance.
>>> + #
>>> + # Request SSL hostname verification and specify path
>>> + # to the CA file.
>>> + my $ua = LWP::UserAgent->new(
>>> + ssl_opts => {
>>> + SSL_ca_file => '/etc/ssl/cert.pem',
>>> + verify_hostname => 1,
>>> + },
>>> + );
>>> +
>>> + # Set timeout to 10 seconds.
>>> + $ua->timeout(10);
>>
>> A 10 second timeout also sounds rather optimistic considering some
>> slower, overloaded mirrors or those that are quite far away. For any
>> tasks like downloading data in the background we could afford longer.
>
> Accepted, what would be a good default value here?
60 seconds? I think it makes sense to make this optionally configurable so that for the IP check that has been changed here, we will only wait around 10 seconds as it is a simple API request.
>
>>
>>> +
>>> + # Assign maximum download size if set.
>>> + $ua->max_size($max_size) if ($max_size);
>>> +
>>> + # Generate UserAgent.
>>> + my $agent = &MakeUserAgent();
>>> +
>>> + # Set the generated UserAgent.
>>> + $ua->agent($agent);
>>> +
>>> + # Check if an upstream proxy is configured.
>>> + if ($proxysettings{'UPSTREAM_PROXY'}) {
>>> + my $proxy_url;
>>> +
>>> + $proxy_url = "http://";
>>> +
>>> + # Check if the proxy requires authentication.
>>> + if (($proxysettings{'UPSTREAM_USER'}) &&
>>> ($proxysettings{'UPSTREAM_PASSWORD'})) {
>>> + $proxy_url .=
>>> "$proxysettings{'UPSTREAM_USER'}\:$proxysettings{'UPSTREAM_PASSWORD
>>> '}\@";
>>> + }
>>> +
>>> + # Add proxy server address and port.
>>> + $proxy_url .= $proxysettings{'UPSTREAM_PROXY'};
>>> +
>>> + # Append proxy settings.
>>> + $ua->proxy(['http', 'https'], $proxy_url);
>>> + }
>>> +
>>> + # Create a HTTP request element and pass the given URL to it.
>>> + my $request = HTTP::Request->new(GET => $url);
>>> +
>>> + # Check if a file to store the output has been provided.
>>> + if ($file) {
>>> + # Check if the given file already exits, because it has been
>>> downloaded in the past.
>>> + #
>>> + # In this case we are requesting the server if the remote file
>>> has been changed or not.
>>> + # This will be done by sending the modification time in a special
>>> HTTP header.
>>> + if (-f $file) {
>>> + # Call stat on the file.
>>> + my $stat = stat($file);
>>> +
>>> + # Omit the mtime of the existing file.
>>> + my $mtime = $stat->mtime;
>>> +
>>> + # Convert the timestamp into right format.
>>> + my $http_date = time2str($mtime);
>>> +
>>> + # Add the If-Modified-Since header to the request to ask the
>>> server if the
>>> + # file has been modified.
>>> + $request->header( 'If-Modified-Since' => "$http_date" );
>>> + }
>>> +
>>> + # Generate temporary file name, located in the tempoary download
>>> directory and with a suffix of ".tmp".
>>> + # The downloaded file will be stored there until some sanity
>>> checks are performed.
>>> + my $tmp = File::Temp->new( SUFFIX => ".tmp", DIR =>
>>> "$tmp_dl_directory/", UNLINK => 0 );
>>> + $tmpfile = $tmp->filename();
>>> + }
>>> +
>>> + # Check if an file for etags has been given.
>>> + if ($etags_file) {
>>> + # Read-in Etags file for known Etags if the file is present.
>>> + &readhash("$etags_file", \%etags) if (-f $etags_file);
>>> +
>>> + # Check if an Etag for the current provider is stored.
>>> + if ($etags{$etagprefix}) {
>>> + # Grab the stored tag.
>>> + my $etag = $etags{$etagprefix};
>>> +
>>> + # Add an "If-None-Match header to the request to ask the server
>>> if the
>>> + # file has been modified.
>>> + $request->header( 'If-None-Match' => $etag );
>>> + }
>>> + }
>>> +
>>> + my $dl_attempt = 1;
>>> + my $response;
>>> +
>>> + # Download and retry on failure.
>>> + while ($dl_attempt <= $max_dl_attempts) {
>>> + # Perform the request and save the output into the tmpfile if
>>> requested.
>>> + $response = $ua->request($request, $tmpfile);
>>> +
>>> + # Check if the download was successfull.
>>> + if($response->is_success) {
>>> + # Break loop.
>>> + last;
>>> +
>>> + # Check if the server responds with 304 (Not Modified).
>>> + } elsif ($response->code == 304) {
>>> + # Remove temporary file, if one exists.
>>> + unlink("$tmpfile") if (-e "$tmpfile");
>>> +
>>> + # Return "not modified".
>>> + return "not modified";
>>
>> Same here. It feels like strings are not the most ideal return code.
>
> As promised up, I'm going to explain why I decided to return various
> strings in case of different errors. Calling die() immediately
> terminates the perl (main) script.
This is correct, and I never really like this behaviour. However, Perl does not know exceptions and so we cannot gracefully throw an error.
But, not passing a URL is a programming error and that should get some attention.
The “not modified” case should of course not call die.
> So for example when processing a list of downloads and such an
> event/exception/error appeared in my opinion the main script, which
> called the function should decide how to go further.
>
> This is very similar to C where you call the function and take care of
> the return code and decide what to do next.
Usually in C we return an integer or even better an enum.
> So in this case I have to disagree with you and strongly recommend to
> keep the error processing at it is.
>
> Best regards,
>
> -Stefan
>
>>
>>> +
>>> + # Check if we ran out of download re-tries.
>>> + } elsif ($dl_attempt eq $max_dl_attempts) {
>>> + # Obtain error.
>>> + my $error = $response->content;
>>> +
>>> + # Remove temporary file, if one exists.
>>> + unlink("$tmpfile") if (-e "$tmpfile");
>>> +
>>> + # Return the error message from response..
>>> + return "$error";
>>> + }
>>> +
>>> + # Remove temporary file, if one exists.
>>> + unlink("$tmpfile") if (-e "$tmpfile");
>>> +
>>> + # Increase download attempt counter.
>>> + $dl_attempt++;
>>> + }
>>> +
>>> + # Obtain the connection headers.
>>> + my $headers = $response->headers;
>>> +
>>> + # Check if an Etag file has been provided.
>>> + if ($etags_file) {
>>> + # Grab the Etag from the response if the server provides one.
>>> + if ($response->header('Etag')) {
>>> + # Add the provided Etag to the hash of tags.
>>> + $etags{$etagprefix} = $response->header('Etag');
>>> +
>>> + # Write the etags file.
>>> + &writehash($etags_file, \%etags);
>>> + }
>>> + }
>>> +
>>> + # Check if the response should be stored on disk.
>>> + if ($file) {
>>> + # Get the remote size of the content.
>>> + my $remote_size = $response->header('Content-Length');
>>> +
>>> + # Perform a stat on the temporary file.
>>> + my $stat = stat($tmpfile);
>>> +
>>> + # Grab the size of the stored temporary file.
>>> + my $local_size = $stat->size;
>>> +
>>> + # Check if both sizes are equal.
>>> + if(($remote_size) && ($remote_size ne $local_size)) {
>>> + # Delete the temporary file.
>>> + unlink("$tmpfile");
>>> +
>>> + # Abort and return "incomplete download" as error.
>>> + return "incomplete download";
>>> + }
>>> +
>>> + # Move the temporaray file to the desired file by overwriting a
>>> may
>>> + # existing one.
>>> + move("$tmpfile", "$file");
>>> +
>>> + # Omit the timestamp from response header, when the file has been
>>> modified the
>>> + # last time.
>>> + my $last_modified = $headers->last_modified;
>>> +
>>> + # Check if we got a last-modified value from the server.
>>> + if ($last_modified) {
>>> + # Assign the last-modified timestamp as mtime to the
>>> + # stored file.
>>> + utime(time(), "$last_modified", "$file");
>>> + }
>>> +
>>> + # Delete temporary file.
>>> + unlink("$tmpfile");
>>> +
>>> + # If we got here, everything worked fine. Return nothing.
>>> + return;
>>> + } else {
>>> + # Decode the response content and return it.
>>> + return $response->decoded_content;
>>> + }
>>> +}
>>> +
>>> # Cloud Stuff
>>>
>>> sub running_in_cloud() {
>>> --
>>> 2.47.2
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/3] general-functions.pl: Add LWP-based flexible downloader function.
2025-03-24 10:02 ` Michael Tremer
@ 2025-03-26 12:47 ` Stefan Schantl
2025-03-26 16:18 ` Michael Tremer
0 siblings, 1 reply; 9+ messages in thread
From: Stefan Schantl @ 2025-03-26 12:47 UTC (permalink / raw)
To: Michael Tremer; +Cc: development
[-- Attachment #1: Type: text/plain, Size: 15174 bytes --]
Am 24. März 2025 11:02:08 schrieb Michael Tremer <michael.tremer@ipfire.org>:
> Hello,
>
>> On 23 Mar 2025, at 19:00, Stefan Schantl <stefan.schantl@ipfire.org> wrote:
>>
>> Hello Michael,
>>
>> thanks for the review and your detailed feedback.
>>
>> You are right this patch mainly moves the downloader logic from the
>> ipblocklist feature into an own easy to use general function.
>>
>>> Morning Stefan,
>>>
>>> Thank you for taking the time and getting this out of your git
>>> repository and onto this list :)
>>>
>>> As we are already shipping this code and this patch only moves it
>>> around, there is probably not much standing in the way of seeing this
>>> being merged. I have however a couple of questions that we might want
>>> to address in a follow-up patch.
>>>
>>>> On 22 Mar 2025, at 14:57, Stefan Schantl
>>>> <stefan.schantl@ipfire.org> wrote:
>>>>
>>>> This function can be used to grab content and/or store it into
>>>> files.
>>>>
>>>> Signed-off-by: Stefan Schantl <stefan.schantl@ipfire.org>
>>>> ---
>>>> config/cfgroot/general-functions.pl | 262
>>>> ++++++++++++++++++++++++++++
>>>> 1 file changed, 262 insertions(+)
>>>>
>>>> diff --git a/config/cfgroot/general-functions.pl
>>>> b/config/cfgroot/general-functions.pl
>>>> index 8ba6e3f79..cb8df69c6 100644
>>>> --- a/config/cfgroot/general-functions.pl
>>>> +++ b/config/cfgroot/general-functions.pl
>>>> @@ -20,6 +20,23 @@ use IO::Socket;
>>>> use Net::SSLeay;
>>>> use Net::IPv4Addr qw(:all);
>>>>
>>>> +# Load module to move files.
>>>> +use File::Copy;
>>>> +
>>>> +# Load module to get file stats.
>>>> +use File::stat;
>>>> +
>>>> +# Load module to deal with temporary files.
>>>> +use File::Temp;
>>>> +
>>>> +# Load module to deal with the date formats used by the HTTP
>>>> protocol.
>>>> +use HTTP::Date;
>>>> +
>>>> +# Load the libwwwperl User Agent module.
>>>> +use LWP::UserAgent;
>>>
>>> Recently (let’s say a year ago) I have spent some time on making the
>>> web UI faster. Especially on some hardware, is it not very snappy.
>>> The reason for the slowness however was not that we are doing much
>>> stuff in our code; it was rather that loading modules was taking a
>>> long time in Perl. So I removed what was no longer needed and moved
>>> some other stuff around which slightly helped (saved around 100ms on
>>> the older IPFire Mini Appliance).
>>>
>>> Since the downloading functionality is not used *that* much here.
>>> Should this maybe not live in a separate place and we only include it
>>> when it is needed?
>>
>> Sounds good, so I'm totally fine with this suggestion. This can be part
>> of a following patch(set).
>
> Well, moving it should not be a separate patch set, because we should not
> move it twice.
Hello Michael,
this absolutely makes sense. Any ideas for the filename?
I think downloader.pl could be misinterpreted by certain users - maybe
web-function.pl?
>
> But any of the other fixes should be implemented in a separate patch each.
Let's do it this way.
>
>
>>>
>>>> +
>>>> +$|=1; # line buffering
>>>
>>> Why is this needed?
>>
>> This line does not belong to the patch and is already part of the
>> general-functions.pl file.
>>
>> The following can be found in the perl special variables manual about
>> $|:
>>
>> $| - If set to nonzero, forces an fflush(3) after every write or print
>> on the currently selected output channel.
>>
>> I'm not sure, if this line is required in general.
>
> I know what it does. I just wanted to know why this has been enabled here.
Short answer: No clue.
>
>
>>
>>>
>>>> +
>>>> $General::version = 'VERSION';
>>>> $General::swroot = 'CONFIG_ROOT';
>>>> $General::noipprefix = 'noipg-';
>>>> @@ -1346,6 +1363,251 @@ sub generateProtoTransHash () {
>>>> return %protocols;
>>>> }
>>>>
>>>> +# Function to grab a given URL content or to download and store it
>>>> on disk.
>>>> +#
>>>> +# The function requires a configuration hash to be passed.
>>>> +#
>>>> +# The following options (hash keys) are supported:
>>>> +#
>>>> +# URL -> The URL to the content or file. REQUIRED!
>>>> +# FILE -> The filename as fullpath where the content/file should
>>>> be stored on disk.
>>>> +# ETAGSFILE -> A filename again as fullpath where Etags should be
>>>> stored and read.
>>>> +# ETAGPREFIX -> In case a custom etag name should be used,
>>>> otherwise it defaults to the given URL.
>>>> +# MAXSIZE -> In bytes until the downloader will abort downloading.
>>>> (example: 10_485_760 for 10MB)
>>>> +#
>>>> +# If a file is given an If-Modified-Since header will be generated
>>>> from the last modified timestamp
>>>> +# of an already stored file. In case an Etag file is specified an
>>>> If-None-Match header will be added to
>>>> +# the request - Both can be used at the same time.
>>>> +#
>>>> +# In case no FILE option has been passed to the function, the
>>>> content of the requested URL will be returned.
>>>> +#
>>>> +# Return codes (if FILE is used):
>>>> +#
>>>> +# nothing - On success
>>>> +# no url - If no URL has been specified.
>>>> +# not_modified - In case the servers responds with "Not modified"
>>>> (304)
>>>> +# dl_error - If the requested URL cannot be accessed.
>>>> +# incomplete download - In case the size of the local file does
>>>> not match the remote content_length.
>>>> +#
>>>> +sub downloader (%) {
>>>> + my (%args) = @_;
>>>> +
>>>> + # Remap args hash and convert all keys into upper case format.
>>>> + %args = map { uc $_ => $args{$_} } keys %args;
>>>> +
>>>> + # The amount of download attempts before giving up and
>>>> + # logging an error.
>>>> + my $max_dl_attempts = 3;
>>>> +
>>>> + # Temporary directory to download the files.
>>>> + my $tmp_dl_directory = "/var/tmp";
>>>> +
>>>> + # Assign hash values.
>>>> + my $url = $args{"URL"} if (exists($args{"URL"}));
>>>> + my $file = $args{"FILE"} if (exists($args{"FILE"}));
>>>> + my $etags_file = $args{"ETAGSFILE"} if
>>>> (exists($args{"ETAGSFILE"}));
>>>> + my $etagprefix = $url;
>>>> + $etagprefix = $args{"ETAGPREFIX"} if
>>>> (exists($args{"ETAGPREFIX"}));
>>>> + my $max_size = $args{"MAXSIZE"} if (exists($args{"MAXSIZE"}));
>>>
>>> I consider this really ugly, but I suppose this is what Perl wants.
>>>
>>>> + # Abort with error "no url", if no URL has been given.
>>>> + return "no url" unless ($url);
>>>
>>> Could we rather die here with an error? Just returning a string here
>>> seems to be too easy to ignore in case something above went wrong.
>>
>> Technically you are right, but let me explain the reason for this kind
>> of error handling a bit further down.
>>
>>>
>>>> +
>>>> + my %etags = ();
>>>> + my $tmpfile;
>>>> +
>>>> + # Read-in proxysettings.
>>>> + my %proxysettings=();
>>>> + &readhash("${General::swroot}/proxy/settings", \%proxysettings);
>>>> +
>>>> + # Create a user agent instance.
>>>> + #
>>>> + # Request SSL hostname verification and specify path
>>>> + # to the CA file.
>>>> + my $ua = LWP::UserAgent->new(
>>>> + ssl_opts => {
>>>> + SSL_ca_file => '/etc/ssl/cert.pem',
>>>> + verify_hostname => 1,
>>>> + },
>>>> + );
>>>> +
>>>> + # Set timeout to 10 seconds.
>>>> + $ua->timeout(10);
>>>
>>> A 10 second timeout also sounds rather optimistic considering some
>>> slower, overloaded mirrors or those that are quite far away. For any
>>> tasks like downloading data in the background we could afford longer.
>>
>> Accepted, what would be a good default value here?
>
> 60 seconds? I think it makes sense to make this optionally configurable so
> that for the IP check that has been changed here, we will only wait around
> 10 seconds as it is a simple API request.
I'll implement it as optional and config-able feature. Default if not set
would be 60 seconds.
>
>
>>
>>>
>>>> +
>>>> + # Assign maximum download size if set.
>>>> + $ua->max_size($max_size) if ($max_size);
>>>> +
>>>> + # Generate UserAgent.
>>>> + my $agent = &MakeUserAgent();
>>>> +
>>>> + # Set the generated UserAgent.
>>>> + $ua->agent($agent);
>>>> +
>>>> + # Check if an upstream proxy is configured.
>>>> + if ($proxysettings{'UPSTREAM_PROXY'}) {
>>>> + my $proxy_url;
>>>> +
>>>> + $proxy_url = "http://";
>>>> +
>>>> + # Check if the proxy requires authentication.
>>>> + if (($proxysettings{'UPSTREAM_USER'}) &&
>>>> ($proxysettings{'UPSTREAM_PASSWORD'})) {
>>>> + $proxy_url .=
>>>> "$proxysettings{'UPSTREAM_USER'}\:$proxysettings{'UPSTREAM_PASSWORD
>>>> '}\@";
>>>> + }
>>>> +
>>>> + # Add proxy server address and port.
>>>> + $proxy_url .= $proxysettings{'UPSTREAM_PROXY'};
>>>> +
>>>> + # Append proxy settings.
>>>> + $ua->proxy(['http', 'https'], $proxy_url);
>>>> + }
>>>> +
>>>> + # Create a HTTP request element and pass the given URL to it.
>>>> + my $request = HTTP::Request->new(GET => $url);
>>>> +
>>>> + # Check if a file to store the output has been provided.
>>>> + if ($file) {
>>>> + # Check if the given file already exits, because it has been
>>>> downloaded in the past.
>>>> + #
>>>> + # In this case we are requesting the server if the remote file
>>>> has been changed or not.
>>>> + # This will be done by sending the modification time in a special
>>>> HTTP header.
>>>> + if (-f $file) {
>>>> + # Call stat on the file.
>>>> + my $stat = stat($file);
>>>> +
>>>> + # Obtain the mtime of the existing file.
>>>> + my $mtime = $stat->mtime;
>>>> +
>>>> + # Convert the timestamp into right format.
>>>> + my $http_date = time2str($mtime);
>>>> +
>>>> + # Add the If-Modified-Since header to the request to ask the
>>>> server if the
>>>> + # file has been modified.
>>>> + $request->header( 'If-Modified-Since' => "$http_date" );
>>>> + }
>>>> +
>>>> + # Generate temporary file name, located in the temporary download
>>>> directory and with a suffix of ".tmp".
>>>> + # The downloaded file will be stored there until some sanity
>>>> checks are performed.
>>>> + my $tmp = File::Temp->new( SUFFIX => ".tmp", DIR =>
>>>> "$tmp_dl_directory/", UNLINK => 0 );
>>>> + $tmpfile = $tmp->filename();
>>>> + }
>>>> +
>>>> + # Check if an file for etags has been given.
>>>> + if ($etags_file) {
>>>> + # Read-in Etags file for known Etags if the file is present.
>>>> + &readhash("$etags_file", \%etags) if (-f $etags_file);
>>>> +
>>>> + # Check if an Etag for the current provider is stored.
>>>> + if ($etags{$etagprefix}) {
>>>> + # Grab the stored tag.
>>>> + my $etag = $etags{$etagprefix};
>>>> +
>>>> + # Add an "If-None-Match" header to the request to ask the server
>>>> if the
>>>> + # file has been modified.
>>>> + $request->header( 'If-None-Match' => $etag );
>>>> + }
>>>> + }
>>>> +
>>>> + my $dl_attempt = 1;
>>>> + my $response;
>>>> +
>>>> + # Download and retry on failure.
>>>> + while ($dl_attempt <= $max_dl_attempts) {
>>>> + # Perform the request and save the output into the tmpfile if
>>>> requested.
>>>> + $response = $ua->request($request, $tmpfile);
>>>> +
>>>> + # Check if the download was successful.
>>>> + if($response->is_success) {
>>>> + # Break loop.
>>>> + last;
>>>> +
>>>> + # Check if the server responds with 304 (Not Modified).
>>>> + } elsif ($response->code == 304) {
>>>> + # Remove temporary file, if one exists.
>>>> + unlink("$tmpfile") if (-e "$tmpfile");
>>>> +
>>>> + # Return "not modified".
>>>> + return "not modified";
>>>
>>> Same here. It feels like strings are not the most ideal return code.
>>
>> As promised up, I'm going to explain why I decided to return various
>> strings in case of different errors. Calling die() immediately
>> terminates the perl (main) script.
>
> This is correct, and I never really like this behaviour. However, Perl does
> not know exceptions and so we cannot gracefully throw an error.
>
> But, not passing a URL is a programming error and that should get some
> attention.
Yeah, you have to take care of this kind of perl behaviour and I'm also not in
favour of it. That was the reason why I decided to do it in this way.
I see the reason why you are recommending to die in case no url has passed
- I'll adjust the code.
Best regards,
-Stefan
>
>
> The “not modified” case should of course not call die.
>
>> So for example when processing a list of downloads and such an
>> event/exception/error appeared in my opinion the main script, which
>> called the function should decide how to go further.
>>
>> This is very similar to C where you call the function and take care of
>> the return code and decide what to do next.
>
> Usually in C we return an integer or even better an enum.
>
>> So in this case I have to disagree with you and strongly recommend to
>> keep the error processing at it is.
>>
>> Best regards,
>>
>> -Stefan
>>
>>>
>>>> +
>>>> + # Check if we ran out of download re-tries.
>>>> + } elsif ($dl_attempt eq $max_dl_attempts) {
>>>> + # Obtain error.
>>>> + my $error = $response->content;
>>>> +
>>>> + # Remove temporary file, if one exists.
>>>> + unlink("$tmpfile") if (-e "$tmpfile");
>>>> +
>>>> + # Return the error message from response..
>>>> + return "$error";
>>>> + }
>>>> +
>>>> + # Remove temporary file, if one exists.
>>>> + unlink("$tmpfile") if (-e "$tmpfile");
>>>> +
>>>> + # Increase download attempt counter.
>>>> + $dl_attempt++;
>>>> + }
>>>> +
>>>> + # Obtain the connection headers.
>>>> + my $headers = $response->headers;
>>>> +
>>>> + # Check if an Etag file has been provided.
>>>> + if ($etags_file) {
>>>> + # Grab the Etag from the response if the server provides one.
>>>> + if ($response->header('Etag')) {
>>>> + # Add the provided Etag to the hash of tags.
>>>> + $etags{$etagprefix} = $response->header('Etag');
>>>> +
>>>> + # Write the etags file.
>>>> + &writehash($etags_file, \%etags);
>>>> + }
>>>> + }
>>>> +
>>>> + # Check if the response should be stored on disk.
>>>> + if ($file) {
>>>> + # Get the remote size of the content.
>>>> + my $remote_size = $response->header('Content-Length');
>>>> +
>>>> + # Perform a stat on the temporary file.
>>>> + my $stat = stat($tmpfile);
>>>> +
>>>> + # Grab the size of the stored temporary file.
>>>> + my $local_size = $stat->size;
>>>> +
>>>> + # Check if both sizes are equal.
>>>> + if(($remote_size) && ($remote_size ne $local_size)) {
>>>> + # Delete the temporary file.
>>>> + unlink("$tmpfile");
>>>> +
>>>> + # Abort and return "incomplete download" as error.
>>>> + return "incomplete download";
>>>> + }
>>>> +
>>>> + # Move the temporary file to the desired file, overwriting a
>>>> + # possibly existing one.
>>>> + move("$tmpfile", "$file");
>>>> +
>>>> + # Omit the timestamp from response header, when the file has been
>>>> modified the
>>>> + # last time.
>>>> + my $last_modified = $headers->last_modified;
>>>> +
>>>> + # Check if we got a last-modified value from the server.
>>>> + if ($last_modified) {
>>>> + # Assign the last-modified timestamp as mtime to the
>>>> + # stored file.
>>>> + utime(time(), "$last_modified", "$file");
>>>> + }
>>>> +
>>>> + # Delete temporary file.
>>>> + unlink("$tmpfile");
>>>> +
>>>> + # If we got here, everything worked fine. Return nothing.
>>>> + return;
>>>> + } else {
>>>> + # Decode the response content and return it.
>>>> + return $response->decoded_content;
>>>> + }
>>>> +}
>>>> +
>>>> # Cloud Stuff
>>>>
>>>> sub running_in_cloud() {
>>>> --
>>>> 2.47.2
[-- Attachment #2: Type: text/html, Size: 28562 bytes --]
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/3] general-functions.pl: Add LWP-based flexible downloader function.
2025-03-26 12:47 ` Stefan Schantl
@ 2025-03-26 16:18 ` Michael Tremer
0 siblings, 0 replies; 9+ messages in thread
From: Michael Tremer @ 2025-03-26 16:18 UTC (permalink / raw)
To: Stefan Schantl; +Cc: development
Hello Stefan,
> On 26 Mar 2025, at 12:47, Stefan Schantl <stefan.schantl@ipfire.org> wrote:
>
>
>
> Am 24. März 2025 11:02:08 schrieb Michael Tremer <michael.tremer@ipfire.org>:
>
>> Hello,
>>
>>> On 23 Mar 2025, at 19:00, Stefan Schantl <stefan.schantl@ipfire.org> wrote:
>>>
>>> Hello Michael,
>>>
>>> thanks for the review and your detailed feedback.
>>>
>>> You are right this patch mainly moves the downloader logic from the
>>> ipblocklist feature into an own easy to use general function.
>>>
>>>> Morning Stefan,
>>>>
>>>> Thank you for taking the time and getting this out of your git
>>>> repository and onto this list :)
>>>>
>>>> As we are already shipping this code and this patch only moves it
>>>> around, there is probably not much standing in the way of seeing this
>>>> being merged. I have however a couple of questions that we might want
>>>> to address in a follow-up patch.
>>>>
>>>>> On 22 Mar 2025, at 14:57, Stefan Schantl
>>>>> <stefan.schantl@ipfire.org> wrote:
>>>>>
>>>>> This function can be used to grab content and/or store it into
>>>>> files.
>>>>>
>>>>> Signed-off-by: Stefan Schantl <stefan.schantl@ipfire.org>
>>>>> ---
>>>>> config/cfgroot/general-functions.pl | 262
>>>>> ++++++++++++++++++++++++++++
>>>>> 1 file changed, 262 insertions(+)
>>>>>
>>>>> diff --git a/config/cfgroot/general-functions.pl
>>>>> b/config/cfgroot/general-functions.pl
>>>>> index 8ba6e3f79..cb8df69c6 100644
>>>>> --- a/config/cfgroot/general-functions.pl
>>>>> +++ b/config/cfgroot/general-functions.pl
>>>>> @@ -20,6 +20,23 @@ use IO::Socket;
>>>>> use Net::SSLeay;
>>>>> use Net::IPv4Addr qw(:all);
>>>>>
>>>>> +# Load module to move files.
>>>>> +use File::Copy;
>>>>> +
>>>>> +# Load module to get file stats.
>>>>> +use File::stat;
>>>>> +
>>>>> +# Load module to deal with temporary files.
>>>>> +use File::Temp;
>>>>> +
>>>>> +# Load module to deal with the date formats used by the HTTP
>>>>> protocol.
>>>>> +use HTTP::Date;
>>>>> +
>>>>> +# Load the libwwwperl User Agent module.
>>>>> +use LWP::UserAgent;
>>>>
>>>> Recently (let’s say a year ago) I have spent some time on making the
>>>> web UI faster. Especially on some hardware, is it not very snappy.
>>>> The reason for the slowness however was not that we are doing much
>>>> stuff in our code; it was rather that loading modules was taking a
>>>> long time in Perl. So I removed what was no longer needed and moved
>>>> some other stuff around which slightly helped (saved around 100ms on
>>>> the older IPFire Mini Appliance).
>>>>
>>>> Since the downloading functionality is not used *that* much here.
>>>> Should this maybe not live in a separate place and we only include it
>>>> when it is needed?
>>>
>>> Sounds good, so I'm totally fine with this suggestion. This can be part
>>> of a following patch(set).
>>
>> Well, moving it should not be a separate patch set, because we should not move it twice.
>
>
> Hello Michael,
>
> this absolutely makes sense. Any ideas for the filename?
>
> I think downloader.pl could be misinterpreted by a certain users - maybe web-function.pl?
download-functions.pl?
>
>>
>> But any of the other fixes should be implemented in a separate patch each.
>
>
> Let's do it this way.
>
>>
>>>>
>>>>> +
>>>>> +$|=1; # line buffering
>>>>
>>>> Why is this needed?
>>>
>>> This line does not belong to the patch and is already part of the
>>> general-functions.pl file.
>>>
>>> The following can be found in the perl special variables manual about
>>> $|:
>>>
>>> $| - If set to nonzero, forces an fflush(3) after every write or print
>>> on the currently selected output channel.
>>>
>>> I'm not sure, if this line is required in general.
>>
>> I know what it does. I just wanted to know why this has been enabled here.
>
>
> Short answer: No clue.
Ooookay.
>
>>
>>>
>>>>
>>>>> +
>>>>> $General::version = 'VERSION';
>>>>> $General::swroot = 'CONFIG_ROOT';
>>>>> $General::noipprefix = 'noipg-';
>>>>> @@ -1346,6 +1363,251 @@ sub generateProtoTransHash () {
>>>>> return %protocols;
>>>>> }
>>>>>
>>>>> +# Function to grab a given URL content or to download and store it
>>>>> on disk.
>>>>> +#
>>>>> +# The function requires a configuration hash to be passed.
>>>>> +#
>>>>> +# The following options (hash keys) are supported:
>>>>> +#
>>>>> +# URL -> The URL to the content or file. REQUIRED!
>>>>> +# FILE -> The filename as fullpath where the content/file should
>>>>> be stored on disk.
>>>>> +# ETAGSFILE -> A filename again as fullpath where Etags should be
>>>>> stored and read.
>>>>> +# ETAGPREFIX -> In case a custom etag name should be used,
>>>>> otherwise it defaults to the given URL.
>>>>> +# MAXSIZE -> In bytes until the downloader will abort downloading.
>>>>> (example: 10_485_760 for 10MB)
>>>>> +#
>>>>> +# If a file is given an If-Modified-Since header will be generated
>>>>> from the last modified timestamp
>>>>> +# of an already stored file. In case an Etag file is specified an
>>>>> If-None-Match header will be added to
>>>>> +# the request - Both can be used at the same time.
>>>>> +#
>>>>> +# In case no FILE option has been passed to the function, the
>>>>> content of the requested URL will be returned.
>>>>> +#
>>>>> +# Return codes (if FILE is used):
>>>>> +#
>>>>> +# nothing - On success
>>>>> +# no url - If no URL has been specified.
>>>>> +# not_modified - In case the servers responds with "Not modified"
>>>>> (304)
>>>>> +# dl_error - If the requested URL cannot be accessed.
>>>>> +# incomplete download - In case the size of the local file does
>>>>> not match the remote content_lenght.
>>>>> +#
>>>>> +sub downloader (%) {
>>>>> + my (%args) = @_;
>>>>> +
>>>>> + # Remap args hash and convert all keys into upper case format.
>>>>> + %args = map { uc $_ => $args{$_} } keys %args;
>>>>> +
>>>>> + # The amount of download attempts before giving up and
>>>>> + # logging an error.
>>>>> + my $max_dl_attempts = 3;
>>>>> +
>>>>> + # Temporary directory to download the files.
>>>>> + my $tmp_dl_directory = "/var/tmp";
>>>>> +
>>>>> + # Assign hash values.
>>>>> + my $url = $args{"URL"} if (exists($args{"URL"}));
>>>>> + my $file = $args{"FILE"} if (exists($args{"FILE"}));
>>>>> + my $etags_file = $args{"ETAGSFILE"} if
>>>>> (exists($args{"ETAGSFILE"}));
>>>>> + my $etagprefix = $url;
>>>>> + $etagprefix = $args{"ETAGPREFIX"} if
>>>>> (exists($args{"ETAGPREFIX"}));
>>>>> + my $max_size = $args{"MAXSIZE"} if (exists($args{"MAXSIZE"}));
>>>>
>>>> I consider this really ugly, but I suppose this is what Perl wants.
>>>>
>>>>> + # Abort with error "no url", if no URL has been given.
>>>>> + return "no url" unless ($url);
>>>>
>>>> Could we rather die here with an error? Just returning a string here
>>>> seems to be too easy to ignore in case something above went wrong.
>>>
>>> Technically you are right, but let me explain the reason for this kind
>>> of error handling a bit further down.
>>>
>>>>
>>>>> +
>>>>> + my %etags = ();
>>>>> + my $tmpfile;
>>>>> +
>>>>> + # Read-in proxysettings.
>>>>> + my %proxysettings=();
>>>>> + &readhash("${General::swroot}/proxy/settings", \%proxysettings);
>>>>> +
>>>>> + # Create a user agent instance.
>>>>> + #
>>>>> + # Request SSL hostname verification and specify path
>>>>> + # to the CA file.
>>>>> + my $ua = LWP::UserAgent->new(
>>>>> + ssl_opts => {
>>>>> + SSL_ca_file => '/etc/ssl/cert.pem',
>>>>> + verify_hostname => 1,
>>>>> + },
>>>>> + );
>>>>> +
>>>>> + # Set timeout to 10 seconds.
>>>>> + $ua->timeout(10);
>>>>
>>>> A 10 second timeout also sounds rather optimistic considering some
>>>> slower, overloaded mirrors or those that are quite far away. For any
>>>> tasks like downloading data in the background we could afford longer.
>>>
>>> Accepted, what would be a good default value here?
>>
>> 60 seconds? I think it makes sense to make this optionally configurable so that for the IP check that has been changed here, we will only wait around 10 seconds as it is a simple API request.
>
>
> I'll implement it as optional and config-able feature. Default if not set would be 60 seconds.
Agreed.
>>
>>>
>>>>
>>>>> +
>>>>> + # Assign maximum download size if set.
>>>>> + $ua->max_size($max_size) if ($max_size);
>>>>> +
>>>>> + # Generate UserAgent.
>>>>> + my $agent = &MakeUserAgent();
>>>>> +
>>>>> + # Set the generated UserAgent.
>>>>> + $ua->agent($agent);
>>>>> +
>>>>> + # Check if an upstream proxy is configured.
>>>>> + if ($proxysettings{'UPSTREAM_PROXY'}) {
>>>>> + my $proxy_url;
>>>>> +
>>>>> + $proxy_url = "http://";
>>>>> +
>>>>> + # Check if the proxy requires authentication.
>>>>> + if (($proxysettings{'UPSTREAM_USER'}) &&
>>>>> ($proxysettings{'UPSTREAM_PASSWORD'})) {
>>>>> + $proxy_url .=
>>>>> "$proxysettings{'UPSTREAM_USER'}\:$proxysettings{'UPSTREAM_PASSWORD
>>>>> '}\@";
>>>>> + }
>>>>> +
>>>>> + # Add proxy server address and port.
>>>>> + $proxy_url .= $proxysettings{'UPSTREAM_PROXY'};
>>>>> +
>>>>> + # Append proxy settings.
>>>>> + $ua->proxy(['http', 'https'], $proxy_url);
>>>>> + }
>>>>> +
>>>>> + # Create a HTTP request element and pass the given URL to it.
>>>>> + my $request = HTTP::Request->new(GET => $url);
>>>>> +
>>>>> + # Check if a file to store the output has been provided.
>>>>> + if ($file) {
>>>>> + # Check if the given file already exits, because it has been
>>>>> downloaded in the past.
>>>>> + #
>>>>> + # In this case we are requesting the server if the remote file
>>>>> has been changed or not.
>>>>> + # This will be done by sending the modification time in a special
>>>>> HTTP header.
>>>>> + if (-f $file) {
>>>>> + # Call stat on the file.
>>>>> + my $stat = stat($file);
>>>>> +
>>>>> + # Omit the mtime of the existing file.
>>>>> + my $mtime = $stat->mtime;
>>>>> +
>>>>> + # Convert the timestamp into right format.
>>>>> + my $http_date = time2str($mtime);
>>>>> +
>>>>> + # Add the If-Modified-Since header to the request to ask the
>>>>> server if the
>>>>> + # file has been modified.
>>>>> + $request->header( 'If-Modified-Since' => "$http_date" );
>>>>> + }
>>>>> +
>>>>> + # Generate temporary file name, located in the tempoary download
>>>>> directory and with a suffix of ".tmp".
>>>>> + # The downloaded file will be stored there until some sanity
>>>>> checks are performed.
>>>>> + my $tmp = File::Temp->new( SUFFIX => ".tmp", DIR =>
>>>>> "$tmp_dl_directory/", UNLINK => 0 );
>>>>> + $tmpfile = $tmp->filename();
>>>>> + }
>>>>> +
>>>>> + # Check if an file for etags has been given.
>>>>> + if ($etags_file) {
>>>>> + # Read-in Etags file for known Etags if the file is present.
>>>>> + &readhash("$etags_file", \%etags) if (-f $etags_file);
>>>>> +
>>>>> + # Check if an Etag for the current provider is stored.
>>>>> + if ($etags{$etagprefix}) {
>>>>> + # Grab the stored tag.
>>>>> + my $etag = $etags{$etagprefix};
>>>>> +
>>>>> + # Add an "If-None-Match header to the request to ask the server
>>>>> if the
>>>>> + # file has been modified.
>>>>> + $request->header( 'If-None-Match' => $etag );
>>>>> + }
>>>>> + }
>>>>> +
>>>>> + my $dl_attempt = 1;
>>>>> + my $response;
>>>>> +
>>>>> + # Download and retry on failure.
>>>>> + while ($dl_attempt <= $max_dl_attempts) {
>>>>> + # Perform the request and save the output into the tmpfile if
>>>>> requested.
>>>>> + $response = $ua->request($request, $tmpfile);
>>>>> +
>>>>> + # Check if the download was successfull.
>>>>> + if($response->is_success) {
>>>>> + # Break loop.
>>>>> + last;
>>>>> +
>>>>> + # Check if the server responds with 304 (Not Modified).
>>>>> + } elsif ($response->code == 304) {
>>>>> + # Remove temporary file, if one exists.
>>>>> + unlink("$tmpfile") if (-e "$tmpfile");
>>>>> +
>>>>> + # Return "not modified".
>>>>> + return "not modified";
>>>>
>>>> Same here. It feels like strings are not the most ideal return code.
>>>
>>> As promised up, I'm going to explain why I decided to return various
>>> strings in case of different errors. Calling die() immediately
>>> terminates the perl (main) script.
>>
>> This is correct, and I never really like this behaviour. However, Perl does not know exceptions and so we cannot gracefully throw an error.
>>
>> But, not passing a URL is a programming error and that should get some attention.
>
>
> Yeah, you have take care of this kind of perl behaviour and I'm also not in favour of it. That was the reason why I decided to do it in this way.
>
> I see the reason why you are recommending to die in case no url has passed - I'll adjust the code.
>
> Best regards,
>
> -Stefan
>
>>
>> The “not modified” case should of course not call die.
>>
>>> So for example when processing a list of downloads and such an
>>> event/exception/error appeared in my opinion the main script, which
>>> called the function should decide how to go further.
>>>
>>> This is very similar to C where you call the function and take care of
>>> the return code and decide what to do next.
>>
>> Usually in C we return an integer or even better an enum.
>>
>>> So in this case I have to disagree with you and strongly recommend to
>>> keep the error processing at it is.
>>>
>>> Best regards,
>>>
>>> -Stefan
>>>
>>>>
>>>>> +
>>>>> + # Check if we ran out of download re-tries.
>>>>> + } elsif ($dl_attempt eq $max_dl_attempts) {
>>>>> + # Obtain error.
>>>>> + my $error = $response->content;
>>>>> +
>>>>> + # Remove temporary file, if one exists.
>>>>> + unlink("$tmpfile") if (-e "$tmpfile");
>>>>> +
>>>>> + # Return the error message from response..
>>>>> + return "$error";
>>>>> + }
>>>>> +
>>>>> + # Remove temporary file, if one exists.
>>>>> + unlink("$tmpfile") if (-e "$tmpfile");
>>>>> +
>>>>> + # Increase download attempt counter.
>>>>> + $dl_attempt++;
>>>>> + }
>>>>> +
>>>>> + # Obtain the connection headers.
>>>>> + my $headers = $response->headers;
>>>>> +
>>>>> + # Check if an Etag file has been provided.
>>>>> + if ($etags_file) {
>>>>> + # Grab the Etag from the response if the server provides one.
>>>>> + if ($response->header('Etag')) {
>>>>> + # Add the provided Etag to the hash of tags.
>>>>> + $etags{$etagprefix} = $response->header('Etag');
>>>>> +
>>>>> + # Write the etags file.
>>>>> + &writehash($etags_file, \%etags);
>>>>> + }
>>>>> + }
>>>>> +
>>>>> + # Check if the response should be stored on disk.
>>>>> + if ($file) {
>>>>> + # Get the remote size of the content.
>>>>> + my $remote_size = $response->header('Content-Length');
>>>>> +
>>>>> + # Perform a stat on the temporary file.
>>>>> + my $stat = stat($tmpfile);
>>>>> +
>>>>> + # Grab the size of the stored temporary file.
>>>>> + my $local_size = $stat->size;
>>>>> +
>>>>> + # Check if both sizes are equal.
>>>>> + if(($remote_size) && ($remote_size ne $local_size)) {
>>>>> + # Delete the temporary file.
>>>>> + unlink("$tmpfile");
>>>>> +
>>>>> + # Abort and return "incomplete download" as error.
>>>>> + return "incomplete download";
>>>>> + }
>>>>> +
>>>>> + # Move the temporary file to the desired file, overwriting a
>>>>> + # possibly existing one.
>>>>> + move("$tmpfile", "$file");
>>>>> +
>>>>> + # Omit the timestamp from response header, when the file has been
>>>>> modified the
>>>>> + # last time.
>>>>> + my $last_modified = $headers->last_modified;
>>>>> +
>>>>> + # Check if we got a last-modified value from the server.
>>>>> + if ($last_modified) {
>>>>> + # Assign the last-modified timestamp as mtime to the
>>>>> + # stored file.
>>>>> + utime(time(), "$last_modified", "$file");
>>>>> + }
>>>>> +
>>>>> + # Delete temporary file.
>>>>> + unlink("$tmpfile");
>>>>> +
>>>>> + # If we got here, everything worked fine. Return nothing.
>>>>> + return;
>>>>> + } else {
>>>>> + # Decode the response content and return it.
>>>>> + return $response->decoded_content;
>>>>> + }
>>>>> +}
>>>>> +
>>>>> # Cloud Stuff
>>>>>
>>>>> sub running_in_cloud() {
>>>>> --
>>>>> 2.47.2
>
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2025-03-26 16:18 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-22 14:57 [PATCH 1/3] general-functions.pl: Add LWP-based flexible downloader function Stefan Schantl
2025-03-22 14:57 ` [PATCH 2/3] general-functions.pl: Use new downloader for FetchPublicIp function Stefan Schantl
2025-03-23 12:08 ` Michael Tremer
2025-03-22 14:57 ` [PATCH 3/3] ids-functions.pl: Use new general downloader function Stefan Schantl
2025-03-23 12:08 ` [PATCH 1/3] general-functions.pl: Add LWP-based flexible " Michael Tremer
2025-03-23 19:00 ` Stefan Schantl
2025-03-24 10:02 ` Michael Tremer
2025-03-26 12:47 ` Stefan Schantl
2025-03-26 16:18 ` Michael Tremer
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox