From: Stefan Schantl <stefan.schantl@ipfire.org>
To: development@lists.ipfire.org
Cc: Stefan Schantl <stefan.schantl@ipfire.org>
Subject: [PATCH 1/3] general-functions.pl: Add LWP-based flexible downloader function.
Date: Sat, 22 Mar 2025 15:57:22 +0100 [thread overview]
Message-ID: <20250322145724.4593-1-stefan.schantl@ipfire.org> (raw)
This function can be used to grab content and/or store it into files.
Signed-off-by: Stefan Schantl <stefan.schantl@ipfire.org>
---
config/cfgroot/general-functions.pl | 262 ++++++++++++++++++++++++++++
1 file changed, 262 insertions(+)
diff --git a/config/cfgroot/general-functions.pl b/config/cfgroot/general-functions.pl
index 8ba6e3f79..cb8df69c6 100644
--- a/config/cfgroot/general-functions.pl
+++ b/config/cfgroot/general-functions.pl
@@ -20,6 +20,23 @@ use IO::Socket;
use Net::SSLeay;
use Net::IPv4Addr qw(:all);
+# Load module to move files.
+use File::Copy;
+
+# Load module to get file stats.
+use File::stat;
+
+# Load module to deal with temporary files.
+use File::Temp;
+
+# Load module to deal with the date formats used by the HTTP protocol.
+use HTTP::Date;
+
+# Load the libwwwperl User Agent module.
+use LWP::UserAgent;
+
+$|=1; # line buffering
+
$General::version = 'VERSION';
$General::swroot = 'CONFIG_ROOT';
$General::noipprefix = 'noipg-';
@@ -1346,6 +1363,251 @@ sub generateProtoTransHash () {
return %protocols;
}
+# Function to grab a given URL content or to download and store it on disk.
+#
+# The function requires a configuration hash to be passed.
+#
+# The following options (hash keys) are supported:
+#
+# URL -> The URL to the content or file. REQUIRED!
+# FILE -> The filename as fullpath where the content/file should be stored on disk.
+# ETAGSFILE -> A filename again as fullpath where Etags should be stored and read.
+# ETAGPREFIX -> In case a custom etag name should be used, otherwise it defaults to the given URL.
+# MAXSIZE -> In bytes until the downloader will abort downloading. (example: 10_485_760 for 10MB)
+#
+# If a file is given an If-Modified-Since header will be generated from the last modified timestamp
+# of an already stored file. In case an Etag file is specified an If-None-Match header will be added to
+# the request - Both can be used at the same time.
+#
+# In case no FILE option has been passed to the function, the content of the requested URL will be returned.
+#
+# Return codes (if FILE is used):
+#
+# nothing - On success
+# no url - If no URL has been specified.
+# not_modified - In case the servers responds with "Not modified" (304)
+# dl_error - If the requested URL cannot be accessed.
+# incomplete download - In case the size of the local file does not match the remote content_lenght.
+#
+sub downloader (%) {
+ my (%args) = @_;
+
+ # Remap args hash and convert all keys into upper case format.
+ %args = map { uc $_ => $args{$_} } keys %args;
+
+ # The amount of download attempts before giving up and
+ # logging an error.
+ my $max_dl_attempts = 3;
+
+ # Temporary directory to download the files.
+ my $tmp_dl_directory = "/var/tmp";
+
+ # Assign hash values.
+ my $url = $args{"URL"} if (exists($args{"URL"}));
+ my $file = $args{"FILE"} if (exists($args{"FILE"}));
+ my $etags_file = $args{"ETAGSFILE"} if (exists($args{"ETAGSFILE"}));
+ my $etagprefix = $url;
+ $etagprefix = $args{"ETAGPREFIX"} if (exists($args{"ETAGPREFIX"}));
+ my $max_size = $args{"MAXSIZE"} if (exists($args{"MAXSIZE"}));
+
+ # Abort with error "no url", if no URL has been given.
+ return "no url" unless ($url);
+
+ my %etags = ();
+ my $tmpfile;
+
+ # Read-in proxysettings.
+ my %proxysettings=();
+ &readhash("${General::swroot}/proxy/settings", \%proxysettings);
+
+ # Create a user agent instance.
+ #
+ # Request SSL hostname verification and specify path
+ # to the CA file.
+ my $ua = LWP::UserAgent->new(
+ ssl_opts => {
+ SSL_ca_file => '/etc/ssl/cert.pem',
+ verify_hostname => 1,
+ },
+ );
+
+ # Set timeout to 10 seconds.
+ $ua->timeout(10);
+
+ # Assign maximum download size if set.
+ $ua->max_size($max_size) if ($max_size);
+
+ # Generate UserAgent.
+ my $agent = &MakeUserAgent();
+
+ # Set the generated UserAgent.
+ $ua->agent($agent);
+
+ # Check if an upstream proxy is configured.
+ if ($proxysettings{'UPSTREAM_PROXY'}) {
+ my $proxy_url;
+
+ $proxy_url = "http://";
+
+ # Check if the proxy requires authentication.
+ if (($proxysettings{'UPSTREAM_USER'}) && ($proxysettings{'UPSTREAM_PASSWORD'})) {
+ $proxy_url .= "$proxysettings{'UPSTREAM_USER'}\:$proxysettings{'UPSTREAM_PASSWORD'}\@";
+ }
+
+ # Add proxy server address and port.
+ $proxy_url .= $proxysettings{'UPSTREAM_PROXY'};
+
+ # Append proxy settings.
+ $ua->proxy(['http', 'https'], $proxy_url);
+ }
+
+ # Create a HTTP request element and pass the given URL to it.
+ my $request = HTTP::Request->new(GET => $url);
+
+ # Check if a file to store the output has been provided.
+ if ($file) {
+ # Check if the given file already exits, because it has been downloaded in the past.
+ #
+ # In this case we are requesting the server if the remote file has been changed or not.
+ # This will be done by sending the modification time in a special HTTP header.
+ if (-f $file) {
+ # Call stat on the file.
+ my $stat = stat($file);
+
+ # Omit the mtime of the existing file.
+ my $mtime = $stat->mtime;
+
+ # Convert the timestamp into right format.
+ my $http_date = time2str($mtime);
+
+ # Add the If-Modified-Since header to the request to ask the server if the
+ # file has been modified.
+ $request->header( 'If-Modified-Since' => "$http_date" );
+ }
+
+ # Generate temporary file name, located in the tempoary download directory and with a suffix of ".tmp".
+ # The downloaded file will be stored there until some sanity checks are performed.
+ my $tmp = File::Temp->new( SUFFIX => ".tmp", DIR => "$tmp_dl_directory/", UNLINK => 0 );
+ $tmpfile = $tmp->filename();
+ }
+
+ # Check if an file for etags has been given.
+ if ($etags_file) {
+ # Read-in Etags file for known Etags if the file is present.
+ &readhash("$etags_file", \%etags) if (-f $etags_file);
+
+ # Check if an Etag for the current provider is stored.
+ if ($etags{$etagprefix}) {
+ # Grab the stored tag.
+ my $etag = $etags{$etagprefix};
+
+ # Add an "If-None-Match header to the request to ask the server if the
+ # file has been modified.
+ $request->header( 'If-None-Match' => $etag );
+ }
+ }
+
+ my $dl_attempt = 1;
+ my $response;
+
+ # Download and retry on failure.
+ while ($dl_attempt <= $max_dl_attempts) {
+ # Perform the request and save the output into the tmpfile if requested.
+ $response = $ua->request($request, $tmpfile);
+
+ # Check if the download was successfull.
+ if($response->is_success) {
+ # Break loop.
+ last;
+
+ # Check if the server responds with 304 (Not Modified).
+ } elsif ($response->code == 304) {
+ # Remove temporary file, if one exists.
+ unlink("$tmpfile") if (-e "$tmpfile");
+
+ # Return "not modified".
+ return "not modified";
+
+ # Check if we ran out of download re-tries.
+ } elsif ($dl_attempt eq $max_dl_attempts) {
+ # Obtain error.
+ my $error = $response->content;
+
+ # Remove temporary file, if one exists.
+ unlink("$tmpfile") if (-e "$tmpfile");
+
+ # Return the error message from response..
+ return "$error";
+ }
+
+ # Remove temporary file, if one exists.
+ unlink("$tmpfile") if (-e "$tmpfile");
+
+ # Increase download attempt counter.
+ $dl_attempt++;
+ }
+
+ # Obtain the connection headers.
+ my $headers = $response->headers;
+
+ # Check if an Etag file has been provided.
+ if ($etags_file) {
+ # Grab the Etag from the response if the server provides one.
+ if ($response->header('Etag')) {
+ # Add the provided Etag to the hash of tags.
+ $etags{$etagprefix} = $response->header('Etag');
+
+ # Write the etags file.
+ &writehash($etags_file, \%etags);
+ }
+ }
+
+ # Check if the response should be stored on disk.
+ if ($file) {
+ # Get the remote size of the content.
+ my $remote_size = $response->header('Content-Length');
+
+ # Perform a stat on the temporary file.
+ my $stat = stat($tmpfile);
+
+ # Grab the size of the stored temporary file.
+ my $local_size = $stat->size;
+
+ # Check if both sizes are equal.
+ if(($remote_size) && ($remote_size ne $local_size)) {
+ # Delete the temporary file.
+ unlink("$tmpfile");
+
+ # Abort and return "incomplete download" as error.
+ return "incomplete download";
+ }
+
+ # Move the temporaray file to the desired file by overwriting a may
+ # existing one.
+ move("$tmpfile", "$file");
+
+ # Omit the timestamp from response header, when the file has been modified the
+ # last time.
+ my $last_modified = $headers->last_modified;
+
+ # Check if we got a last-modified value from the server.
+ if ($last_modified) {
+ # Assign the last-modified timestamp as mtime to the
+ # stored file.
+ utime(time(), "$last_modified", "$file");
+ }
+
+ # Delete temporary file.
+ unlink("$tmpfile");
+
+ # If we got here, everything worked fine. Return nothing.
+ return;
+ } else {
+ # Decode the response content and return it.
+ return $response->decoded_content;
+ }
+}
+
# Cloud Stuff
sub running_in_cloud() {
--
2.47.2
next reply other threads:[~2025-03-22 15:07 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-22 14:57 Stefan Schantl [this message]
2025-03-22 14:57 ` [PATCH 2/3] general-functions.pl: Use new downloader for FetchPublicIp function Stefan Schantl
2025-03-23 12:08 ` Michael Tremer
2025-03-22 14:57 ` [PATCH 3/3] ids-functions.pl: Use new general downloader function Stefan Schantl
2025-03-23 12:08 ` [PATCH 1/3] general-functions.pl: Add LWP-based flexible " Michael Tremer
2025-03-23 19:00 ` Stefan Schantl
2025-03-24 10:02 ` Michael Tremer
2025-03-26 12:47 ` Stefan Schantl
2025-03-26 16:18 ` Michael Tremer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250322145724.4593-1-stefan.schantl@ipfire.org \
--to=stefan.schantl@ipfire.org \
--cc=development@lists.ipfire.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox