Even if the servers do not support HEAD requests, the remote filesize (content_length) can be obtained from the connection headers.
This generic method works for all servers, and therefore we no longer need the code to handle sourcefire servers in a different way.
Signed-off-by: Stefan Schantl stefan.schantl@ipfire.org --- config/cfgroot/ids-functions.pl | 43 +++++---------------------------- 1 file changed, 6 insertions(+), 37 deletions(-)
diff --git a/config/cfgroot/ids-functions.pl b/config/cfgroot/ids-functions.pl index 94dccc8ae..eb276030b 100644 --- a/config/cfgroot/ids-functions.pl +++ b/config/cfgroot/ids-functions.pl @@ -354,43 +354,6 @@ sub downloadruleset ($) { return 1; }
- # Variable to store the filesize of the remote object. - my $remote_filesize; - - # The sourcfire (snort rules) does not allow to send "HEAD" requests, so skip this check - # for this webserver. - # - # Check if the ruleset source contains "snort.org". - unless ($url =~ /.snort.org/) { - # Pass the requrested url to the downloader. - my $request = HTTP::Request->new(HEAD => $url); - - # Accept the html header. - $request->header('Accept' => 'text/html'); - - # Perform the request and fetch the html header. - my $response = $downloader->request($request); - - # Check if there was any error. - unless ($response->is_success) { - # Obtain error. - my $error = $response->status_line(); - - # Log error message. - &_log_to_syslog("Unable to download the ruleset. ($error)"); - - # Return "1" - false. - return 1; - } - - # Assign the fetched header object. - my $header = $response->headers(); - - # Grab the remote file size from the object and store it in the - # variable. - $remote_filesize = $header->content_length; - } - # Load perl module to deal with temporary files. use File::Temp;
@@ -416,6 +379,12 @@ sub downloadruleset ($) { return 1; }
+ # Obtain the connection headers. + my $headers = $response->headers; + + # Get the remote size of the downloaded file. + my $remote_filesize = $headers->content_length; + # Load perl stat module. use File::stat;
Signed-off-by: Stefan Schantl stefan.schantl@ipfire.org --- config/cfgroot/ids-functions.pl | 38 ++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 10 deletions(-)
diff --git a/config/cfgroot/ids-functions.pl b/config/cfgroot/ids-functions.pl index eb276030b..c8bc52b1b 100644 --- a/config/cfgroot/ids-functions.pl +++ b/config/cfgroot/ids-functions.pl @@ -256,6 +256,10 @@ sub downloadruleset ($) { # If no provider is given default to "all". $provider //= 'all';
+ # The amount of download attempts before giving up and + # logging an error. + my $max_dl_attempts = 5; + # Hash to store the providers and access id's, for which rules should be downloaded. my %sheduled_providers = ();
@@ -364,19 +368,33 @@ sub downloadruleset ($) { # Pass the requested url to the downloader. my $request = HTTP::Request->new(GET => $url);
- # Perform the request and save the output into the tmpfile. - my $response = $downloader->request($request, $tmpfile); + my $dl_attempt = 1; + my $response;
- # Check if there was any error. - unless ($response->is_success) { - # Obtain error. - my $error = $response->content; + # Download and retry on failure. + while ($dl_attempt <= $max_dl_attempts) { + # Perform the request and save the output into the tmpfile. + $response = $downloader->request($request, $tmpfile);
- # Log error message. - &_log_to_syslog("Unable to download the ruleset. ($error)"); + # Check if the download was successfull. + if($response->is_success) { + # Break loop. + last;
- # Return "1" - false. - return 1; + # Check if we ran out of download re-tries. + } elsif ($dl_attempt eq $max_dl_attempts) { + # Obtain error. + my $error = $response->content; + + # Log error message. + &_log_to_syslog("Unable to download the ruleset. ($error)"); + + # Return "1" - false. + return 1; + } + + # Increase download attempt counter. + $dl_attempt++; }
# Obtain the connection headers.
Hello,
What is the rationale for five attempts? Why not three?
-Michael
On 23 Mar 2022, at 04:04, Stefan Schantl stefan.schantl@ipfire.org wrote:
Signed-off-by: Stefan Schantl stefan.schantl@ipfire.org
config/cfgroot/ids-functions.pl | 38 ++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 10 deletions(-)
diff --git a/config/cfgroot/ids-functions.pl b/config/cfgroot/ids-functions.pl index eb276030b..c8bc52b1b 100644 --- a/config/cfgroot/ids-functions.pl +++ b/config/cfgroot/ids-functions.pl @@ -256,6 +256,10 @@ sub downloadruleset ($) { # If no provider is given default to "all". $provider //= 'all';
- # The amount of download attempts before giving up and
- # logging an error.
- my $max_dl_attempts = 5;
- # Hash to store the providers and access id's, for which rules should be downloaded. my %sheduled_providers = ();
@@ -364,19 +368,33 @@ sub downloadruleset ($) { # Pass the requested url to the downloader. my $request = HTTP::Request->new(GET => $url);
# Perform the request and save the output into the tmpfile.
my $response = $downloader->request($request, $tmpfile);
my $dl_attempt = 1;
my $response;
# Check if there was any error.
unless ($response->is_success) {
# Obtain error.
my $error = $response->content;
# Download and retry on failure.
while ($dl_attempt <= $max_dl_attempts) {
# Perform the request and save the output into the tmpfile.
$response = $downloader->request($request, $tmpfile);
# Log error message.
&_log_to_syslog("Unable to download the ruleset. \($error\)");
# Check if the download was successfull.
if($response->is_success) {
# Break loop.
last;
# Return "1" - false.
return 1;
# Check if we ran out of download re-tries.
} elsif ($dl_attempt eq $max_dl_attempts) {
# Obtain error.
my $error = $response->content;
# Log error message.
&_log_to_syslog("Unable to download the ruleset. \($error\)");
# Return "1" - false.
return 1;
}
# Increase download attempt counter.
$dl_attempt++;
}
# Obtain the connection headers.
-- 2.30.2
Hello Michael,
there was no special intention - I simply wanted to give the downloader more than just one chance to do its job. For this I needed a value, so I simply chose "5".
But I'm also fine with "3" or any other suggestion.
Best regards,
-Stefan
Hello,
What is the rationale for five attempts? Why not three?
-Michael
On 23 Mar 2022, at 04:04, Stefan Schantl stefan.schantl@ipfire.org wrote:
Signed-off-by: Stefan Schantl stefan.schantl@ipfire.org
config/cfgroot/ids-functions.pl | 38 ++++++++++++++++++++++++------
1 file changed, 28 insertions(+), 10 deletions(-)
diff --git a/config/cfgroot/ids-functions.pl b/config/cfgroot/ids- functions.pl index eb276030b..c8bc52b1b 100644 --- a/config/cfgroot/ids-functions.pl +++ b/config/cfgroot/ids-functions.pl @@ -256,6 +256,10 @@ sub downloadruleset ($) { # If no provider is given default to "all". $provider //= 'all';
+ # The amount of download attempts before giving up and + # logging an error. + my $max_dl_attempts = 5;
# Hash to store the providers and access id's, for which rules should be downloaded. my %sheduled_providers = ();
@@ -364,19 +368,33 @@ sub downloadruleset ($) { # Pass the requested url to the downloader. my $request = HTTP::Request->new(GET => $url);
- # Perform the request and save the output into the tmpfile. - my $response = $downloader->request($request, $tmpfile); + my $dl_attempt = 1; + my $response;
- # Check if there was any error. - unless ($response->is_success) { - # Obtain error. - my $error = $response->content; + # Download and retry on failure. + while ($dl_attempt <= $max_dl_attempts) { + # Perform the request and save the output into the tmpfile. + $response = $downloader->request($request, $tmpfile);
- # Log error message. - &_log_to_syslog("Unable to download the ruleset. ($error)"); + # Check if the download was successfull. + if($response->is_success) { + # Break loop. + last;
- # Return "1" - false. - return 1; + # Check if we ran out of download re-tries. + } elsif ($dl_attempt eq $max_dl_attempts) { + # Obtain error. + my $error = $response->content;
+ # Log error message. + &_log_to_syslog("Unable to download the ruleset. ($error)");
+ # Return "1" - false. + return 1; + }
+ # Increase download attempt counter. + $dl_attempt++; }
# Obtain the connection headers.
2.30.2
Hello,
I generally don’t disagree with trying again. This should however happen after a little while (let’s say an hour or so).
Trying more than three times at one time is a bit excessive I would say. Let’s not try to DDoS other people’s systems :)
-Michael
On 24 Mar 2022, at 18:23, Stefan Schantl stefan.schantl@ipfire.org wrote:
Hello Michael,
there was no special intention - I simple wanted to give the downloader more than just one chance to do it's job. For this I needed a value so I simple choosed "5".
But I'm also fine with "3" or any other suggestion.
Best regards,
-Stefan
Hello,
What is the rationale for five attempts? Why not three?
-Michael
On 23 Mar 2022, at 04:04, Stefan Schantl stefan.schantl@ipfire.org wrote:
Signed-off-by: Stefan Schantl stefan.schantl@ipfire.org
config/cfgroot/ids-functions.pl | 38 ++++++++++++++++++++++++------
1 file changed, 28 insertions(+), 10 deletions(-)
diff --git a/config/cfgroot/ids-functions.pl b/config/cfgroot/ids- functions.pl index eb276030b..c8bc52b1b 100644 --- a/config/cfgroot/ids-functions.pl +++ b/config/cfgroot/ids-functions.pl @@ -256,6 +256,10 @@ sub downloadruleset ($) { # If no provider is given default to "all". $provider //= 'all';
# The amount of download attempts before giving up and
# logging an error.
my $max_dl_attempts = 5;
# Hash to store the providers and access id's, for which
rules should be downloaded. my %sheduled_providers = ();
@@ -364,19 +368,33 @@ sub downloadruleset ($) { # Pass the requested url to the downloader. my $request = HTTP::Request->new(GET => $url);
# Perform the request and save the output into the
tmpfile.
my $response = $downloader->request($request,
$tmpfile);
my $dl_attempt = 1;
my $response;
# Check if there was any error.
unless ($response->is_success) {
# Obtain error.
my $error = $response->content;
# Download and retry on failure.
while ($dl_attempt <= $max_dl_attempts) {
# Perform the request and save the output
into the tmpfile.
$response = $downloader->request($request,
$tmpfile);
# Log error message.
&_log_to_syslog("Unable to download the
ruleset. ($error)");
# Check if the download was successfull.
if($response->is_success) {
# Break loop.
last;
# Return "1" - false.
return 1;
# Check if we ran out of download re-tries.
} elsif ($dl_attempt eq $max_dl_attempts) {
# Obtain error.
my $error = $response->content;
# Log error message.
&_log_to_syslog("Unable to download
the ruleset. ($error)");
# Return "1" - false.
return 1;
}
# Increase download attempt counter.
$dl_attempt++; } # Obtain the connection headers.
-- 2.30.2
Signed-off-by: Stefan Schantl stefan.schantl@ipfire.org --- config/cfgroot/ids-functions.pl | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/config/cfgroot/ids-functions.pl b/config/cfgroot/ids-functions.pl index c8bc52b1b..dfbeb1a7d 100644 --- a/config/cfgroot/ids-functions.pl +++ b/config/cfgroot/ids-functions.pl @@ -393,6 +393,9 @@ sub downloadruleset ($) { return 1; }
+ # Remove temporary file, if one exists. + unlink("$tmpfile") if (-e "$tmpfile"); + # Increase download attempt counter. $dl_attempt++; }
When using the "If-Modified-Since" header, the server can be asked whether a modified version of the file is available to be served.
In case that is true, the file will be sent and stored by the downloader function. If the file has not been touched since the last time, the server will respond with the code "304" (Not modified).
This tells us that the currently stored file is the latest one (still up-to-date) and we can safely skip the download attempt for this provider.
Signed-off-by: Stefan Schantl stefan.schantl@ipfire.org --- config/cfgroot/ids-functions.pl | 38 ++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-)
diff --git a/config/cfgroot/ids-functions.pl b/config/cfgroot/ids-functions.pl index dfbeb1a7d..d7df41dd1 100644 --- a/config/cfgroot/ids-functions.pl +++ b/config/cfgroot/ids-functions.pl @@ -365,9 +365,25 @@ sub downloadruleset ($) { my $tmp = File::Temp->new( SUFFIX => ".tmp", DIR => "/var/tmp/", UNLINK => 0 ); my $tmpfile = $tmp->filename();
+ # Genarate and assign file name and path to store the downloaded rules file. + my $dl_rulesfile = &_get_dl_rulesfile($provider); + + # Load perl module to deal with file atributes. + use File::stat; + + # Get the mtime of the rulesfile if it exists. + my $mtime = (stat($dl_rulesfile)->mtime) if (-f $dl_rulesfile); + + # Convert the mtime into gmtime format. + my $gmtime = gmtime($mtime || 0); + # Pass the requested url to the downloader. my $request = HTTP::Request->new(GET => $url);
+ # Add the If-Modified-Since header to the request, containing the omited and converted + # mtime of the downloaded rules file, if one is present. + $request->header( 'If-Modified-Since' => "$gmtime" ); + my $dl_attempt = 1; my $response;
@@ -381,6 +397,14 @@ sub downloadruleset ($) { # Break loop. last;
+ # Check if the server responds with 304 (Not Modified). + } elsif ($response->code == 304) { + # Log to syslog. + &_log_to_syslog("Ruleset is up-to-date, no update required."); + + # Nothing to be done, the ruleset is up-to-date. + return; + # Check if we ran out of download re-tries. } elsif ($dl_attempt eq $max_dl_attempts) { # Obtain error. @@ -406,6 +430,10 @@ sub downloadruleset ($) { # Get the remote size of the downloaded file. my $remote_filesize = $headers->content_length;
+ # Get the timestamp from header, when the file has been modified the + # last time. + my $last_modified = $headers->last_modified; + # Load perl stat module. use File::stat;
@@ -428,9 +456,6 @@ sub downloadruleset ($) { return 1; }
- # Genarate and assign file name and path to store the downloaded rules file. - my $dl_rulesfile = &_get_dl_rulesfile($provider); - # Check if a file name could be obtained. unless ($dl_rulesfile) { # Log error message. @@ -449,6 +474,13 @@ sub downloadruleset ($) { # Overwrite the may existing rulefile or tarball with the downloaded one. move("$tmpfile", "$dl_rulesfile");
+ # Check if the server respond contained a last_modified value. + if ($last_modified) { + # Assign the last modified timestamp from server as mtime to the + # rules file. + utime(time(), "$last_modified", "$dl_rulesfile"); + } + # Delete temporary file. unlink("$tmpfile");
Hello,
I like this mechanism. It is quite powerful since it will save a lot of unnecessary downloads and CPU cycles. However, I am not sure if all providers will support this because some use crappy CDNs which do not give anything about conforming to any RFC.
On 23 Mar 2022, at 04:04, Stefan Schantl stefan.schantl@ipfire.org wrote:
When using the "If-Modified-Since" header, the server can be requested if a modified version of the file can be served.
In case that is true, the file will be sent and stored by the downloader function. If the file has not been touched since the last time, the server will respond with the code "304" (Not modified).
This tells us, that the current stored file is the latest one (still up-to-date) and we safely can skip the download attempt for this provider.
Signed-off-by: Stefan Schantl stefan.schantl@ipfire.org
config/cfgroot/ids-functions.pl | 38 ++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-)
diff --git a/config/cfgroot/ids-functions.pl b/config/cfgroot/ids-functions.pl index dfbeb1a7d..d7df41dd1 100644 --- a/config/cfgroot/ids-functions.pl +++ b/config/cfgroot/ids-functions.pl @@ -365,9 +365,25 @@ sub downloadruleset ($) { my $tmp = File::Temp->new( SUFFIX => ".tmp", DIR => "/var/tmp/", UNLINK => 0 ); my $tmpfile = $tmp->filename();
# Genarate and assign file name and path to store the downloaded rules file.
my $dl_rulesfile = &_get_dl_rulesfile($provider);
# Load perl module to deal with file atributes.
use File::stat;
I don’t like modules in the middle of a script. This makes testing a new perl update more difficult. If I can load the script, I expect it to have all dependencies it needs. This does not work when modules are being loaded later on. I would say that we need to cover those things by a module test then.
I would prefer reliability over performance (I assume that this is your argument for why to load it only when you need it).
# Get the mtime of the rulesfile if it exists.
my $mtime = (stat($dl_rulesfile)->mtime) if (-f $dl_rulesfile);
I suggest making this a little bit simpler:
Create the request first, then stat the file (you don’t need to check if it exists first because stat will just tell you).
Then add the header if you have a timestamp; if not, then don’t add a useless header. This is a lot more straight forward :)
# Convert the mtime into gmtime format.
my $gmtime = gmtime($mtime || 0);
# Pass the requested url to the downloader. my $request = HTTP::Request->new(GET => $url);
# Add the If-Modified-Since header to the request, containing the omited and converted
# mtime of the downloaded rules file, if one is present.
$request->header( 'If-Modified-Since' => "$gmtime" );
Will this survive any redirects? I know that some redirect to AWS S3 and you will need to make sure that the header is being sent when you follow the redirect - assuming S3 supports this which I think they would.
- my $dl_attempt = 1; my $response;
@@ -381,6 +397,14 @@ sub downloadruleset ($) { # Break loop. last;
# Check if the server responds with 304 (Not Modified).
} elsif ($response->code == 304) {
# Log to syslog.
&_log_to_syslog("Ruleset is up-to-date, no update required.");
# Nothing to be done, the ruleset is up-to-date.
return;
# Check if we ran out of download re-tries. } elsif ($dl_attempt eq $max_dl_attempts) { # Obtain error.
@@ -406,6 +430,10 @@ sub downloadruleset ($) { # Get the remote size of the downloaded file. my $remote_filesize = $headers->content_length;
# Get the timestamp from header, when the file has been modified the
# last time.
my $last_modified = $headers->last_modified;
- # Load perl stat module. use File::stat;
You are loading the module again.
@@ -428,9 +456,6 @@ sub downloadruleset ($) { return 1; }
# Genarate and assign file name and path to store the downloaded rules file.
my $dl_rulesfile = &_get_dl_rulesfile($provider);
- # Check if a file name could be obtained. unless ($dl_rulesfile) { # Log error message.
@@ -449,6 +474,13 @@ sub downloadruleset ($) { # Overwrite the may existing rulefile or tarball with the downloaded one. move("$tmpfile", "$dl_rulesfile");
# Check if the server respond contained a last_modified value.
if ($last_modified) {
# Assign the last modified timestamp from server as mtime to the
# rules file.
utime(time(), "$last_modified", "$dl_rulesfile");
}
- # Delete temporary file. unlink("$tmpfile");
-- 2.30.2
-Michael
Hello Michael,
thanks for your thoughts and feedback.
Hello,
I like this mechanism. It is quite powerful since it will save a lot of unnecessary downloads and CPU cycles. However, I am not sure if all providers will support this because some use crappy CDNs which do not give anything about conforming to any RFC.
Currently I'm only aware that content which is hosted on github.com cannot be checked with this header flag. They are using Etags which also can be used for cache management, but works in a completely different way.
There are plenty more providers, some of which require a subscription which I do not own - so they currently remain untested.
On 23 Mar 2022, at 04:04, Stefan Schantl stefan.schantl@ipfire.org wrote:
When using the "If-Modified-Since" header, the server can be requested if a modified version of the file can be served.
In case that is true, the file will be sent and stored by the downloader function. If the file has not been touched since the last time, the server will respond with the code "304" (Not modified).
This tells us, that the current stored file is the latest one (still up-to-date) and we safely can skip the download attempt for this provider.
Signed-off-by: Stefan Schantl stefan.schantl@ipfire.org
config/cfgroot/ids-functions.pl | 38 ++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-)
diff --git a/config/cfgroot/ids-functions.pl b/config/cfgroot/ids- functions.pl index dfbeb1a7d..d7df41dd1 100644 --- a/config/cfgroot/ids-functions.pl +++ b/config/cfgroot/ids-functions.pl @@ -365,9 +365,25 @@ sub downloadruleset ($) { my $tmp = File::Temp->new( SUFFIX => ".tmp", DIR => "/var/tmp/", UNLINK => 0 ); my $tmpfile = $tmp->filename();
+ # Genarate and assign file name and path to store the downloaded rules file. + my $dl_rulesfile = &_get_dl_rulesfile($provider);
+ # Load perl module to deal with file atributes. + use File::stat;
I don’t like modules in the middle of a script. This makes testing a new perl update more difficult. If I can load the script, I expect it to have all dependencies it needs. This does not work when modules are being loaded later on. I would say that we need to cover those things by a module test then.
I would prefer reliability over performance (I assume that this your argument why to load it only when you need it).
I'll move the loading of the modules to the top of the script. This is also how all other (library) scripts do it.
This may lead to some more resource consumption, but it will help us to execute the script in a safe way - even when anything gets updated.
Module tests generally are a good way - I'm thinking about implementing some at a later time (after some important changes are finished) for the "ids-functions.pl".
+ # Get the mtime of the rulesfile if it exists. + my $mtime = (stat($dl_rulesfile)->mtime) if (-f $dl_rulesfile);
I suggest making this a little bit simpler:
Create the request first, then stat the file (you don’t need to check if it exists first because stat will just tell you).
Then add the header if you have a timestamp; if not, then don’t add a useless header. This is a lot more straight forward :)
+ # Convert the mtime into gmtime format. + my $gmtime = gmtime($mtime || 0);
# Pass the requested url to the downloader. my $request = HTTP::Request->new(GET => $url);
+ # Add the If-Modified-Since header to the request, containing the omited and converted + # mtime of the downloaded rules file, if one is present. + $request->header( 'If-Modified-Since' => "$gmtime" );
Will this survive any redirects? I know that some redirect to AWS S3 and you will need to make sure that the header is being sent when you follow the redirect - assuming S3 supports this which I think they would.
If the servers in between are not entirely broken or do silly stuff, they should.
my $dl_attempt = 1; my $response;
@@ -381,6 +397,14 @@ sub downloadruleset ($) { # Break loop. last;
+ # Check if the server responds with 304 (Not Modified). + } elsif ($response->code == 304) { + # Log to syslog. + &_log_to_syslog("Ruleset is up-to- date, no update required.");
+ # Nothing to be done, the ruleset is up-to-date. + return;
# Check if we ran out of download re-tries. } elsif ($dl_attempt eq $max_dl_attempts) { # Obtain error. @@ -406,6 +430,10 @@ sub downloadruleset ($) { # Get the remote size of the downloaded file. my $remote_filesize = $headers->content_length;
+ # Get the timestamp from header, when the file has been modified the + # last time. + my $last_modified = $headers->last_modified;
# Load perl stat module. use File::stat;
You are loading the module again.
Thanks, this is an additional reason why loading all required modules at the top is a good idea. You do not have to think about which modules are already loaded or might be reloaded, etc.
@@ -428,9 +456,6 @@ sub downloadruleset ($) { return 1; }
- # Genarate and assign file name and path to store the downloaded rules file. - my $dl_rulesfile = &_get_dl_rulesfile($provider);
# Check if a file name could be obtained. unless ($dl_rulesfile) { # Log error message. @@ -449,6 +474,13 @@ sub downloadruleset ($) { # Overwrite the may existing rulefile or tarball with the downloaded one. move("$tmpfile", "$dl_rulesfile");
+ # Check if the server respond contained a last_modified value. + if ($last_modified) { + # Assign the last modified timestamp from server as mtime to the + # rules file. + utime(time(), "$last_modified", "$dl_rulesfile"); + }
# Delete temporary file. unlink("$tmpfile");
-- 2.30.2
-Michael
-Stefan
Hello,
On 24 Mar 2022, at 18:50, Stefan Schantl stefan.schantl@ipfire.org wrote:
Hello Michael,
thanks for your thoughts and feedback.
Hello,
I like this mechanism. It is quite powerful since it will save a lot of unnecessary downloads and CPU cycles. However, I am not sure if all providers will support this because some use crappy CDNs which do not give anything about conforming to any RFC.
Currently I'm only aware that content which is hosted on github.com cannot be checked with this header flag. They are using Etags which also can be used for cache management, but works in a completely different way.
Yes, but we can still use them.
You can just store the Etag and send it with your next request.
There are plenty more providers, some which requires a subscription which I do not own - so they currently remains untested.
On 23 Mar 2022, at 04:04, Stefan Schantl stefan.schantl@ipfire.org wrote:
When using the "If-Modified-Since" header, the server can be requested if a modified version of the file can be served.
In case that is true, the file will be sent and stored by the downloader function. If the file has not been touched since the last time, the server will respond with the code "304" (Not modified).
This tells us, that the current stored file is the latest one (still up-to-date) and we safely can skip the download attempt for this provider.
Signed-off-by: Stefan Schantl stefan.schantl@ipfire.org
config/cfgroot/ids-functions.pl | 38 ++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-)
diff --git a/config/cfgroot/ids-functions.pl b/config/cfgroot/ids- functions.pl index dfbeb1a7d..d7df41dd1 100644 --- a/config/cfgroot/ids-functions.pl +++ b/config/cfgroot/ids-functions.pl @@ -365,9 +365,25 @@ sub downloadruleset ($) { my $tmp = File::Temp->new( SUFFIX => ".tmp", DIR => "/var/tmp/", UNLINK => 0 ); my $tmpfile = $tmp->filename();
- # Genarate and assign file name and path to store
the downloaded rules file.
- my $dl_rulesfile = &_get_dl_rulesfile($provider);
- # Load perl module to deal with file atributes.
- use File::stat;
I don’t like modules in the middle of a script. This makes testing a new perl update more difficult. If I can load the script, I expect it to have all dependencies it needs. This does not work when modules are being loaded later on. I would say that we need to cover those things by a module test then.
I would prefer reliability over performance (I assume that this your argument why to load it only when you need it).
I'll move the load of the modules to the top of the script. This is also the way all other (library) scripts are doing this.
Thank you.
This may will lead to some more resource consumption, but will help us to execute the script in a safe way - even when anything got updated.
Module tests generally are a good way - I'm thinking about implementing some at a later time (after some important changes are finished) for the "ids-functions.pl".
- # Get the mtime of the rulesfile if it exists.
- my $mtime = (stat($dl_rulesfile)->mtime) if (-f
$dl_rulesfile);
I suggest making this a little bit simpler:
Create the request first, then stat the file (you don’t need to check if it exists first because stat will just tell you).
Then add the header if you have a timestamp; if not, then don’t add a useless header. This is a lot more straight forward :)
- # Convert the mtime into gmtime format.
- my $gmtime = gmtime($mtime || 0);
# Pass the requested url to the downloader. my $request = HTTP::Request->new(GET => $url);
- # Add the If-Modified-Since header to the request,
containing the omited and converted
- # mtime of the downloaded rules file, if one is
present.
- $request->header( 'If-Modified-Since' => "$gmtime"
);
Will this survive any redirects? I know that some redirect to AWS S3 and you will need to make sure that the header is being sent when you follow the redirect - assuming S3 supports this which I think they would.
If the servers in between are not entirely broken or do silly stuff, they should.
This is in the control of the client. It should not matter what the server responds with.
my $dl_attempt = 1; my $response;
@@ -381,6 +397,14 @@ sub downloadruleset ($) { # Break loop. last;
- # Check if the server responds with 304
(Not Modified).
- } elsif ($response->code == 304) {
- # Log to syslog.
- &_log_to_syslog("Ruleset is up-to-
date, no update required.");
- # Nothing to be done, the ruleset
is up-to-date.
- return;
# Check if we ran out of download re-tries. } elsif ($dl_attempt eq $max_dl_attempts) { # Obtain error. @@ -406,6 +430,10 @@ sub downloadruleset ($) { # Get the remote size of the downloaded file. my $remote_filesize = $headers->content_length;
- # Get the timestamp from header, when the file has
been modified the
- # last time.
- my $last_modified = $headers->last_modified;
# Load perl stat module. use File::stat;
You are loading the module again.
Thanks, this is an additional point why, loading all required modules on top are a good idea. You do not have to think about which modules are already loaded or may be reloaded etc.
@@ -428,9 +456,6 @@ sub downloadruleset ($) { return 1; }
- # Genarate and assign file name and path to store
the downloaded rules file.
- my $dl_rulesfile = &_get_dl_rulesfile($provider);
# Check if a file name could be obtained. unless ($dl_rulesfile) { # Log error message. @@ -449,6 +474,13 @@ sub downloadruleset ($) { # Overwrite the may existing rulefile or tarball with the downloaded one. move("$tmpfile", "$dl_rulesfile");
- # Check if the server respond contained a
last_modified value.
- if ($last_modified) {
- # Assign the last modified timestamp from
server as mtime to the
- # rules file.
- utime(time(), "$last_modified",
"$dl_rulesfile");
- }
# Delete temporary file. unlink("$tmpfile");
-- 2.30.2
-Michael
-Stefan
Fixes #12805.
Signed-off-by: Stefan Schantl stefan.schantl@ipfire.org --- config/cfgroot/ids-functions.pl | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/config/cfgroot/ids-functions.pl b/config/cfgroot/ids-functions.pl index d7df41dd1..9eb375bc9 100644 --- a/config/cfgroot/ids-functions.pl +++ b/config/cfgroot/ids-functions.pl @@ -226,7 +226,7 @@ sub checkdiskspace () { # Check if the available disk space is more than 300MB. if ($available < 300) { # Log error to syslog. - &_log_to_syslog("Not enough free disk space on /var. Only $available MB from 300 MB available."); + &_log_to_syslog("<ERROR> Not enough free disk space on /var. Only $available MB from 300 MB available.");
# Exit function and return "1" - False. return 1; @@ -270,7 +270,7 @@ sub downloadruleset ($) { # Check if a ruleset has been configured. unless(%used_providers) { # Log that no ruleset has been configured and abort. - &_log_to_syslog("No ruleset provider has been configured."); + &_log_to_syslog("<ERROR> No ruleset provider has been configured.");
# Return "1". return 1; @@ -333,7 +333,7 @@ sub downloadruleset ($) { # Loop through the hash of sheduled providers. foreach my $provider ( keys %sheduled_providers) { # Log download/update of the ruleset. - &_log_to_syslog("Downloading ruleset for provider: $provider."); + &_log_to_syslog("<INFO> Downloading ruleset for provider: $provider.");
# Grab the download url for the provider. my $url = $IDS::Ruleset::Providers{$provider}{'dl_url'}; @@ -354,7 +354,7 @@ sub downloadruleset ($) { # Abort if no url could be determined for the provider. unless ($url) { # Log error and abort. - &_log_to_syslog("Unable to gather a download URL for the selected ruleset provider."); + &_log_to_syslog("<ERROR> Unable to gather a download URL for the selected ruleset provider."); return 1; }
@@ -400,7 +400,7 @@ sub downloadruleset ($) { # Check if the server responds with 304 (Not Modified). } elsif ($response->code == 304) { # Log to syslog. - &_log_to_syslog("Ruleset is up-to-date, no update required."); + &_log_to_syslog("<INFO> Ruleset is up-to-date, no update required.");
# Nothing to be done, the ruleset is up-to-date. return; @@ -411,7 +411,7 @@ sub downloadruleset ($) { my $error = $response->content;
# Log error message. - &_log_to_syslog("Unable to download the ruleset. ($error)"); + &_log_to_syslog("<ERROR> Unable to download the ruleset. ($error)");
# Return "1" - false. return 1; @@ -446,8 +446,8 @@ sub downloadruleset ($) { # Check if both file sizes match. if (($remote_filesize) && ($remote_filesize ne $local_filesize)) { # Log error message. - &_log_to_syslog("Unable to completely download the ruleset. "); - &_log_to_syslog("Only got $local_filesize Bytes instead of $remote_filesize Bytes. "); + &_log_to_syslog("<ERROR> Unable to completely download the ruleset. "); + &_log_to_syslog("<ERROR> Only got $local_filesize Bytes instead of $remote_filesize Bytes. ");
# Delete temporary file. unlink("$tmpfile"); @@ -459,7 +459,7 @@ sub downloadruleset ($) { # Check if a file name could be obtained. unless ($dl_rulesfile) { # Log error message. - &_log_to_syslog("Unable to store the downloaded rules file. "); + &_log_to_syslog("<ERROR> Unable to store the downloaded rules file. ");
# Delete downloaded temporary file. unlink("$tmpfile"); @@ -518,7 +518,7 @@ sub extractruleset ($) {
# Check if the file exists. unless (-f $tarball) { - &_log_to_syslog("Could not find ruleset file: $tarball"); + &_log_to_syslog("<ERROR> Could not find ruleset file: $tarball");
# Return nothing. return; @@ -897,7 +897,7 @@ sub _log_to_syslog ($) {
# The syslog function works best with an array based input, # so generate one before passing the message details to syslog. - my @syslog = ("ERR", "<ERROR> $message"); + my @syslog = ("ERR", "$message");
# Establish the connection to the syslog service. openlog('oinkmaster', 'cons,pid', 'user');
Yes, agreed. We should not send any HEAD requests at all. GET will be able to tell us the same.
Not sure if this is a relic from before when we had HTTP/1.1. Wow.
On 23 Mar 2022, at 04:04, Stefan Schantl stefan.schantl@ipfire.org wrote:
Even if the servers do not support HEAD requests, the remote filesize (content_length) can be obtained from the connection headers.
This generic method works for all servers, and therefore we no longer need the code for handling Sourcefire servers in a different way.
Signed-off-by: Stefan Schantl stefan.schantl@ipfire.org
config/cfgroot/ids-functions.pl | 43 +++++---------------------------- 1 file changed, 6 insertions(+), 37 deletions(-)
diff --git a/config/cfgroot/ids-functions.pl b/config/cfgroot/ids-functions.pl index 94dccc8ae..eb276030b 100644 --- a/config/cfgroot/ids-functions.pl +++ b/config/cfgroot/ids-functions.pl @@ -354,43 +354,6 @@ sub downloadruleset ($) { return 1; }
# Variable to store the filesize of the remote object.
my $remote_filesize;
# The sourcefire (snort rules) server does not allow sending "HEAD" requests, so skip this check
# for this webserver.
#
# Check if the ruleset source contains "snort.org".
unless ($url =~ /\.snort\.org/) {
# Pass the requested url to the downloader.
my $request = HTTP::Request->new(HEAD => $url);
# Accept the html header.
$request->header('Accept' => 'text/html');
# Perform the request and fetch the html header.
my $response = $downloader->request($request);
# Check if there was any error.
unless ($response->is_success) {
# Obtain error.
my $error = $response->status_line();
# Log error message.
&_log_to_syslog("Unable to download the ruleset. \($error\)");
# Return "1" - false.
return 1;
}
# Assign the fetched header object.
my $header = $response->headers();
# Grab the remote file size from the object and store it in the
# variable.
$remote_filesize = $header->content_length;
}
- # Load perl module to deal with temporary files. use File::Temp;
@@ -416,6 +379,12 @@ sub downloadruleset ($) { return 1; }
# Obtain the connection headers.
my $headers = $response->headers;
# Get the remote size of the downloaded file.
my $remote_filesize = $headers->content_length;
- # Load perl stat module. use File::stat;
-- 2.30.2