#!/usr/bin/perl

# Filters an RSS feed for interesting content:
# search words are read from a file and in addition,
# all names of applications stored in the /Applications
# directory (on Mac OS X) can be used as keywords.
#
# This allows you to filter the RSS feeds of software version
# update sites that would not otherwise allow you to filter
# the output.
#
# Run this on your local web server or as cron job
# Use on public web servers is discouraged. Security risks!

# How to install this? Instructions here:
# http://www.davids-world.com/archives/2005/05/filtering_macup.html

# (C) 2005 David Reitter, http://www.reitter-it-media.de/
# Released under the GNU General Public License.

use strict;
use warnings;

# set to 1 if you'd like to run this as CGI,
# 0 otherwise.
my $runascgi = 1;

# this is the path to store the temporary file in
# needs to be writeable by this script (i.e. by web server if CGI is used!)
# (USER is frequently unset under a web server; fall back to '' so the
# concatenation does not warn.)
my $user = defined $ENV{'USER'} ? $ENV{'USER'} : '';
my $rsstempfilepath = "/home/".$user."";    # for general Unix systems
# my $rsstempfilepath = "/Users/".$ENV{'USER'}."/Temp";  # for the author's system
# my $rsstempfilepath = "/Users/".$ENV{'USER'}."/Library/Application Support"; # for OS X systems

# set this to 1 if you'd like to include the contents of the /Applications directory (Mac OS X)
# as keywords
my $appsearch = 1;

# the file containing the keywords to look for (line by line)
my $keywordsfile = "";

# NO CHANGES BEYOND THIS LINE NECESSARY
# ---------------------------------------------------------------------------------------------------

# Any (true) command-line argument switches the script to command-line mode.
if ($ARGV[0]) {
    $runascgi = 0;
}

# URL of the feed to filter, and the cache file holding the filtered feed
# accumulated over previous runs.
my $sourcerss = "";
my $rssfile   = "";

if ($runascgi) {
    # Loaded at run time so that command-line use does not require CGI.pm.
    require CGI;
    my $query        = CGI->new;
    my $source_param = $query->param('source');
    $sourcerss = $source_param if defined $source_param;

    unless ($sourcerss =~ /^http:\/\//) {
        print "Content-Type: text/plain\n\n";
        print "You must give this script an URL as argument 'source' that begins with http://. \nUsage example: /cgi-bin/filter-rss.cgi?source=http://mysite.com/index.rss\n\n";
        exit;
    }

    # NOTE: in CGI mode the cache file is created relative to the current
    # working directory; $rsstempfilepath is not applied here.
    $rssfile = "filter-rss-".hashCode($sourcerss).".temp.rss";
}
else {
    $sourcerss    = defined $ARGV[0] ? $ARGV[0] : '';    # give source rss
    $keywordsfile = defined $ARGV[1] ? $ARGV[1] : '';    # file with keywords (line-by-line), optional

    if ($sourcerss eq '') {
        print "Filter-RSS usage:\nfilter-rss sourceURL [keywordsfile]\n";
        print "Ore use as CGI with argument source=sourceURL";
        exit;
    }

    # this is used for temporary storage
    $rssfile = $rsstempfilepath."/filter-rss-".hashCode($sourcerss).".temp.rss";
}

# --- collect keywords -------------------------------------------------------

my @keywords = ();

# Keywords file: one keyword per line. A missing or unreadable file is not an
# error; the script then relies on the application names only.
if ($keywordsfile ne '' && open(my $kw_fh, '<', $keywordsfile)) {
    while (my $line = <$kw_fh>) {
        chomp $line;
        # An empty keyword would turn into /\b\b/ and match almost every item.
        push @keywords, $line if $line ne '';
    }
    close $kw_fh;
}

# Application names: every visible entry of /Applications, minus the ".app"
# suffix and anything that looks like a version number.
if ($appsearch) {
    if (opendir(my $apps_dh, '/Applications')) {
        foreach my $entry (sort readdir $apps_dh) {
            next if $entry =~ /^\./;    # skip ".", ".." and hidden entries
            my $app = $entry;
            $app =~ s/\.app$//;
            $app = _strip_version($app);
            push @keywords, $app if $app ne '';
        }
        closedir $apps_dh;
    }
    else {
        warn "couldnt open /Applications: $!";
    }
}

# --- load the previously filtered feed --------------------------------------

my $prevrss = '';
if (open(my $prev_fh, '<', $rssfile)) {
    warn "reading old file $rssfile";
    while (my $line = <$prev_fh>) {
        $prevrss .= $line;
    }
    close $prev_fh;
}

# --- fetch the source feed ---------------------------------------------------

# List-form pipe open: the URL is handed to curl as a single argument and is
# never interpreted by a shell. "-s" silences the progress meter and error
# text that the script has always discarded.
my $file = '';
if (open(my $curl_fh, '-|', 'curl', '-s', '--url', $sourcerss)) {
    $file = join('', <$curl_fh>);
    close($curl_fh)
        or warn "curl did not exit cleanly for $sourcerss";
}
else {
    warn "could not run curl: $!";
}

# First run (no cache yet): seed the cache with the feed's skeleton, i.e.
# everything before the first <item> plus everything from </channel> onwards.
unless ($prevrss) {
    my $header = '';
    my $footer = '';
    if ($file =~ /^(.*?)<item>/is) {
        $header = $1;
    }
    if ($file =~ /(<\/channel>.*?)$/is) {
        $footer = $1;
    }
    $prevrss = $header.$footer;
    unless ($prevrss) {
        $prevrss = ' ';
    }
}

# --- merge matching items into the cached feed -------------------------------

while ($file =~ /(<item>.*?<\/item>)/isg) {
    my $item = $1;

    next unless $item =~ /<title>(.*?)<\/title>/is;
    my $title = $1;

    my $desc = '';
    if ($item =~ /<description>(.*?)<\/description>/is) {
        $desc = $1;
    }

    foreach my $k (@keywords) {
        my $kq = quotemeta($k);
        next unless $title =~ /\b$kq\b/s || $desc =~ /\b$kq\b/s;

        warn "found: $title $k";

        # Product name = title without its version number. It is used to drop
        # an older cached entry for the same product.
        my $name = _strip_version($title);
        if ($name ne '') {
            my $nameq = quotemeta($name);
            $prevrss =~ s/<item>\s*<title>$nameq\b.*?<\/item>//is;
        }

        # Newest matches go first: before the first cached item, or directly
        # after <channel> when the cache holds no items yet.
        if ($prevrss =~ /<item>/) {
            $prevrss =~ s/<item>/$item\n<item>/is;
        }
        else {
            $prevrss =~ s/<channel>/<channel>$item\n/is;
        }
        last;    # one matching keyword is enough for this item
    }
}

# --- store and emit the result -----------------------------------------------

# Failing to update the cache is not fatal: the feed is still printed below.
if (open(my $out_fh, '>', $rssfile)) {
    print {$out_fh} $prevrss;
    if (close $out_fh) {
        warn "written to $rssfile";
    }
    else {
        warn "error while writing $rssfile: $!";
    }
}
else {
    warn "cannot write to $rssfile: $!";
}

if ($runascgi) {
    print "Content-Type: text/xml\n\n";
}
print $prevrss;

# _strip_version($text)
# Returns $text with a trailing " - ..." part and every run of digits or
# dotted digits removed, and with leading/trailing whitespace trimmed.
sub _strip_version {
    my ($text) = @_;
    $text =~ s/\s-\s.*$//;               # chop off " - whatever" suffix
    $text =~ s/([0-9]+\.)*[0-9]+//g;     # chop off version numbers
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $text;
}

# hashCode($text)
# Returns a numeric code derived from the characters of $text; it is used to
# build a per-feed cache file name.
#
# NOTE(review): "^" is bitwise XOR in Perl (exponentiation is "**") and binds
# more loosely than "*", so each term is (char * 31) XOR (length - position).
# This looks like an attempt at Java's String.hashCode. The arithmetic is kept
# exactly as it was because changing it would rename every existing cache file.
sub hashCode {
    my ($text) = @_;
    my $length = length($text);
    my $code   = 0;
    my $i      = 1;
    foreach my $char (unpack("C*", $text)) {
        $code += (($char * 31) ^ ($length - $i));
        $i++;
    }
    return $code;
}