Index: E:/workspaces/clear/keywordscan/keywdscan.pl =================================================================== --- E:/workspaces/clear/keywordscan/keywdscan.pl (revision 348960) +++ E:/workspaces/clear/keywordscan/keywdscan.pl (working copy) @@ -1,9 +1,8 @@ #!/usr/bin/perl use strict; -#use POSIX; +use Getopt::Std; use POSIX qw/strftime/; use File::Find; -#use Time::Local; use Cwd; use File::Spec::Functions; @@ -13,18 +12,27 @@ =head1 SYNOPSIS - #Scan using the file "config_file" + # Scan using the file "config_file" keywdscan.pl config_file - + + # Scan ignoring case using the root "/harmony", keywords in + # "keywords.txt", with output to "hykeywords.csv" and warn about the + # types "zip,jar" + keywdscan.pl -i -r /harmony -k keywords.txt -o hykeywords.csv -t zip,jar + =head1 DESCRIPTION -Takes one command line parameter - the name of the configuration file. See C<KEYWD_CONFIG> for a sample. If no -file name is supplied it will look for a file called C<KEYWD_CONFIG> in the same directory as it's being run from. +Takes one command line parameter - the name of the configuration +file. See C<KEYWD_CONFIG> for a sample. If no file name is supplied it +will look for a file called C<KEYWD_CONFIG> in the same directory as +it's being run from. -The list of words to be scanned for are read from C< KeywordInputFileName>. +The list of words to be scanned for are read from C<KeywordInputFileName>. -Scans source code files in directories under C<ScanRootDirectory> looking for occurrences of keywords. -Examined file types (C<FileTypes>) are specified by suffix - eg java, c, h, cpp.... +Scans source code files in directories under C<ScanRootDirectory> +looking for occurrences of keywords. Will scan all text files and +(optionally) warn about the existence of files which may contain +source (eg gzip, tar) Output is written to C<ReportFileName> as a CSV file in the form: @@ -33,16 +41,26 @@ Line number, line, matched word Line number, line, matched word .... - -Note that in "line" any commas are changed to : so as to preserve the format of the CSV file. 
+Note that in "line" any commas are changed to : so as to preserve the +format of the CSV file. + Case sensitive searching is optional. =head1 FUNCTIONS =cut -# +my %opt; +getopts('ir:o:k:t:', \%opt); + +my $keywd_infile; +my $keywd_outfile; +my $scan_root; +my $case_ins = 0; +my @filetypes; +my %warning; + my $config_file = shift; if ( !defined $config_file ) { @@ -51,42 +69,36 @@ $config_file = catfile( $config_path, "KEYWD_CONFIG" ); } -if ( !-e "$config_file" ) { - die "Config file ($config_file)does not exist.\n"; -} - -# -# -# Read the config file -# -# -print "Using Config file $config_file \n"; -open( CONF, "$config_file" ) || die "Can't open KEYWD_CONFIG file $!\n"; - -my $keywd_infile; -my $keywd_outfile; -my $scan_root; -my $case_ins = 0; -my @filetypes; - -while (<CONF>) { +if ( -e "$config_file" ) { + # die "Config file ($config_file)does not exist.\n"; + + # Read the config file + # + print "Using Config file $config_file \n"; + open( CONF, "$config_file" ) || die "Can't open KEYWD_CONFIG file $!\n"; + while (<CONF>) { if (/^KeywordInputFileName:(.*)/) { $keywd_infile = $1; } if (/^ReportFileName:(.*)/) { $keywd_outfile = $1; } if (/^ScanRootDirectory:(.*)/) { $scan_root = $1; } if (/^CaseInsensitive/) { $case_ins = 1; } if (/^FileTypes:(.*)/) { @filetypes = split /,/, $1; } + } +} else { + print "No Config file found \n"; } +$case_ins = 1 if (exists $opt{'i'}); +$scan_root = $opt{'r'} if (exists $opt{'r'}); +$keywd_outfile = $opt{'o'} if (exists $opt{'o'}); +$keywd_infile = $opt{'k'} if (exists $opt{'k'}); +@filetypes = split /,/, $opt{'t'} if (exists $opt{'t'}); + # # Get date # -#my ( $se, $mi, $ho, $md, $mo, $ye, $wd, $yd, $is ) = localtime; -my $date_str = strftime("%Y-%m-%d at %H:%M", localtime); +my $date_str = strftime( "%Y-%m-%d at %H:%M", localtime ); # -#my $month = $mo + 1; -#my $year = $ye + 1900; -#my $date_str = $year . "-" . $month . "-" . $md . " at " . $ho . ":" . 
$mi; # print "Starting scan at root directory: $scan_root\n"; @@ -96,10 +108,9 @@ # open( OUT, "> $keywd_outfile" ) || die "Can't open ReportFile $!\n"; print OUT "Scan date $date_str\n"; -if ($case_ins) { print OUT"Case insensitive search\n"; } +if ($case_ins) { print OUT "Case insensitive search\n"; } else { print OUT "Case sensitive search\n"; } -my $ft = join ";", @filetypes; -print OUT "Looking for file types $ft \n\n"; +print OUT "Flag existence of files of type: @filetypes\n"; # # Create lookup table of filetypes @@ -120,7 +131,7 @@ # push( @words, $word ); -# Deal with ( . in the keyword file by back-slashing. Could deal with * here too? + # Deal with ( . and other metacharacters in the keyword file $word = quotemeta($word); push( @matchwords, $word ); } @@ -133,6 +144,15 @@ find( \&source, $scan_root ); # +# Print warnings first +# +print OUT "********************WARNINGS**********************\n"; +while ( my ( $file, $ext ) = each %warning ) { + print OUT "***** Found $file - MAY CONTAIN SOURCE\n"; +} +print OUT "******************END WARNINGS********************\n"; + +# # Check each file for keyword matches # my $totalmatch = 0; @@ -188,17 +208,21 @@ =head2 C<source> -Required by File::Find - looks for files ending in .xxx +Required by File::Find - looks for text files and files ending in .xxx (specified in C<FileTypes>) =cut sub source { - return unless ( $_ =~ /\.([^.]+)$/ ); - my $ext = $1; - if ( exists $sourcetypes{$ext} ) { - my $name = catfile( $File::Find::dir, $_ ); - push @sourcefiles, $name; + if ( $_ =~ /\.([^.]+)$/ ) { + my $ext = $1; + if ( exists $sourcetypes{$ext} ) { + $warning{$_} = $ext; + } } + return unless -f $_ && -T $_; + my $name = catfile( $File::Find::dir, $_ ); + push @sourcefiles, $name; + return; } @@ -236,3 +260,4 @@ */ =cut +