#!C:\perl\perl.exe

# geturls.pl rifles through a directory, extracts all the URLs and email
# addresses it can find, and appends them to the appropriate files.
#
# Every plain file directly inside $directory is read line by line. Each
# email address found is appended to $email_file and each URL to $url_file,
# one entry per line. Both output files are opened relative to $directory's
# parent ("../NAME" after chdir-ing into $directory). A progress message is
# printed to STDOUT for every entry written.
#
# The file can also be loaded with require/do to reuse extract_emails() and
# extract_urls(); main() only runs when the file is executed as a script.

use strict;
use warnings;

# The name of the directory containing files to be parsed.
my $directory = "html";

# The name of the file to append URLs to.
my $url_file = "newUrls.txt";

# The name of the file to append email addresses to.
my $email_file = 'webmail.txt';

# An email address: a run of characters that cannot be markup or
# punctuation delimiters, an "@", then one or more dot-separated labels.
# Dots are allowed between labels but never at the end, so the full stop
# of a sentence is not swallowed while "example.co.uk" is kept whole.
my $mail_search = qr{
    [^":\s<>()/;]+
    \@
    [^":\s<>()/;.]+
    (?: [.] [^":\s<>()/;.]+ )*
}x;

# A URL: either an explicit http(s) scheme or a bare "www" prefix, followed
# by any run of the characters accepted in the host/path/query part.
# Putting both prefixes in one alternation means "http://www.example.com"
# is reported once rather than once per prefix.
my $url_search = qr{ (?: https?:// | www ) [\w/.\-=?%&]* }xi;

# Returns the list of email addresses found in $text, in order of
# appearance (the number of addresses when called in scalar context).
sub extract_emails {
    my ($text) = @_;
    my @found = $text =~ /$mail_search/g;
    return @found;
}

# Returns the list of URLs found in $text, in order of appearance
# (the number of URLs when called in scalar context).
sub extract_urls {
    my ($text) = @_;
    my @found = $text =~ /$url_search/g;
    return @found;
}

# Scans every plain file in $directory and appends what it finds to the
# two output files. Dies with the failing path and OS error if the
# directory, an input file, or an output file cannot be opened or flushed.
sub main {
    chdir($directory) or die "Cannot chdir to '$directory': $!";

    open(my $email_out, '>>', "../$email_file")
        or die "Cannot append to '../$email_file': $!";
    open(my $url_out, '>>', "../$url_file")
        or die "Cannot append to '../$url_file': $!";

    foreach my $name (glob '*') {
        # The glob also returns subdirectories; opening one would abort
        # the whole run, so only plain files are scanned.
        next unless -f $name;

        # Three-argument open: a file name that starts with ">" or ends
        # with "|" must be read as a file, never treated as a mode or command.
        open(my $in, '<', $name) or die "Cannot read '$name': $!";

        while (my $line = <$in>) {
            # The two scans are independent, so a line holding both an
            # address and a URL contributes to both output files.
            foreach my $address (extract_emails($line)) {
                print {$email_out} "$address\n";
                print "Appending $address to file\n";
            }
            foreach my $url (extract_urls($line)) {
                print {$url_out} "$url\n";
                print "Appending $url to file\n";
            }
        }

        close $in;
    }

    # Buffered write errors only surface at close, so check the appends.
    close($email_out) or die "Cannot close '../$email_file': $!";
    close($url_out)   or die "Cannot close '../$url_file': $!";

    return;
}

main() unless caller;