60 lines
1.1 KiB
Perl
60 lines
1.1 KiB
Perl
|
use 5.12.0;
|
||
|
use File::Slurp;
|
||
|
use File::Find ;
|
||
|
use Cwd ;
|
||
|
|
||
|
# Maps each capitalised word found in the ULB text to the leading
# (pre-tab) field of the first line on which that word was seen.
my %location;

# Debug log. The script's diagnostic writes to it are currently commented
# out, so it is normally left empty. The bareword name LOG is kept because
# the rest of the script refers to the handle by that name.
# Three-argument open keeps the mode separate from the filename, and the
# error message now says what failed and why.
open(LOG, '>', 'log/log.log')
    or die "Cannot open log/log.log for writing: $!";
|
||
|
|
||
|
# Pass 1: read the ULB text line by line and record, for every capitalised
# word (optionally hyphenated, e.g. "Abc-Def"), the leading pre-tab field of
# the first line it appears on. Only text after the first tab is searched;
# lines that contain no tab are ignored.
{
    my $ulb_path = "/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt";

    # Lexical handle + explicit read mode; the handle is scoped to this block.
    open(my $in, '<', $ulb_path) or die "Cannot open $ulb_path: $!";

    say "Reading ULB";

    while (my $line = <$in>) {
        #print LOG "$line";
        chomp $line;

        # Split on the first tab only: $where is the leading field, $text is
        # everything after it (later tabs stay inside $text).
        my ($where, $text) = split /\t/, $line, 2;
        next unless defined $text;

        # One left-to-right /g scan finds the same words, in the same order,
        # as repeatedly deleting the leftmost match and rescanning the line,
        # without the quadratic rescans.
        while ($text =~ /([A-Z][a-z]+(?:-[A-Z][a-z]+)?)/g) {
            my $word = $1;
            # say LOG $word;

            # First occurrence wins; later sightings never overwrite it.
            $location{$word} = $where unless exists $location{$word};
        }
    }

    close $in;
}
|
||
|
|
||
|
say "Outputting hash";

# Intermediate dump of every collected word as "word, location", sorted by
# word. The same path is reopened for writing (and therefore truncated) by
# the final output step later in the script.
{
    my $results_path = 'out/results.txt';

    # Three-argument open with a lexical handle scoped to this block.
    open(my $out, '>', $results_path)
        or die "Cannot open $results_path for writing: $!";

    foreach my $word (sort keys %location) {
        say {$out} "$word, $location{$word}";
    }

    # Buffered write errors only surface at close, so check it.
    close($out) or die "Cannot close $results_path: $!";
}
|
||
|
|
||
|
say "Deleting common words";

# Pass 2: drop every collected word whose all-lowercase form also occurs in
# the text as a whole word.
my $fileText = read_file("/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt", binmode => 'utf8');

# Index every maximal run of word characters once. For a purely alphabetic
# word, "is a key of this hash" is the same condition as /\b$word\b/ matching
# somewhere in the text, but costs one hash lookup instead of a fresh scan of
# the whole file per word.
my %is_token;
$is_token{$1} = 1 while $fileText =~ /(\w+)/g;

foreach my $word (sort keys %location) {
    my $temp = lc $word;
    #say LOG $word . "\t" . $temp;

    # Hyphenated words span two tokens, so they keep the regex test;
    # \Q...\E makes sure the word is matched literally.
    my $is_common = $temp =~ /-/
        ? $fileText =~ /\b\Q$temp\E\b/
        : exists $is_token{$temp};

    # Deleting here is safe: the key list was built before the loop started.
    delete $location{$word} if $is_common;
}
|
||
|
|
||
|
say "Outputting final product";

# Final result: the words still present in %location, written as
# "word, location" and sorted by word. Opening with '>' truncates anything
# previously written to this path.
{
    my $results_path = 'out/results.txt';

    # Three-argument open with a lexical handle scoped to this block.
    open(my $out, '>', $results_path)
        or die "Cannot open $results_path for writing: $!";

    foreach my $word (sort keys %location) {
        say {$out} "$word, $location{$word}";
    }

    # Buffered write errors only surface at close, so check it.
    close($out) or die "Cannot close $results_path: $!";
}
|
||
|
|
||
|
|
||
|
# LOG was opened for writing, so check close: buffered write failures are
# only reported at this point.
close(LOG) or die "Cannot close log/log.log: $!";
|