# Collect the capitalized words that occur in a tab-separated text file and
# report, for each one, the first field of the line on which it first appears.
# Words whose lower-case form also occurs somewhere in the file are treated as
# common words and dropped from the final report.
#
# Usage: perl <script> [path/to/ULB_text.txt]
#
# Outputs:
#   out/results.txt - "<word>, <first field>" lines, sorted by word
#   log/log.log     - created/truncated; nothing is currently written to it
use 5.12.0;
use warnings;
use File::Slurp;
use File::Find;
use Cwd;

# The input path may be overridden by the first command-line argument.
my $ulb_path = shift(@ARGV)
    // '/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt';
my $results_path = 'out/results.txt';
my $log_path     = 'log/log.log';

# Maps a capitalized word to the first field of the line it was first seen on.
my %location;

# Write every "<word>, <value>" pair of the given hash to $path, sorted by word.
# Dies if the file cannot be opened or fully written.
sub write_results {
    my ($path, $location_of) = @_;

    open my $out, '>', $path or die "Cannot open $path for writing: $!";
    for my $word (sort keys %{$location_of}) {
        say {$out} "$word, $location_of->{$word}";
    }
    # Buffered write errors only surface at close time.
    close $out or die "Cannot close $path: $!";
    return;
}

open my $log, '>', $log_path or die "Cannot open $log_path for writing: $!";

say "Reading ULB";
open my $in, '<', $ulb_path or die "Cannot open $ulb_path: $!";
while (my $line = <$in>) {
    chomp $line;

    # Everything before the first tab is the line's first field; only the
    # remainder of the line is searched for words.
    my ($first_field, $text) = split /\t/, $line, 2;
    next unless defined $text;    # no tab on this line: nothing to scan

    # A word is one capital followed by lower-case letters, optionally
    # followed by a hyphen and a second such part (e.g. "Abc-Def").
    while ($text =~ /([A-Z][a-z]+(?:-[A-Z][a-z]+)?)/g) {
        # Keep only the first field seen for each word.
        $location{$1} //= $first_field;
    }
}
close $in;

# Intermediate dump of everything found; it is overwritten by the final
# report below unless the script stops in between.
say "Outputting hash";
write_results($results_path, \%location);

say "Deleting common words";
my $fileText = read_file($ulb_path, binmode => 'utf8');
# keys() builds the full list before the loop starts, so deleting entries
# while iterating is safe.
for my $word (keys %location) {
    my $lowercase = lc $word;
    # \Q...\E keeps the word literal inside the pattern.
    delete $location{$word} if $fileText =~ /\b\Q$lowercase\E\b/;
}

say "Outputting final product";
write_results($results_path, \%location);

close $log or die "Cannot close $log_path: $!";