|
|
@@ -81,12 +81,12 @@ foreach my $file ( readdir(Q) ) { |
|
|
|
|
|
|
|
print "\t\tupdating page status to 'ok'\n"; |
|
|
|
update_page_status($pageid, 'ok'); |
|
|
|
|
|
|
|
print "\tdone\n"; |
|
|
|
} |
|
|
|
} |
|
|
|
elsif ( $ext =~ /^(jpeg|png)$/ ) { |
|
|
|
print "\tdetecting image rotation\n"; |
|
|
|
my @res; |
|
|
|
my %res; |
|
|
|
for(my $rot=0; $rot<360; $rot+=90) { |
|
|
|
print "\ttrying $rot degrees rotation\n"; |
|
|
|
my $tempfile = "/tmp/autodoc.$$.jpeg"; |
|
|
@@ -100,7 +100,7 @@ foreach my $file ( readdir(Q) ) { |
|
|
|
|
|
|
|
print "\t\tfound $dictmatches words in dictionary\n"; |
|
|
|
|
|
|
|
push @res, { |
|
|
|
$res{$rot} = { |
|
|
|
lang => $lang, |
|
|
|
words => $words, |
|
|
|
dictmatches => $dictmatches |
|
|
@@ -109,6 +109,37 @@ foreach my $file ( readdir(Q) ) { |
|
|
|
unlink($tempfile); |
|
|
|
} |
|
|
|
|
|
|
|
my $maxwords = 0; |
|
|
|
my $bestrot; |
|
|
|
foreach my $rot ( keys %res ) { |
|
|
|
$bestrot=$rot if !defined $bestrot; |
|
|
|
if ( $maxwords < $res{$rot}{dictmatches} ) { |
|
|
|
$maxwords = $res{$rot}{dictmatches}; |
|
|
|
$bestrot = $rot; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
print "\tbest OCR results with $bestrot rotation\n"; |
|
|
|
|
|
|
|
my $pageid = get_new_page($docid); |
|
|
|
print "\t\tcreated page id $pageid\n"; |
|
|
|
|
|
|
|
print "\t\tupdating page status to 'inprogress'\n"; |
|
|
|
update_page_status($pageid, 'inprogress'); |
|
|
|
|
|
|
|
if ( !exists $primary{$docid} ) { |
|
|
|
print "\t\tsetting document for default primary thumbnail\n"; |
|
|
|
$primary{$docid}=undef; |
|
|
|
sqlquery($dbh, "CALL set_primary_page(?)",$pageid); |
|
|
|
} |
|
|
|
print "\t\tcreating original page jpeg $pageid.jpeg"; |
|
|
|
system(sprintf("convert %s/%s %s/%s.jpeg", $queuedir, $file, $originaldir, $pageid)); |
|
|
|
|
|
|
|
print "\t\tloading extracted words into database\n"; |
|
|
|
create_page_words($pageid, $res{$bestrot}{lang}, $res{$bestrot}{words}); |
|
|
|
print "\t\tupdating page status to 'ok'\n"; |
|
|
|
update_page_status($pageid, 'ok'); |
|
|
|
print "\tdone\n"; |
|
|
|
} |
|
|
|
else { |
|
|
|
print "\terror: don't know how to process files of $ext type"; |
|
|
@@ -187,7 +218,7 @@ sub detect_lang { |
|
|
|
$dictwords++ if $found; |
|
|
|
} |
|
|
|
|
|
|
|
print Dumper(\%lcnt); |
|
|
|
#print Dumper(\%lcnt); |
|
|
|
|
|
|
|
my $max = 0; |
|
|
|
my $lmax; |