Browse Source

finished image ocr

master
Pascal Gloor 5 years ago
parent
commit
e16f557794
1 changed files with 35 additions and 4 deletions
  1. 35
    4
      bin/autodoc_process.pl

+ 35
- 4
bin/autodoc_process.pl View File



print "\t\tupdating page status to 'ok'\n"; print "\t\tupdating page status to 'ok'\n";
update_page_status($pageid, 'ok'); update_page_status($pageid, 'ok');
print "\tdone\n";
} }
} }
elsif ( $ext =~ /^(jpeg|png)$/ ) { elsif ( $ext =~ /^(jpeg|png)$/ ) {
print "\tdetecting image rotation\n"; print "\tdetecting image rotation\n";
my @res;
my %res;
for(my $rot=0; $rot<360; $rot+=90) { for(my $rot=0; $rot<360; $rot+=90) {
print "\ttrying $rot degrees rotation\n"; print "\ttrying $rot degrees rotation\n";
my $tempfile = "/tmp/autodoc.$$.jpeg"; my $tempfile = "/tmp/autodoc.$$.jpeg";


print "\t\tfound $dictmatches words in dictionary\n"; print "\t\tfound $dictmatches words in dictionary\n";


push @res, {
$res{$rot} = {
lang => $lang, lang => $lang,
words => $words, words => $words,
dictmatches => $dictmatches dictmatches => $dictmatches
unlink($tempfile); unlink($tempfile);
} }


my $maxwords = 0;
my $bestrot;
foreach my $rot ( keys %res ) {
$bestrot=$rot if !defined $bestrot;
if ( $maxwords < $res{$rot}{dictmatches} ) {
$maxwords = $res{$rot}{dictmatches};
$bestrot = $rot;
}
}

print "\tbest OCR results with $bestrot rotation\n";

my $pageid = get_new_page($docid);
print "\t\tcreated page id $pageid\n";

print "\t\tupdating page status to 'inprogress'\n";
update_page_status($pageid, 'inprogress');

if ( !exists $primary{$docid} ) {
print "\t\tsetting document for default primary thumbnail\n";
$primary{$docid}=undef;
sqlquery($dbh, "CALL set_primary_page(?)",$pageid);
}
print "\t\tcreating original page jpeg $pageid.jpeg";
system(sprintf("convert %s/%s %s/%s.jpeg", $queuedir, $file, $originaldir, $pageid));

print "\t\tloading extracted words into database\n";
create_page_words($pageid, $res{$bestrot}{lang}, $res{$bestrot}{words});
print "\t\tupdating page status to 'ok'\n";
update_page_status($pageid, 'ok');
print "\tdone\n";
} }
else { else {
print "\terror: don't know how to process files of $ext type"; print "\terror: don't know how to process files of $ext type";
$dictwords++ if $found; $dictwords++ if $found;
} }


print Dumper(\%lcnt);
#print Dumper(\%lcnt);


my $max = 0; my $max = 0;
my $lmax; my $lmax;

Loading…
Cancel
Save