| print "\t\tupdating page status to 'ok'\n"; | print "\t\tupdating page status to 'ok'\n"; | ||||
| update_page_status($pageid, 'ok'); | update_page_status($pageid, 'ok'); | ||||
| print "\tdone\n"; | |||||
| } | } | ||||
| } | } | ||||
| elsif ( $ext =~ /^(jpeg|png)$/ ) { | elsif ( $ext =~ /^(jpeg|png)$/ ) { | ||||
| print "\tdetecting image rotation\n"; | print "\tdetecting image rotation\n"; | ||||
| my @res; | |||||
| my %res; | |||||
| for(my $rot=0; $rot<360; $rot+=90) { | for(my $rot=0; $rot<360; $rot+=90) { | ||||
| print "\ttrying $rot degrees rotation\n"; | print "\ttrying $rot degrees rotation\n"; | ||||
| my $tempfile = "/tmp/autodoc.$$.jpeg"; | my $tempfile = "/tmp/autodoc.$$.jpeg"; | ||||
| print "\t\tfound $dictmatches words in dictionary\n"; | print "\t\tfound $dictmatches words in dictionary\n"; | ||||
| push @res, { | |||||
| $res{$rot} = { | |||||
| lang => $lang, | lang => $lang, | ||||
| words => $words, | words => $words, | ||||
| dictmatches => $dictmatches | dictmatches => $dictmatches | ||||
| unlink($tempfile); | unlink($tempfile); | ||||
| } | } | ||||
| my $maxwords = 0; | |||||
| my $bestrot; | |||||
| foreach my $rot ( keys %res ) { | |||||
| $bestrot=$rot if !defined $bestrot; | |||||
| if ( $maxwords < $res{$rot}{dictmatches} ) { | |||||
| $maxwords = $res{$rot}{dictmatches}; | |||||
| $bestrot = $rot; | |||||
| } | |||||
| } | |||||
| print "\tbest OCR results with $bestrot rotation\n"; | |||||
| my $pageid = get_new_page($docid); | |||||
| print "\t\tcreated page id $pageid\n"; | |||||
| print "\t\tupdating page status to 'inprogress'\n"; | |||||
| update_page_status($pageid, 'inprogress'); | |||||
| if ( !exists $primary{$docid} ) { | |||||
| print "\t\tsetting document for default primary thumbnail\n"; | |||||
| $primary{$docid}=undef; | |||||
| sqlquery($dbh, "CALL set_primary_page(?)",$pageid); | |||||
| } | |||||
| print "\t\tcreating original page jpeg $pageid.jpeg"; | |||||
| system(sprintf("convert %s/%s %s/%s.jpeg", $queuedir, $file, $originaldir, $pageid)); | |||||
| print "\t\tloading extracted words into database\n"; | |||||
| create_page_words($pageid, $res{$bestrot}{lang}, $res{$bestrot}{words}); | |||||
| print "\t\tupdating page status to 'ok'\n"; | |||||
| update_page_status($pageid, 'ok'); | |||||
| print "\tdone\n"; | |||||
| } | } | ||||
| else { | else { | ||||
| print "\terror: don't know how to process files of $ext type"; | print "\terror: don't know how to process files of $ext type"; | ||||
| $dictwords++ if $found; | $dictwords++ if $found; | ||||
| } | } | ||||
| print Dumper(\%lcnt); | |||||
| #print Dumper(\%lcnt); | |||||
| my $max = 0; | my $max = 0; | ||||
| my $lmax; | my $lmax; |