| @@ -81,12 +81,12 @@ foreach my $file ( readdir(Q) ) { | |||
| print "\t\tupdating page status to 'ok'\n"; | |||
| update_page_status($pageid, 'ok'); | |||
| print "\tdone\n"; | |||
| } | |||
| } | |||
| elsif ( $ext =~ /^(jpeg|png)$/ ) { | |||
| print "\tdetecting image rotation\n"; | |||
| my @res; | |||
| my %res; | |||
| for(my $rot=0; $rot<360; $rot+=90) { | |||
| print "\ttrying $rot degrees rotation\n"; | |||
| my $tempfile = "/tmp/autodoc.$$.jpeg"; | |||
| @@ -100,7 +100,7 @@ foreach my $file ( readdir(Q) ) { | |||
| print "\t\tfound $dictmatches words in dictionary\n"; | |||
| push @res, { | |||
| $res{$rot} = { | |||
| lang => $lang, | |||
| words => $words, | |||
| dictmatches => $dictmatches | |||
| @@ -109,6 +109,37 @@ foreach my $file ( readdir(Q) ) { | |||
| unlink($tempfile); | |||
| } | |||
| # Choose the rotation whose OCR output matched the most dictionary words. | |||
| # Rotations are visited in ascending numeric order so that ties (including | |||
| # the case where no rotation matched anything) resolve to the smallest | |||
| # rotation instead of depending on Perl's unspecified hash key order. | |||
| my $maxwords = 0; | |||
| my $bestrot; | |||
| foreach my $rot ( sort { $a <=> $b } keys %res ) { | |||
| $bestrot = $rot if !defined $bestrot; | |||
| if ( $maxwords < $res{$rot}{dictmatches} ) { | |||
| $maxwords = $res{$rot}{dictmatches}; | |||
| $bestrot = $rot; | |||
| } | |||
| } | |||
| # NOTE(review): $bestrot stays undef if %res is empty - confirm the rotation | |||
| # loop always stores a result for at least one angle. | |||
| print "\tbest OCR results with $bestrot rotation\n"; | |||
| my $pageid = get_new_page($docid); | |||
| print "\t\tcreated page id $pageid\n"; | |||
| print "\t\tupdating page status to 'inprogress'\n"; | |||
| update_page_status($pageid, 'inprogress'); | |||
| # Only a document with no %primary entry yet gets this page passed to | |||
| # set_primary_page; the entry's value is unused, only its existence matters. | |||
| if ( !exists $primary{$docid} ) { | |||
| print "\t\tsetting document for default primary thumbnail\n"; | |||
| $primary{$docid}=undef; | |||
| sqlquery($dbh, "CALL set_primary_page(?)",$pageid); | |||
| } | |||
| print "\t\tcreating original page jpeg $pageid.jpeg\n"; | |||
| # List-form system() bypasses the shell, so queue file names containing | |||
| # spaces or shell metacharacters reach convert verbatim. | |||
| my $convert_status = system('convert', "$queuedir/$file", "$originaldir/$pageid.jpeg"); | |||
| print "\t\terror: convert exited with status $convert_status\n" if $convert_status != 0; | |||
| print "\t\tloading extracted words into database\n"; | |||
| # Store the words and language detected at the winning rotation. | |||
| create_page_words($pageid, $res{$bestrot}{lang}, $res{$bestrot}{words}); | |||
| print "\t\tupdating page status to 'ok'\n"; | |||
| update_page_status($pageid, 'ok'); | |||
| print "\tdone\n"; | |||
| } | |||
| else { | |||
| print "\terror: don't know how to process files of $ext type"; | |||
| @@ -187,7 +218,7 @@ sub detect_lang { | |||
| $dictwords++ if $found; | |||
| } | |||
| print Dumper(\%lcnt); | |||
| #print Dumper(\%lcnt); | |||
| my $max = 0; | |||
| my $lmax; | |||