| @@ -1,6 +1,8 @@ | |||
| #!/usr/bin/perl | |||
| use strict; | |||
| use lib '/opt/autodoc/lib'; | |||
| use Autodoc; | |||
| use JSON; | |||
| use DBI; | |||
| use utf8; | |||
| @@ -25,7 +27,7 @@ my %primary; | |||
| print "Loading languages\n"; | |||
| my %langid; | |||
| my $q = sqlquery($dbh, "SELECT id,short FROM lang"); | |||
| my $q = sqlquery($dbh, "SELECT id,short FROM languages"); | |||
| while(my($id,$short)=$q->fetchrow_array()) { $langid{$short}=$id; } | |||
| print "Opening queue folder $queuedir\n"; | |||
| @@ -167,7 +169,8 @@ sub ocr_file { | |||
| my($file) = @_; | |||
| my $txt = ''; | |||
| open(OCR,sprintf("tesseract -l eng+deu+fra+ita %s - |", $file)); | |||
| #open(OCR,sprintf("tesseract -l eng+deu+fra+ita %s - |", $file)); | |||
| open(OCR,sprintf("tesseract -l Latin %s - |", $file)); | |||
| while(<OCR>) { | |||
| $txt .= $_; | |||
| } | |||
| @@ -179,7 +182,7 @@ sub create_page_words { | |||
| my($pageid, $lang, $words) = @_; | |||
| foreach my $word ( @{$words} ) { | |||
| sqlquery($dbh, "CALL add_page_word(?,?,?)", | |||
| sqlquery($dbh, "CALL create_page_word(?,?,?)", | |||
| $pageid, $word, $langid{$lang}); | |||
| } | |||
| @@ -239,7 +242,7 @@ sub detect_lang { | |||
| sub update_page_status { | |||
| my($pageid, $status) = @_; | |||
| sqlquery($dbh, "CALL update_page_status(?,?)",$pageid, $status); | |||
| sqlquery($dbh, "CALL set_page_status(?,?)",$pageid, $status); | |||
| } | |||
| sub get_new_page { | |||
| @@ -259,37 +262,3 @@ sub gen_uuid { | |||
| my $ug = Data::UUID->new; | |||
| return lc($ug->create_str()); | |||
| } | |||
# Read a JSON configuration file and return the decoded data structure.
#
#   $file - path of the configuration file
#
# Dies when the file cannot be opened; from_json() raises its own error
# when the content is not valid JSON.
sub load_conf {
    my ($file) = @_;

    # Three-argument open with a lexical handle: the path is used literally,
    # so leading/trailing whitespace or characters such as '>' and '|' in it
    # can no longer change what gets opened.
    open(my $fh, '<', $file)
        or die "Failed to load configuration file '$file': $!";

    # Slurp the whole file in one read.
    my $json = do { local $/; <$fh> };
    close($fh);
    $json = '' if !defined $json;

    return from_json($json);
}
# Open a MySQL connection described by a configuration hash.
#
#   $sql - hashref with the keys base, host, user and pass
#
# Returns the DBI database handle. Dies, including the DBI error text when
# one is available, if the connection cannot be established.
sub sqlconnect {
    my ($sql) = @_;
    my $dsn  = "DBI:mysql:database=$sql->{base};host=$sql->{host}";
    my %attr = ( mysql_enable_utf8 => 1 );

    my $dbh = DBI->connect($dsn, $sql->{user}, $sql->{pass}, \%attr);
    if ( !$dbh ) {
        my $why = $DBI::errstr;
        die "Failed to connect to database" . ( defined $why ? ": $why" : '' );
    }
    return $dbh;
}
# Prepare and execute one SQL statement with bound parameters.
#
#   $dbh   - database handle
#   $query - SQL text with '?' placeholders
#   @args  - values bound to the placeholders, in order
#
# Returns the executed statement handle. Dies when either step fails; the
# message names the failing step and carries the handle's error text.
sub sqlquery {
    my ($dbh, $query, @args) = @_;

    my $sth = $dbh->prepare($query);
    if ( !$sth ) {
        my $why = $dbh->errstr;
        die "Failed to prepare SQL query" . ( defined $why ? ": $why" : '' );
    }
    if ( !$sth->execute(@args) ) {
        my $why = $sth->errstr;
        die "Failed to execute SQL query" . ( defined $why ? ": $why" : '' );
    }
    return $sth;
}
| @@ -0,0 +1,37 @@ | |||
| #!/usr/bin/perl | |||
| use strict; | |||
| use lib '/opt/autodoc/lib'; | |||
| use Autodoc; | |||
| use JSON; | |||
| use DBI; | |||
| use warnings; | |||
# Site configuration: the schema path and the database credentials used
# below are read from it.
my $conf = load_conf("../etc/autodoc.json");

# Make the destructive nature of this script hard to miss.
print "!!! STOP !!!"x4 . "\n" for 1 .. 4;
print "Running this script on an existing setup WILL ERASE ALL YOUR DATA!!!\n\n";
print "Are you sure you want to continue? [N/y] ";

# Anything other than a single y/Y -- including end of input -- aborts.
my $resp = <STDIN>;
$resp = '' if !defined $resp;
chomp $resp;
if ( $resp !~ /^[yY]$/ ) {
    print "Ok, bye\n";
    exit;
}

print "Creating database schema\n";

# The schema relies on the client-side DELIMITER command, so it has to go
# through the mysql client. It is fed on standard input instead of through a
# "cat | mysql" shell pipeline.
my $schema = sprintf("%s/%s", $conf->{path}{global}, 'etc/schema.sql');
open(STDIN, '<', $schema)
    or die "Failed to open $schema: $!\n";

# List-form system() bypasses the shell, so quotes or other metacharacters
# in the credentials cannot break or alter the command. stderr is no longer
# discarded: real errors become visible (as does the client's warning about
# passwords on the command line).
my @cmd = (
    'mysql',
    '-u', $conf->{sql}{user},
    '-h', $conf->{sql}{host},
    '--password=' . $conf->{sql}{pass},
    $conf->{sql}{base},
);
system(@cmd) == 0
    or die "Failed to create database schema (mysql exit status " . ( $? >> 8 ) . ")\n";

print "All done\n";
| @@ -0,0 +1,57 @@ | |||
| #!/usr/bin/perl | |||
| use strict; | |||
| use lib '/opt/autodoc/lib'; | |||
| use Autodoc; | |||
| use utf8; | |||
| use warnings; | |||
# Unbuffered STDOUT so the "\r" progress line is redrawn in place.
$|=1;

my $conf = load_conf("../etc/autodoc.json");
my $dbh = sqlconnect($conf->{sql});

# Number of rows sent per multi-row INSERT.
my $chunk = 10000;

# Language code => list of word-list file names, taken from the "dict"
# section of the configuration.
my $dicts = $conf->{dict};
die "No \"dict\" section found in configuration\n" if ref($dicts) ne 'HASH';

foreach my $lang ( sort keys %{$dicts} ) {
    print "Loading language $lang ...\n";
    foreach my $dict ( @{$dicts->{$lang}} ) {
        my $file = '/usr/share/dict/'.$dict;

        # Read the whole word list first so the total is known for the
        # progress display.
        open(my $fh, '<', $file)
            or die "Failed to load $file, did you install the required dictionaries?";
        my @words;
        while ( my $word = <$fh> ) { chomp $word; push @words, $word; }
        close($fh);

        my $len = scalar(@words);
        for ( my $pos = 0; $pos < $len; $pos += $chunk ) {
            # Slice out the next batch, clamped to the end of the list.
            my $last = $pos + $chunk - 1;
            $last = $len - 1 if $last > $len - 1;
            my @batch = @words[ $pos .. $last ];

            # One "(?,?)" group and one (word, lang) pair per row.
            my $query = "INSERT IGNORE INTO dict" .
                        "(word, lang) VALUES " .
                        join(",", ("(?,?)") x scalar(@batch));
            sqlquery($dbh, $query, map { ( $_, $lang ) } @batch);

            my $cnt = $pos + scalar(@batch);
            printf("\t%s %s/%s (%i%%) \r",
                $dict, $cnt , $len, int($cnt/$len*100) );
        }
        printf("\n");
    }
}
| @@ -1,91 +0,0 @@ | |||
| #!/usr/bin/perl | |||
| use strict; | |||
| use JSON; | |||
| use DBI; | |||
| use utf8; | |||
| use GD::Simple; | |||
| use Data::Dumper; | |||
| use Data::UUID; | |||
| use warnings; | |||
# Unbuffered STDOUT so the "\r" progress line is redrawn in place.
$|=1;
$Data::Dumper::Sortkeys = 1;

my $conf = load_conf("../etc/autodoc.json");
my $dbh = sqlconnect($conf->{sql});

# Language code => word-list files under /usr/share/dict.
my %lang = (
    'fr' => [ 'french' ],
    'de' => [ 'swiss', 'ngerman' ],
    'en' => [ 'british-english-large', 'american-english-large' ],
    'it' => [ 'italian' ],
);

foreach my $lang ( sort keys %lang ) {
    print "Loading language $lang ...\n";
    foreach my $dict ( @{$lang{$lang}} ) {
        my $file = '/usr/share/dict/'.$dict;

        # Read the list in-process instead of shelling out to "wc -l". As
        # before, a word list that cannot be read is skipped.
        my $fh;
        if ( !open($fh, '<', $file) ) {
            warn "Skipping $file: $!\n";
            next;
        }
        my @words = <$fh>;
        close($fh);
        chomp @words;

        my $len = scalar(@words);
        my $cnt = 0;
        my $start = time();
        foreach my $word ( @words ) {
            sqlquery($dbh, "INSERT IGNORE INTO dict SET word = ?, lang = ?",
                $word, $lang);
            $cnt++;
            # Refresh the progress line every 777 rows and on the last row.
            # The counters are numbers, so compare them numerically.
            if ( ! ( $cnt % 777 ) || $cnt == $len ) {
                my $elapsed = time() - $start;
                printf("\t%s %s/%s (%i%%) ETA: %ss \r",
                    $dict, $cnt, $len, int($cnt/$len*100),
                    $elapsed == 0 ? '-' : int( ( $elapsed/$cnt*$len ) - $elapsed )
                );
            }
        }
        printf("\n");
    }
}
# Read a JSON configuration file and return the decoded data structure.
#
#   $file - path of the configuration file
#
# Dies when the file cannot be opened; from_json() raises its own error
# when the content is not valid JSON.
sub load_conf {
    my ($file) = @_;

    # Three-argument open with a lexical handle, so the path is used
    # literally and cannot be re-interpreted as an open mode or a command.
    open(my $fh, '<', $file)
        or die "Failed to load configuration file '$file': $!";

    my $json = do { local $/; <$fh> };    # slurp the whole file
    close($fh);
    $json = '' if !defined $json;

    return from_json($json);
}
# Open a MySQL connection described by a configuration hash.
#
#   $sql - hashref with the keys base, host, user and pass
#
# Returns the DBI database handle. Dies, including the DBI error text when
# one is available, if the connection cannot be established.
sub sqlconnect {
    my ($sql) = @_;
    my $dsn = "DBI:mysql:database=$sql->{base};host=$sql->{host}";

    my $dbh = DBI->connect($dsn, $sql->{user}, $sql->{pass}, {
        mysql_enable_utf8 => 1
    });
    return $dbh if $dbh;

    my $why = $DBI::errstr;
    die "Failed to connect to database" . ( defined $why ? ": $why" : '' );
}
# Prepare and execute one SQL statement with bound parameters.
#
#   $dbh   - database handle
#   $query - SQL text with '?' placeholders
#   @args  - values bound to the placeholders, in order
#
# Returns the executed statement handle. Dies when either step fails; the
# message names the failing step and carries the handle's error text.
sub sqlquery {
    my $dbh   = shift;
    my $query = shift;
    my @args  = @_;

    my $sth = $dbh->prepare($query);
    if ( !$sth ) {
        my $why = $dbh->errstr;
        die "Failed to prepare SQL query" . ( defined $why ? ": $why" : '' );
    }
    if ( !$sth->execute(@args) ) {
        my $why = $sth->errstr;
        die "Failed to execute SQL query" . ( defined $why ? ": $why" : '' );
    }
    return $sth;
}
| @@ -0,0 +1,87 @@ | |||
| #!/usr/bin/perl | |||
| use strict; | |||
| use lib '/opt/autodoc/lib'; | |||
| use Autodoc; | |||
| use warnings; | |||
my $conf = load_conf('/opt/autodoc/etc/autodoc.json');
my $dbh = sqlconnect($conf->{sql});

# Interactive menu: runs until the operator chooses (q)uit.
while(1) {
    print "User management\n";
    print "(l)ist, (a)dd, (d)elete, (q)uit\n";
    print "? ";
    my $m = input('^[ladq]$');

    if ( !defined $m ) {
        print "ERROR: Invalid input\n";
    }
    elsif ( $m eq 'l' ) {
        foreach my $user ( list_users() ) {
            print "User: $user\n";
        }
    }
    elsif ( $m eq 'a' ) {
        # Keep asking until both values pass validation.
        my ($user, $pass);
        while(!defined $user) {
            print "Username: ";
            $user = input('^[0-9a-zA-Z-]+$');
            print "Invalid username\n" if !defined $user;
        }
        while(!defined $pass) {
            print "Password: ";
            $pass = input('^[0-9a-zA-Z-]+$');
            print "Invalid password\n" if !defined $pass;
        }
        create_user($user,$pass);
    }
    elsif ( $m eq 'd' ) {
        print "Username to delete: ";
        my $user = input('^[0-9a-zA-Z-]+$');
        # input() yields undef for a rejected name; report it instead of
        # issuing a DELETE with an undefined username.
        if ( defined $user ) {
            delete_user($user);
        }
        else {
            print "Invalid username\n";
        }
    }
    elsif ( $m eq 'q' ) {
        print "Bye\n";
        exit;
    }
}
# Hash a password with htpasswd (bcrypt, -B) and store the new user.
#
#   $user - username
#   $pass - clear-text password
#
# Nothing is inserted, and a warning is printed, when htpasswd cannot be run
# or produces no "user:hash" line.
sub create_user {
    my($user,$pass) = @_;
    my $crypt;

    # List-form pipe open runs htpasswd directly, without a shell, so the
    # arguments are passed verbatim whatever characters they contain.
    # NOTE(review): -b still puts the password on htpasswd's command line.
    my $htpasswd;
    if ( !open($htpasswd, '-|', 'htpasswd', '-nbB', $user, $pass) ) {
        warn "Failed to run htpasswd: $!\n";
        return;
    }
    while ( my $line = <$htpasswd> ) {
        chomp $line;
        # Output has the form "user:hash"; keep the hash part.
        (undef, $crypt) = split(/:/, $line) if $line =~ /:/;
    }
    close($htpasswd);

    if ( !defined $crypt ) {
        warn "htpasswd produced no password hash, user not created\n";
        return;
    }
    sqlquery($dbh, "INSERT INTO users SET username = ?, passwd = ?", $user, $crypt);
}
# Remove the row for the given username from the users table.
sub delete_user {
    my $user = shift;
    my $statement = "DELETE FROM users WHERE username = ?";
    sqlquery($dbh, $statement, $user);
}
# Return every username in the users table, sorted by the database.
sub list_users {
    my $sth = sqlquery($dbh, "SELECT username FROM users ORDER BY username");

    my @names;
    while ( my @row = $sth->fetchrow_array() ) {
        push @names, $row[0];
    }
    return @names;
}
# Read one line from STDIN and validate it.
#
#   $re - regular expression (as a string) the line must match
#
# Returns the line without its trailing newline when it matches, undef
# otherwise. Dies on end of input: without this, a closed STDIN makes every
# read "invalid" and the calling menu loops forever.
sub input {
    my($re) = @_;
    my $str = <STDIN>;
    die "Unexpected end of input\n" if !defined $str;
    chomp $str;
    return $str if $str =~ /$re/;
    return undef;
}
| @@ -1,6 +1,8 @@ | |||
| #!/usr/bin/perl | |||
| use strict; | |||
| use lib '/opt/autodoc/lib'; | |||
| use Autodoc; | |||
| use FCGI; | |||
| use JSON; | |||
| use DBI; | |||
| @@ -104,18 +106,6 @@ sub fatal_api_error { | |||
| } | |||
# Read a JSON configuration file and return the decoded data structure.
#
#   $file - path of the configuration file
#
# Reports a 500 API error through fatal_api_error() when the file cannot be
# opened; from_json() raises its own error on malformed JSON.
sub load_conf {
    my ($file) = @_;

    # Three-argument open with a lexical handle, so the path is used
    # literally and cannot be re-interpreted as an open mode or a command.
    open(my $fh, '<', $file)
        || fatal_api_error(500,"Failed to load configuration file");

    my $json = do { local $/; <$fh> };    # slurp the whole file
    close($fh);
    $json = '' if !defined $json;

    return from_json($json);
}
| sub process_query { | |||
| my($method, $path, $qs, $post, $user) = @_; | |||
| @@ -549,25 +539,3 @@ sub parse_post { | |||
| return { ctype => $ct, len => $len, data => $data}; | |||
| } | |||
# Open a MySQL connection described by a configuration hash.
#
#   $sql - hashref with the keys base, host, user and pass
#
# Returns the DBI database handle; a failed connection is reported as a
# 500 API error through fatal_api_error().
sub sqlconnect {
    my ($sql) = @_;
    my $dsn = "DBI:mysql:database=$sql->{base};host=$sql->{host}";

    # The original had a stray "\" in front of the error call, which took a
    # reference to its return value: had fatal_api_error() ever returned,
    # $dbh would have been a (true) reference rather than a handle.
    my $dbh = DBI->connect($dsn, $sql->{user}, $sql->{pass}, { mysql_enable_utf8 => 1 })
        || fatal_api_error(500,"Failed to connect to database");
    return $dbh;
}
# Prepare and execute one SQL statement with bound parameters.
#
#   $dbh   - database handle
#   $query - SQL text with '?' placeholders
#   @args  - values bound to the placeholders, in order
#
# Returns the executed statement handle. A failure in either step is
# reported as a 500 API error naming the step that failed.
sub sqlquery {
    my ($dbh, $query, @args) = @_;

    my $sth = $dbh->prepare($query)
        || fatal_api_error(500,"Failed to prepare SQL query");
    $sth->execute(@args)
        || fatal_api_error(500,"Failed to execute SQL query");
    return $sth;
}
| @@ -7,6 +7,13 @@ | |||
| "cache": "var/cache", | |||
| "error_img": "var/error_img.jpeg" | |||
| }, | |||
| "dict": { | |||
| "en": [ "british-english-large", "american-english-large" ], | |||
| "de": [ "swiss", "ngerman" ], | |||
| "fr": [ "french" ], | |||
| "it": [ "italian" ] | |||
| }, | |||
| "sql": { | |||
| "host": "localhost", | |||
| "base": "autodoc", | |||
| @@ -0,0 +1,307 @@ | |||
-- Schema defaults: utf8mb4 character set and collation for the autodoc schema.
| ALTER SCHEMA `autodoc` DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_general_ci ; | |||
-- Drop every table; the link tables go first because they hold foreign keys
-- to the tables dropped after them.
| DROP TABLE IF EXISTS `documents_tags`; | |||
| DROP TABLE IF EXISTS `documents_pages`; | |||
| DROP TABLE IF EXISTS `pages_words`; | |||
| DROP TABLE IF EXISTS `words`; | |||
| DROP TABLE IF EXISTS `dict`; | |||
| DROP TABLE IF EXISTS `languages`; | |||
| DROP TABLE IF EXISTS `pages`; | |||
| DROP TABLE IF EXISTS `tags`; | |||
| DROP TABLE IF EXISTS `users`; | |||
| DROP TABLE IF EXISTS `documents`; | |||
-- Drop the stored routines so they can be re-created by this script.
| DROP PROCEDURE IF EXISTS `create_page_word`; | |||
| DROP PROCEDURE IF EXISTS `create_tag`; | |||
| DROP PROCEDURE IF EXISTS `create_document`; | |||
| DROP PROCEDURE IF EXISTS `create_page`; | |||
| DROP PROCEDURE IF EXISTS `delete_tag`; | |||
| DROP PROCEDURE IF EXISTS `get_document_filter`; | |||
| DROP PROCEDURE IF EXISTS `get_primary_page`; | |||
| DROP PROCEDURE IF EXISTS `set_primary_page`; | |||
| DROP PROCEDURE IF EXISTS `set_page_status`; | |||
| DROP FUNCTION IF EXISTS `SPLIT_STR`; | |||
-- dict: one row per (word, two-letter language code) pair.
| CREATE TABLE `dict` ( | |||
| `word` varchar(255) NOT NULL, | |||
| `lang` char(2) NOT NULL, | |||
| PRIMARY KEY (`word`,`lang`) | |||
| ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; | |||
-- users: accounts keyed by username, with an optional password and groups string.
| CREATE TABLE `users` ( | |||
| `username` varchar(255) NOT NULL, | |||
| `passwd` varchar(255) DEFAULT NULL, | |||
| `groups` varchar(255) DEFAULT NULL, | |||
| PRIMARY KEY (`username`) | |||
| ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; | |||
-- documents: 16-byte binary id, owner, name, creation time and a status
-- that defaults to 'nodata'.
| CREATE TABLE `documents` ( | |||
| `id` binary(16) NOT NULL, | |||
| `owner` varchar(45) DEFAULT NULL, | |||
| `name` varchar(128) CHARACTER SET utf8mb4 DEFAULT NULL, | |||
| `created` timestamp NULL DEFAULT CURRENT_TIMESTAMP, | |||
| `status` enum('nodata','inprogress','deleted','ok') NOT NULL DEFAULT 'nodata', | |||
| PRIMARY KEY (`id`) | |||
| ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; | |||
-- languages: binary id plus a unique two-letter short code and a name.
| CREATE TABLE `languages` ( | |||
| `id` binary(16) NOT NULL, | |||
| `short` char(2) DEFAULT NULL, | |||
| `name` varchar(45) DEFAULT NULL, | |||
| PRIMARY KEY (`id`), | |||
| UNIQUE KEY `short_UNIQUE` (`short`) | |||
| ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; | |||
-- pages: same status values as documents, plus an isprimary flag (default 0).
| CREATE TABLE `pages` ( | |||
| `id` binary(16) NOT NULL, | |||
| `owner` varchar(45) DEFAULT NULL, | |||
| `created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, | |||
| `status` enum('nodata','inprogress','deleted','ok') DEFAULT 'nodata', | |||
| `isprimary` tinyint(4) DEFAULT '0', | |||
| PRIMARY KEY (`id`) | |||
| ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; | |||
-- tags: uniqueness is on the (tag, color) pair, not on tag alone.
-- NOTE(review): both columns are nullable, and a unique index permits
-- repeated rows whose color is NULL -- confirm this is intended.
| CREATE TABLE `tags` ( | |||
| `id` binary(16) NOT NULL, | |||
| `tag` varchar(45) DEFAULT NULL, | |||
| `color` enum('primary','secondary','success','danger','warning','info','light','dark') DEFAULT NULL, | |||
| PRIMARY KEY (`id`), | |||
| UNIQUE KEY `tag_UNIQUE` (`tag`,`color`) | |||
| ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; | |||
-- words: each (word, languageId) pair is unique; rows are removed when the
-- referenced language is deleted (ON DELETE CASCADE).
| CREATE TABLE `words` ( | |||
| `id` binary(16) NOT NULL, | |||
| `word` varchar(255) DEFAULT NULL, | |||
| `languageId` binary(16) DEFAULT NULL, | |||
| PRIMARY KEY (`id`), | |||
| UNIQUE KEY `word_UNIQUE` (`word`,`languageId`), | |||
| KEY `fk_words_langid_idx` (`languageId`), | |||
| CONSTRAINT `fk_words_languageid` FOREIGN KEY (`languageId`) REFERENCES `languages` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION | |||
| ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; | |||
-- documents_tags: link table between documents and tags; rows cascade away
-- when either side is deleted.
| CREATE TABLE `documents_tags` ( | |||
| `documentId` binary(16) NOT NULL, | |||
| `tagId` binary(16) NOT NULL, | |||
| PRIMARY KEY (`documentId`,`tagId`), | |||
| KEY `fk_tags_id_idx` (`tagId`), | |||
| CONSTRAINT `fk_doctags_docid` FOREIGN KEY (`documentId`) REFERENCES `documents` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION, | |||
| CONSTRAINT `fk_doctags_tagid` FOREIGN KEY (`tagId`) REFERENCES `tags` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION | |||
| ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; | |||
-- pages_words: link table between pages and words, with a per-pair count.
| CREATE TABLE `pages_words` ( | |||
| `pageId` binary(16) NOT NULL, | |||
| `wordId` binary(16) NOT NULL, | |||
| `count` int(10) unsigned DEFAULT NULL, | |||
| PRIMARY KEY (`pageId`,`wordId`), | |||
| KEY `fk_pagword_wordid_idx` (`wordId`), | |||
| CONSTRAINT `fk_pagword_pageid` FOREIGN KEY (`pageId`) REFERENCES `pages` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION, | |||
| CONSTRAINT `fk_pagword_wordid` FOREIGN KEY (`wordId`) REFERENCES `words` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION | |||
| ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; | |||
-- documents_pages: link table between documents and pages.
| CREATE TABLE `documents_pages` ( | |||
| `documentId` binary(16) NOT NULL, | |||
| `pageId` binary(16) NOT NULL, | |||
| PRIMARY KEY (`documentId`,`pageId`), | |||
| KEY `fk_docpage_pageid_idx` (`pageId`), | |||
| CONSTRAINT `fk_docpage_docid` FOREIGN KEY (`documentId`) REFERENCES `documents` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION, | |||
| CONSTRAINT `fk_docpage_pageid` FOREIGN KEY (`pageId`) REFERENCES `pages` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION | |||
| ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; | |||
DELIMITER ;;
-- SPLIT_STR(x, delim, pos): return the pos-th field of x split on delim.
-- When the first pos-1 fields already span the whole string, x itself is
-- returned.
--
-- Declared DETERMINISTIC / NO SQL: the result depends only on the arguments
-- and no tables are read. Without such a characteristic MySQL refuses to
-- create the function while binary logging is enabled.
CREATE FUNCTION `SPLIT_STR`(
    x VARCHAR(255),
    delim VARCHAR(12),
    pos INT
) RETURNS varchar(255) CHARSET utf8mb4
    DETERMINISTIC
    NO SQL
RETURN
    CASE WHEN CHAR_LENGTH(SUBSTRING_INDEX(x, delim, pos - 1)) = CHAR_LENGTH(x)
        THEN x
        ELSE (REPLACE(SUBSTRING(SUBSTRING_INDEX(x, delim, pos), CHAR_LENGTH(SUBSTRING_INDEX(x, delim, pos -1)) + 1), delim, ''))
    END ;;
DELIMITER ;
DELIMITER ;;
-- create_page_word(IN_pageid, IN_word, IN_langid)
-- Record one occurrence of a word on a page.
--   IN_pageid  textual UUID of the page
--   IN_word    the word
--   IN_langid  textual UUID of the word's language
-- The word row is created on first use; the (page, word) counter starts at
-- 1 and is incremented on every further call.
CREATE PROCEDURE `create_page_word`(
    IN IN_pageid CHAR(36),
    IN IN_word VARCHAR(255),
    IN IN_langid CHAR(36))
BEGIN
    DECLARE LOC_langid BINARY(16) DEFAULT UUID_TO_BIN(IN_langid);
    -- Holds the binary id as stored; a BINARY(16) variable cannot hold the
    -- 36-character text form.
    DECLARE LOC_wordid BINARY(16) DEFAULT NULL;

    -- The words table names this column languageId (not langId).
    INSERT IGNORE INTO words SET
        id = UUID_TO_BIN(UUID()),
        word = IN_word,
        languageId = LOC_langid;

    SELECT id INTO LOC_wordid
      FROM words
     WHERE word = IN_word
       AND languageId = LOC_langid
     LIMIT 1;

    INSERT INTO pages_words SET
        pageId = UUID_TO_BIN(IN_pageid),
        wordId = LOC_wordid,
        count = 1
    ON DUPLICATE KEY
        UPDATE count = count + 1;
END ;;
DELIMITER ;
DELIMITER ;;
-- create_tag(IN_documentid, IN_tag)
-- Attach the tag named IN_tag to a document, creating the tag if needed.
--   IN_documentid  binary id of the document
--   IN_tag         tag text
CREATE PROCEDURE `create_tag`(
    IN IN_documentid BINARY(16),
    IN IN_tag VARCHAR(45))
BEGIN
    DECLARE LOC_tagid BINARY(16) DEFAULT NULL;

    -- Look the tag up first. The unique key on tags is (tag, color) and this
    -- procedure leaves color NULL, so INSERT IGNORE cannot be relied on to
    -- suppress duplicates: NULLs never collide in a unique index.
    SET LOC_tagid = (SELECT id FROM tags WHERE tag = IN_tag LIMIT 1);

    IF LOC_tagid IS NULL THEN
        -- UUID() returns 36 characters of text; convert it to the 16-byte
        -- form the id column stores.
        SET LOC_tagid = UUID_TO_BIN(UUID());
        INSERT INTO tags SET
            id = LOC_tagid,
            tag = IN_tag;
    END IF;

    INSERT IGNORE INTO documents_tags SET
        documentId = IN_documentid,
        tagId = LOC_tagid;
END ;;
DELIMITER ;
DELIMITER ;;
-- create_document(IN_owner)
-- Create an empty document (status 'nodata') owned by IN_owner and return
-- its id in a one-row result set with the column documentId.
CREATE PROCEDURE `create_document`(
    IN IN_owner VARCHAR(45))
BEGIN
    -- UUID() yields 36 characters of text, which does not fit BINARY(16);
    -- store the 16-byte form.
    DECLARE LOC_documentid BINARY(16) DEFAULT UUID_TO_BIN(UUID());

    INSERT INTO documents SET
        id = LOC_documentid,
        owner = IN_owner,
        status = 'nodata';

    -- Returned as textual UUID, the same form create_page returns pageId in.
    -- NOTE(review): confirm callers expect text rather than raw bytes.
    SELECT BIN_TO_UUID(LOC_documentid) AS documentId;
END ;;
DELIMITER ;
DELIMITER ;;
-- create_page(IN_documentId)
-- Create an empty page (status 'nodata'), link it to a document and return
-- the new page id as textual UUID in the column pageId.
--   IN_documentId  textual UUID of the owning document
CREATE PROCEDURE `create_page`(
    -- CHAR(36): the value is passed through UUID_TO_BIN() below, so it is
    -- the text form and would not fit a BINARY(16) parameter.
    IN IN_documentId CHAR(36))
BEGIN
    -- Convert the generated UUID to the 16-byte form the id column stores.
    DECLARE LOC_pageid BINARY(16) DEFAULT UUID_TO_BIN(UUID());

    INSERT INTO pages SET
        id = LOC_pageid,
        status = 'nodata';

    INSERT INTO documents_pages SET
        documentId = UUID_TO_BIN(IN_documentId),
        pageId = LOC_pageid;

    SELECT BIN_TO_UUID(LOC_pageid) AS pageId;
END ;;
DELIMITER ;
DELIMITER ;;
-- delete_tag(IN_documentid, IN_tag)
-- Detach the tag named IN_tag from a document. The tag row itself is kept.
--   IN_documentid  binary id of the document
--   IN_tag         tag text
CREATE PROCEDURE `delete_tag`(
    IN IN_documentid BINARY(16),
    IN IN_tag VARCHAR(45))
BEGIN
    -- Match through a subquery instead of SELECT ... INTO a local variable:
    -- tags are unique on (tag, color) only, so several rows may share the
    -- text, and SELECT ... INTO fails when more than one row matches.
    DELETE FROM documents_tags
        WHERE documentId = IN_documentid
        AND tagId IN (SELECT id FROM tags WHERE tag = IN_tag);
END ;;
DELIMITER ;
DELIMITER ;;
-- get_document_filter(IN_words, IN_tags, IN_lim1, IN_lim2)
-- List documents containing words that match IN_words, best match first.
--   IN_words  regular-expression alternatives, wrapped in "(...)" before use
--   IN_tags   accepted for interface compatibility; not used by the query
--   IN_lim1   LIMIT offset
--   IN_lim2   LIMIT row count
-- Result set: id (textual document UUID) and wordcount (sum of the
-- per-page counts of the matching words).
CREATE PROCEDURE `get_document_filter`(
    IN IN_words VARCHAR(255),
    IN IN_tags VARCHAR(255),
    IN IN_lim1 INT,
    IN IN_lim2 INT)
BEGIN
    SELECT BIN_TO_UUID(dp.documentId) AS id, SUM(pw.count) AS wordcount
        FROM documents_pages dp
        JOIN pages_words pw ON dp.pageId = pw.pageId
        JOIN words w ON pw.wordId = w.id
        WHERE w.word REGEXP CONCAT("(", IN_words, ")")
        GROUP BY dp.documentId
        ORDER BY wordcount DESC
        LIMIT IN_lim1, IN_lim2;
END ;;
DELIMITER ;
DELIMITER ;;
-- get_primary_page(IN_documentid)
-- Return, as textual UUID in the column pageId, the page of the document
-- flagged isprimary = 1; when no page is flagged, a randomly chosen page of
-- the document is returned instead.
CREATE PROCEDURE `get_primary_page`(
    IN IN_documentid BINARY(16))
BEGIN
    DECLARE LOC_pageid BINARY(16) DEFAULT NULL;

    -- First choice: the page flagged as primary.
    SELECT link.pageId
      INTO LOC_pageid
      FROM documents_pages AS link
      INNER JOIN pages AS pg ON link.pageId = pg.id
     WHERE link.documentId = IN_documentid
       AND pg.isprimary = 1;

    -- Fallback: any page of the document, picked at random.
    IF LOC_pageid IS NULL THEN
        SELECT link.pageId
          INTO LOC_pageid
          FROM documents_pages AS link
          INNER JOIN pages AS pg ON link.pageId = pg.id
         WHERE link.documentId = IN_documentid
         ORDER BY RAND()
         LIMIT 1;
    END IF;

    SELECT BIN_TO_UUID(LOC_pageid) AS pageId;
END ;;
DELIMITER ;
DELIMITER ;;
-- set_primary_page(IN_pageid)
-- Make the given page (binary id) the primary page of its document: the
-- flag is cleared on every page of that document, then set on this page.
CREATE PROCEDURE `set_primary_page`(
    IN IN_pageid BINARY(16))
BEGIN
    DECLARE LOC_documentid BINARY(16) DEFAULT NULL;

    -- Find the document this page is linked to.
    SELECT documentId
      INTO LOC_documentid
      FROM documents_pages
     WHERE pageId = IN_pageid;

    -- Clear the flag on all pages of that document.
    UPDATE pages AS pg
      INNER JOIN documents_pages AS link ON link.pageId = pg.id
       SET pg.isprimary = 0
     WHERE link.documentId = LOC_documentid;

    -- Flag the requested page.
    UPDATE pages SET isprimary = 1 WHERE id = IN_pageid;
END ;;
DELIMITER ;
DELIMITER ;;
-- set_page_status(IN_pageid, IN_status)
-- Set the status of one page.
--   IN_pageid  textual UUID of the page
--   IN_status  one of 'nodata', 'inprogress', 'deleted', 'ok'
CREATE PROCEDURE `set_page_status`(
    -- CHAR(36): the value is passed through UUID_TO_BIN() below, so it is
    -- the text form and would not fit a BINARY(16) parameter.
    IN IN_pageid CHAR(36),
    IN IN_status ENUM('nodata', 'inprogress', 'deleted', 'ok')
)
BEGIN
    UPDATE pages SET status = IN_status WHERE id = UUID_TO_BIN(IN_pageid);
END ;;
DELIMITER ;
| @@ -0,0 +1,48 @@ | |||
| package Autodoc; | |||
| use strict; | |||
| use JSON; | |||
| use DBI; | |||
| use warnings 'all'; | |||
| use Exporter 'import'; | |||
| our @EXPORT = qw(sqlconnect sqlquery load_conf); | |||
# Read a JSON configuration file and return the decoded data structure.
#
#   $file - path of the configuration file (required)
#
# Dies when no path is given or the file cannot be opened; from_json()
# raises its own error when the content is not valid JSON.
sub load_conf {
    my ($file) = @_;
    die "No configuration file given" if !defined $file;

    # Three-argument open with a lexical handle: the path is used literally,
    # so leading/trailing whitespace or characters such as '>' and '|' in it
    # can no longer change what gets opened.
    open(my $fh, '<', $file)
        or die "Failed to load configuration file '$file': $!";

    my $json = do { local $/; <$fh> };    # slurp the whole file
    close($fh);
    $json = '' if !defined $json;

    return from_json($json);
}
# Open a MySQL connection described by a configuration hash.
#
#   $sql - hashref with the keys base, host, user and pass
#
# Returns the DBI database handle. Dies, including the DBI error text when
# one is available, if the connection cannot be established.
sub sqlconnect {
    my ($sql) = @_;
    my $dsn = "DBI:mysql:database=$sql->{base};host=$sql->{host}";

    my $dbh = DBI->connect($dsn, $sql->{user}, $sql->{pass}, {
        mysql_enable_utf8 => 1
    });
    if ( !$dbh ) {
        my $why = $DBI::errstr;
        die "Failed to connect to database" . ( defined $why ? ": $why" : '' );
    }
    return $dbh;
}
# Prepare and execute one SQL statement with bound parameters.
#
#   $dbh   - database handle
#   $query - SQL text with '?' placeholders
#   @args  - values bound to the placeholders, in order
#
# Returns the executed statement handle. Dies when either step fails; the
# message names the failing step and carries the handle's error text.
sub sqlquery {
    my ($dbh, $query, @args) = @_;

    my $sth = $dbh->prepare($query);
    if ( !$sth ) {
        my $why = $dbh->errstr;
        die "Failed to prepare SQL query" . ( defined $why ? ": $why" : '' );
    }
    if ( !$sth->execute(@args) ) {
        my $why = $sth->errstr;
        die "Failed to execute SQL query" . ( defined $why ? ": $why" : '' );
    }
    return $sth;
}
| return 1; | |||
| @@ -1,6 +1,7 @@ | |||
| apt-get install mysql-client | |||
| apt-get install aspell-fr aspell-it aspell-de aspell-en | |||
| apt-get install wfrench wbritish-large witalian wswiss wngerman wamerican-large | |||
| apt-get install tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-ita tesseract-ocr-eng | |||
| apt-get install tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-ita tesseract-ocr-eng tesseract-ocr-script-latn | |||
| apt-get install poppler-utils | |||
| cd /opt/autodoc/www/js/ | |||
| @@ -1,3 +0,0 @@ | |||
| <html> | |||
| <body>Nothing here</body> | |||
| </html> | |||