Server IP : 85.214.239.14 / Your IP : 18.221.217.100 Web Server : Apache/2.4.62 (Debian) System : Linux h2886529.stratoserver.net 4.9.0 #1 SMP Tue Jan 9 19:45:01 MSK 2024 x86_64 User : www-data ( 33) PHP Version : 7.4.18 Disable Function : pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,pcntl_unshare, MySQL : OFF | cURL : OFF | WGET : ON | Perl : ON | Python : ON | Sudo : ON | Pkexec : OFF Directory : /proc/3/task/3/root/lib/x86_64-linux-gnu/perl5/5.36/DBD/SQLite/VirtualTable/ |
Upload File : |
#======================================================================
package DBD::SQLite::VirtualTable::FileContent;
#======================================================================
# Virtual table that behaves like a view on a "source" table holding
# file paths, with one extra column exposing the content of each file.
use strict;
use warnings;
use base 'DBD::SQLite::VirtualTable';

# names of the options accepted in CREATE VIRTUAL TABLE ... USING ...(...)
my %option_ok = map {($_ => 1)} qw/source content_col path_col
                                   expose root get_content/;

# values used for options that were not supplied
my %defaults = (
  content_col => "content",
  path_col    => "path",
  expose      => "*",
  get_content => "DBD::SQLite::VirtualTable::FileContent::get_content",
);


#----------------------------------------------------------------------
# object instantiation
#----------------------------------------------------------------------

# NEW(@args) : constructor.
# All arguments are handed over to the inherited _PREPARE_SELF(); this
# method then reads $self->{columns}, $self->{options} and
# $self->{dbh_ref} from the result.
# Dies if plain column declarations were supplied, if the mandatory
# 'source' option is missing, if an unknown option is given, if the
# source table has no columns, if an exposed column does not exist in
# the source table, or if the content column is also an exposed column.
# Returns the blessed virtual table object.
sub NEW {
  my $class = shift;

  my $self  = $class->_PREPARE_SELF(@_);

  local $" = ", "; # for array interpolation in strings

  # initial parameter check
  !@{$self->{columns}}
    or die "${class}->NEW(): illegal options: @{$self->{columns}}";
  $self->{options}{source}
    or die "${class}->NEW(): missing (source=...)";
  my @bad_options = grep {!$option_ok{$_}} keys %{$self->{options}};
  !@bad_options
    or die "${class}->NEW(): bad options: @bad_options";

  # defaults ... tempted to use //= but we still want to support perl 5.8 :-(
  foreach my $k (keys %defaults) {
    defined $self->{options}{$k}
      or $self->{options}{$k} = $defaults{$k};
  }

  # get list of columns from the source table
  my $src_table  = $self->{options}{source};
  my $sql        = "PRAGMA table_info($src_table)";
  my $dbh        = ${$self->{dbh_ref}}; # can't use method ->dbh, not blessed yet
  my $src_info   = $dbh->selectall_arrayref($sql, {Slice => [1, 2]});
  @$src_info
    or die "${class}->NEW(source=$src_table): no such table in database";

  # associate each source colname with its type info or " " (should eval true)
  my %src_col = map { ($_->[0] => $_->[1] || " ") } @$src_info;

  # check / complete the exposed columns
  my @exposed_cols;
  if ($self->{options}{expose} eq '*') {
    @exposed_cols = map {$_->[0]} @$src_info;
  }
  else {
    @exposed_cols = split /\s*,\s*/, $self->{options}{expose};
    my @bad_cols  = grep { !$src_col{$_} } @exposed_cols;
    die "table $src_table has no column named @bad_cols" if @bad_cols;
  }
  for (@exposed_cols) {
    die "$class: $self->{options}{content_col} cannot be both the "
      . "content_col and an exposed col" if $_ eq $self->{options}{content_col};
  }

  # build the list of columns for this table : the content column
  # comes first (index 0), followed by the exposed source columns
  $self->{columns} = [ "$self->{options}{content_col} TEXT",
                       map {"$_ $src_col{$_}"} @exposed_cols ];

  # acquire a coderef to the get_content() implementation, which
  # was given as a symbolic reference in %options
  no strict 'refs';
  $self->{get_content} = \ &{$self->{options}{get_content}};

  bless $self, $class;
}


# _build_headers() : stores in $self->{headers} the column names
# reported by ->sqlite_table_info, in the same order.
sub _build_headers {
  my $self = shift;

  my $cols = $self->sqlite_table_info;

  # headers : names of columns, without type information
  $self->{headers} = [ map {$_->{name}} @$cols ];
}


#----------------------------------------------------------------------
# method for initiating a search
#----------------------------------------------------------------------

# BEST_INDEX($constraints, $order_by) : translates the usable
# constraints into a fragment of SQL WHERE clause on the source table.
# Each handled constraint hash gets its {argvIndex} and {omit} entries
# set. Returns a hashref whose idxStr is the "AND"-joined fragment (the
# empty string if no constraint could be handled); FILTER() appends that
# string to its query.
sub BEST_INDEX {
  my ($self, $constraints, $order_by) = @_;

  $self->_build_headers if !$self->{headers};

  my @conditions;
  my $ix = 0;
  foreach my $constraint (grep {$_->{usable}} @$constraints) {
    my $col = $constraint->{col};

    # if this is the content column, skip because we can't filter on it
    next if $col == 0;

    # for other columns, build a fragment for SQL WHERE on the underlying table
    my $colname = $col == -1 ? "rowid" : $self->{headers}[$col];
    push @conditions, "$colname $constraint->{op} ?";
    $constraint->{argvIndex} = $ix++;
    $constraint->{omit}      = 1;     # SQLite doesn't need to re-check the op
  }

  # TODO : exploit $order_by to add ordering clauses within idxStr

  my $outputs = {
    idxNum           => 1,
    idxStr           => join(" AND ", @conditions),
    orderByConsumed  => 0,
    estimatedCost    => 1.0,
    estimatedRows    => undef,
  };

  return $outputs;
}


#----------------------------------------------------------------------
# method for preventing updates
#----------------------------------------------------------------------

# _SQLITE_UPDATE(...) : always dies, because this table is readonly.
sub _SQLITE_UPDATE {
  my ($self, $old_rowid, $new_rowid, @values) = @_;

  die "attempt to update a readonly virtual table";
}


#----------------------------------------------------------------------
# file slurping function (not a method!)
#----------------------------------------------------------------------

# get_content($path, $root) : returns the whole content of the file at
# $path (prefixed by "$root/" when $root is true). If the file cannot
# be opened, a warning is emitted and the empty string is returned.
sub get_content {
  my ($path, $root) = @_;

  $path = "$root/$path" if $root;

  my $content = "";
  if (open my $fh, "<", $path) {
    local $/;          # slurp the whole file into a scalar
    $content = <$fh>;
    close $fh;
  }
  else {
    # include the OS error so that the cause of the failure is visible
    warn "can't open $path: $!";
  }

  return $content;
}



#======================================================================
package DBD::SQLite::VirtualTable::FileContent::Cursor;
#======================================================================
# Cursor iterating over the rows of the source table on behalf of the
# FileContent virtual table stored in $self->{vtable}.
use strict;
use warnings;
use base "DBD::SQLite::VirtualTable::Cursor";


# FILTER($idxNum, $idxStr, @values) : starts a scan.
# Builds and runs a SELECT on the source table, using $idxStr (as
# produced by BEST_INDEX) as WHERE clause and @values as bind values,
# then fetches the first row into $self->{row}.
# Each fetched row is : rowid, exposed columns ..., path column.
# Dies if the statement cannot be prepared or executed.
sub FILTER {
  my ($self, $idxNum, $idxStr, @values) = @_;

  my $vtable = $self->{vtable};

  # build SQL
  local $" = ", ";
  my @cols = @{$vtable->{headers}};
  $cols[0] = 'rowid';                       # replace the content column by the rowid
  push @cols, $vtable->{options}{path_col}; # path col in last position
  my $sql  = "SELECT @cols FROM $vtable->{options}{source}";
  $sql .= " WHERE $idxStr" if $idxStr;

  # request on the index table; errors are reported from the handle
  # on which the failing call was made
  my $dbh = $vtable->dbh;
  $self->{sth} = $dbh->prepare($sql)
    or die $dbh->errstr;
  $self->{sth}->execute(@values)
    or die $self->{sth}->errstr;
  $self->{row} = $self->{sth}->fetchrow_arrayref;

  return;
}


# EOF() : true when there is no current row.
sub EOF {
  my ($self) = @_;

  return !$self->{row};
}


# NEXT() : fetches the next row from the statement handle into
# $self->{row}.
sub NEXT {
  my ($self) = @_;

  $self->{row} = $self->{sth}->fetchrow_arrayref;
}


# COLUMN($idxCol) : value of column $idxCol for the current row.
# Column 0 is the file content; other columns are read from the row.
sub COLUMN {
  my ($self, $idxCol) = @_;

  return $idxCol == 0 ? $self->file_content : $self->{row}[$idxCol];
}


# ROWID() : rowid of the current row (first field of the fetched row).
sub ROWID {
  my ($self) = @_;

  return $self->{row}[0];
}


# file_content() : calls the get_content hook of the virtual table with
# the path of the current row (last field of the fetched row) and the
# 'root' option, and returns its result.
sub file_content {
  my ($self) = @_;

  my $root = $self->{vtable}{options}{root};
  my $path = $self->{row}[-1];
  my $get_content_func = $self->{vtable}{get_content};

  return $get_content_func->($path, $root);
}


1;

__END__


=head1 NAME

DBD::SQLite::VirtualTable::FileContent -- virtual table for viewing file contents


=head1 SYNOPSIS

Within Perl :

  $dbh->sqlite_create_module(fcontent => "DBD::SQLite::VirtualTable::FileContent");

Then, within SQL :

  CREATE VIRTUAL TABLE tbl USING fcontent(
     source      = src_table,
     content_col = content,
     path_col    = path,
     expose      = "path, col1, col2, col3", -- or "*"
     root        = "/foo/bar",
     get_content = Foo::Bar::read_from_file
    );

  SELECT col1, path, content FROM tbl WHERE ...;

=head1 DESCRIPTION

A "FileContent" virtual table is bound to some underlying I<source
table>, which has a column containing paths to files.  The virtual
table behaves like a database view on the source table, with an added
column which exposes the content from those files.

This is especially useful as an "external content" to some
fulltext table (see L<DBD::SQLite::Fulltext_search>) : the index
table stores some metadata about files, and then the fulltext engine
can index both the metadata and the file contents.

=head1 PARAMETERS

Parameters for creating a C<FileContent> virtual table are
specified within the C<CREATE VIRTUAL TABLE> statement, just
like regular column declarations, but with an '=' sign.
Authorized parameters are :

=over

=item C<source>

The name of the I<source table>.
This parameter is mandatory. All other parameters are optional.

=item C<content_col>

The name of the virtual column exposing file contents.
The default is C<content>.

=item C<path_col>

The name of the column in C<source> that contains paths to files.
The default is C<path>.

=item C<expose>

A comma-separated list (within double quotes) of source column names
to be exposed by the virtual table. The default is C<"*">, which means
all source columns.

=item C<root>

An optional root directory that will be prepended to the I<path> column
when opening files.

=item C<get_content>

Fully qualified name of a Perl function for reading file contents.
The default implementation just slurps the entire file into a string;
but this hook can point to more sophisticated implementations, like for
example a function that would remove html tags. The hooked function is
called like this :

  $file_content = $get_content->($path, $root);

=back

=head1 AUTHOR

Laurent Dami E<lt>dami@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright Laurent Dami, 2014.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut