You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
328 lines
10 KiB
PHP
328 lines
10 KiB
PHP
<?php
|
|
|
|
namespace BSR\Lib\Search;
|
|
|
|
use BSR\Lib\Configuration;
|
|
use BSR\Lib\Exception\WebException;
|
|
use BSR\Lib\Logger;
|
|
|
|
// Set the mbstring HTTP output character encoding to UTF-8.
// This is a side effect executed whenever this file is loaded.
mb_http_output('UTF-8');
|
|
|
|
class BookSearch
|
|
{
|
|
/** @var \SolrClient */
|
|
private $client;
|
|
/** @var \SolrQuery */
|
|
private $query;
|
|
/** @var array parts of the query, parameter 'q' */
|
|
private $queryParts = array();
|
|
/** @var array parts of the filter query, parameter 'fq' */
|
|
private $filterQueryParts = array();
|
|
|
|
/**
 * Build the Solr client from the application configuration and prepare
 * an empty query object.
 *
 * @param bool $edismax when true the query is a \SolrDisMaxQuery switched to
 *                      the eDisMax parser, otherwise a plain \SolrQuery
 */
public function __construct($edismax = true)
{
    $this->client = new \SolrClient(array(
        'hostname' => Configuration::get('solr.server'),
        'port'     => Configuration::get('solr.port'),
        'login'    => Configuration::get('solr.username'),
        'password' => Configuration::get('solr.password'),
        'path'     => Configuration::get('solr.path'),
    ));

    if (! $edismax) {
        $this->query = new \SolrQuery();
    } else {
        $disMaxQuery = new \SolrDisMaxQuery();
        $disMaxQuery->useEDisMaxQueryParser();
        $this->query = $disMaxQuery;
    }

    // Most options (search fields, sorting, ...) are already defaults in the
    // Solr configuration, so they should only be set per request when needed.
    //
    // If the returned fields ever have to be listed explicitly, these are the
    // ones to pass to $this->query->addField():
    //   id, code, isbn
    //   editor, editorTown, year, producer, producerCode, availabilityDate, collection
    //   title, author, reader, summary
    //   jeunesse, genre, genreCode, motsMatieres, cdu
    //   media, mediaType, cover, samples, zip, zip_size
}
|
|
|
|
/**
 * Change the servlet used by the client for search requests.
 *
 * @param string $handler value handed to \SolrClient::setServlet() for the
 *                        search servlet type
 */
public function setHandler($handler)
{
    $servletType = \SolrClient::SEARCH_SERVLET_TYPE;

    $this->client->setServlet($servletType, $handler);
}
|
|
|
|
/**
 * Add a single query clause matching several values of one field, in the
 * form field:("a" OP "b" OP "c").
 *
 * Every value is escaped and quoted. Nothing is added when $texts is empty.
 *
 * @param array  $texts    values to search for
 * @param string $field    field the values are searched in (inserted verbatim)
 * @param string $operator operator placed between the values (inserted verbatim)
 */
public function addCompoundQuery(array $texts, $field, $operator)
{
    if (count($texts) === 0) {
        return;
    }

    $escaped = array_map(array('SolrUtils', 'escapeQueryChars'), $texts);

    // The operator needs a space on BOTH sides: without the trailing one the
    // clause reads `"a" OR"b"`, gluing the operator to the next phrase.
    $glue = '" ' . $operator . ' "';
    $query = sprintf('%s:("%s")', $field, implode($glue, $escaped));

    // The values are already escaped and the field is part of the clause,
    // so the clause is added as-is.
    $this->addQuery($query, null, false);
}
|
|
|
|
/**
 * Add a compound clause on $field whose values are joined with 'OR'.
 *
 * @param array  $texts values to search for
 * @param string $field field the values are searched in
 */
public function addOrQuery(array $texts, $field)
{
    $operator = 'OR';

    $this->addCompoundQuery($texts, $field, $operator);
}
|
|
|
|
/**
 * Add a compound clause on $field whose values are joined with 'AND'.
 *
 * @param array  $texts values to search for
 * @param string $field field the values are searched in
 */
public function addAndQuery(array $texts, $field)
{
    $operator = 'AND';

    $this->addCompoundQuery($texts, $field, $operator);
}
|
|
|
|
/**
 * Add a part to the main query (parameter 'q'); all parts are later
 * joined with AND by getResults().
 *
 * Special case: field 'mediaType' with text 'noCDS' becomes the negative
 * clause '-mediaType:CDS'. Otherwise, when a field is given, the part is
 * field:"text"; without a field the text is used as-is.
 *
 * @param string      $queryText  text to search for, or a ready-made clause
 * @param string|null $queryField field to search in, null or '' for none
 * @param bool        $escape     escape Solr special characters in $queryText
 */
public function addQuery($queryText, $queryField = null, $escape = true)
{
    if ($escape) {
        $queryText = \SolrUtils::escapeQueryChars($queryText);
    }

    // null is the documented "no field" value, but passing it to strlen()
    // is deprecated since PHP 8.1, so test for it explicitly first.
    $hasField = is_scalar($queryField) && strlen((string) $queryField) > 0;

    // Strict comparisons: a loose == would also match non-string values
    // (e.g. int 0 before PHP 8) and wrongly trigger the special case.
    if ($queryField === 'mediaType' && $queryText === 'noCDS') {
        $queryText = '-mediaType:CDS';
    } elseif ($hasField) {
        $queryText = sprintf('%s:"%s"', $queryField, $queryText);
    }

    $this->queryParts[] = $queryText;
}
|
|
|
|
/**
 * Queue a filter query part (parameter 'fq') of the form field:"text".
 *
 * @param string $text   value the field must match
 * @param string $field  field to filter on (inserted verbatim)
 * @param bool   $escape escape Solr special characters in $text
 */
public function addFilterQuery($text, $field, $escape = true)
{
    $value = $escape ? \SolrUtils::escapeQueryChars($text) : $text;

    $this->filterQueryParts[] = $field . ':"' . $value . '"';
}
|
|
|
|
/**
 * Queue a range filter (parameter 'fq') of the form field:[min TO max].
 *
 * Both bounds default to '*'. The values are inserted verbatim, without
 * escaping.
 *
 * @param string $field field to filter on
 * @param mixed  $min   lower bound
 * @param mixed  $max   upper bound
 */
public function addRange($field, $min = '*', $max = '*')
{
    $range = '[' . $min . ' TO ' . $max . ']';

    $this->filterQueryParts[] = $field . ':' . $range;
}
|
|
|
|
/**
 * Add a sort criterion to the underlying Solr query.
 *
 * @param string $field field to sort on
 * @param int    $order sort direction, \SolrQuery::ORDER_DESC by default
 */
public function addSortField($field, $order = \SolrQuery::ORDER_DESC)
{
    $solrQuery = $this->query;

    $solrQuery->addSortField($field, $order);
}
|
|
|
|
/**
 * Register a field to facet on in the underlying Solr query.
 *
 * @param string $field name of the facet field
 */
public function addFacetField($field)
{
    $solrQuery = $this->query;

    $solrQuery->addFacetField($field);
}
|
|
|
|
/**
 * Set the 'facet.range' parameter of the underlying Solr query.
 *
 * @param string $field field used for range faceting
 */
public function setFacetRangeField($field)
{
    $parameter = 'facet.range';

    $this->query->setParam($parameter, $field);
}
|
|
|
|
/**
 * Set the facet limit and/or the facet minimum count on the Solr query.
 * An argument left to null leaves the corresponding setting untouched.
 *
 * @param int|null $limit value passed to setFacetLimit()
 * @param int|null $count value passed to setFacetMinCount()
 */
public function setFacetLimits($limit = null, $count = null)
{
    if ($limit !== null) {
        $this->query->setFacetLimit($limit);
    }

    if ($count !== null) {
        $this->query->setFacetMinCount($count);
    }
}
|
|
|
|
/**
 * Set the 'facet.range.start', 'facet.range.end' and 'facet.range.gap'
 * parameters of the underlying Solr query.
 *
 * @param mixed $start value for facet.range.start
 * @param mixed $end   value for facet.range.end
 * @param mixed $gap   value for facet.range.gap
 */
public function setFacetRange($start, $end, $gap)
{
    $bounds = array(
        'start' => $start,
        'end'   => $end,
        'gap'   => $gap,
    );

    foreach ($bounds as $suffix => $value) {
        $this->query->setParam('facet.range.' . $suffix, $value);
    }
}
|
|
|
|
/**
 * Run the accumulated query against Solr and return the results.
 *
 * The query parts are joined with ' AND ' ('*:*' when there are none) and
 * every queued filter query part is sent as its own 'fq' parameter.
 *
 * @param int $start offset of the first result
 * @param int $count maximum number of results
 * @param bool $facets activate faceting ?
 * @param bool $spellcheck activate spellcheck ?
 * @param bool $highlight activate highlighting ?
 * @return array with the keys
 *               'count'  => 'numFound' of the response (0 when absent),
 *               'facets' => array('facets' => ..., 'highlighting' => ..., 'spelling' => ...),
 *               'books'  => returned documents indexed by their 'id'
 * @throws WebException when the Solr extension throws a \SolrException
 */
public function getResults($start = 0, $count = 15, $facets = false, $spellcheck = false, $highlight = false)
{
    $query = count($this->queryParts) > 0
        ? implode(' AND ', $this->queryParts)
        : '*:*';

    foreach ($this->filterQueryParts as $fq) {
        $this->query->addFilterQuery($fq);
    }
    // The filters now live in the Solr query object; empty the pending list
    // so that calling getResults() again does not send each 'fq' twice.
    $this->filterQueryParts = array();

    $this->query->setQuery($query);
    $this->query->setStart($start);
    $this->query->setRows($count);

    $this->query->setParam('facet', $facets ? 'true' : 'false');
    $this->query->setParam('hl', $highlight ? 'true' : 'false');
    $this->query->setParam('spellcheck', $spellcheck ? 'true' : 'false');

    try {
        $results = $this->client->query($this->query)->getArrayResponse();
    } catch (\SolrException $e) {
        throw new WebException("SolrError", $e->getMessage(), -700);
    }

    // Documents, indexed by their 'id' field.
    $books = array();
    if (isset($results['response']['docs']) && is_array($results['response']['docs'])) {
        foreach ($results['response']['docs'] as $doc) {
            $books[$doc['id']] = $doc;
        }
    }

    // Highlighting: keep the first snippet of each field and drop '_fr'
    // from the field names.
    $highlighting = array();
    if (isset($results['highlighting'])) {
        foreach ($results['highlighting'] as $key => $fields) {
            $data = array();
            foreach ($fields as $fieldName => $snippets) {
                $data[str_replace('_fr', '', $fieldName)] = reset($snippets);
            }
            $highlighting[$key] = $data;
        }
    }

    // Spellcheck: only entries that really carry a 'suggestion' key are
    // kept, anything else in the list is skipped instead of raising errors.
    $spelling = array();
    if (isset($results['spellcheck']['suggestions']) && is_array($results['spellcheck']['suggestions'])) {
        foreach ($results['spellcheck']['suggestions'] as $word => $entry) {
            if (is_array($entry) && isset($entry['suggestion'])) {
                $spelling[$word] = $entry['suggestion'];
            }
        }
    }

    // Facets. A separate variable is used so the boolean $facets argument
    // is not silently turned into the result array.
    $facetData = array();
    if (isset($results['facet_counts']['facet_fields'])) {
        foreach ($results['facet_counts']['facet_fields'] as $fieldName => $values) {
            $facetData[$fieldName] = $values;
        }
    }
    if (isset($results['facet_counts']['facet_ranges'])) {
        // The gap may be unset on this query object (null); treat anything
        // without a '.' as an integer gap, without passing null to strpos().
        $gap = $this->query->getParam('facet.range.gap');
        $integerGap = ! is_scalar($gap) || strpos((string) $gap, '.') === false;

        foreach ($results['facet_counts']['facet_ranges'] as $fieldName => $range) {
            $counts = (isset($range['counts']) && is_array($range['counts']))
                ? $range['counts']
                : array();

            if (! $integerGap) {
                $facetData[$fieldName] = $counts;
                continue;
            }

            // Integer gap: convert the range keys to integers.
            $facetData[$fieldName] = array();
            foreach ($counts as $rangeStart => $value) {
                $facetData[$fieldName][intval($rangeStart)] = $value;
            }
        }
    }

    return array(
        'count' => isset($results['response']['numFound']) ? $results['response']['numFound'] : 0,
        'facets' => array(
            'facets' => $facetData,
            'highlighting' => $highlighting,
            'spelling' => $spelling,
        ),
        'books' => $books,
    );
}
|
|
|
|
/**
 * Return a list of suggested titles for the given text
 *
 * The query is sent with rows = 0 and 'suggest' = 'true'. Suggestions are
 * read under the lowercased text of each suggester in the response. A
 * suggestion whose term contains the text gets up to 100 extra weight
 * points, the closer to the beginning the more. The result is sorted by
 * decreasing weight.
 *
 * @param string $text
 * @return array list of suggestions (arrays with at least 'term' and 'weight')
 * @throws WebException when the Solr extension throws a \SolrException
 */
public function suggest($text)
{
    $this->query->setQuery($text);
    $this->query->setStart(0);
    $this->query->setRows(0);
    $this->query->setParam('suggest', 'true');

    try {
        $results = $this->client->query($this->query)->getArrayResponse();
    } catch (\SolrException $e) {
        // \SolrException is the parent of both the client and the server
        // exceptions; catching only the client one let server errors escape.
        throw new WebException("SolrError", $e->getMessage(), -700);
    }

    $text = mb_strtolower($text, 'UTF-8');

    $suggestions = array();
    if (isset($results['suggest']) && is_array($results['suggest'])) {
        foreach ($results['suggest'] as $suggester) {
            // A suggester may have no entry for this text.
            if (! isset($suggester[$text]['suggestions']) || ! is_array($suggester[$text]['suggestions'])) {
                continue;
            }

            foreach ($suggester[$text]['suggestions'] as $s) {
                $s['term'] = strip_tags($s['term']);
                if (! isset($s['weight'])) {
                    $s['weight'] = 0;
                }

                // Positions and lengths are counted in characters, not bytes,
                // so accented titles are weighted like any other. The guards
                // avoid an empty needle and a division by zero.
                $termLength = mb_strlen($s['term'], 'UTF-8');
                if ($text !== '' && $termLength > 0) {
                    $pos = mb_strpos(mb_strtolower($s['term'], 'UTF-8'), $text, 0, 'UTF-8');
                    if ($pos !== false) {
                        // increase weight of proposition that have the words at the beginning
                        $s['weight'] += (int) ((1 - $pos / $termLength) * 100);
                    }
                }

                // Indexed by term so that the same term is kept only once.
                $suggestions[$s['term']] = (array) $s;
            }
        }
    }

    // Decreasing weight; explicit comparison so the result does not depend
    // on the integer conversion of a difference.
    usort($suggestions, function ($a, $b) {
        if ($a['weight'] == $b['weight']) {
            return 0;
        }
        return $a['weight'] < $b['weight'] ? 1 : -1;
    });

    return $suggestions;
}
|
|
|
|
/**
 * Retrieve books from Solr based on their code (NoticeNr).
 *
 * Lists longer than 15 codes are fetched in chunks of 15 and the partial
 * results are combined. An empty list returns an empty array without
 * contacting Solr.
 *
 * @param array $codes
 * @param string $field the field to use for the search
 * @return array Books information, indexed by the 'id' of each document
 * @throws WebException
 */
public static function GetBooks(array $codes, $field = 'code')
{
    $count = count($codes);
    if ($count === 0) {
        // Without codes no query part is added, so getResults() would send
        // a '*:*' query with rows = 0: a round trip for an empty result.
        return array();
    }

    // it is faster to do multiple small request to Solr rather than one big so separate
    // in chunks if we are above the limit. 15 was found by testing and seems to be a sweet spot
    $limit = 15;
    if ($count > $limit) {
        $books = array();
        foreach (array_chunk($codes, $limit) as $chunk) {
            // '+' keeps the keys of the partial results; array_merge()
            // would renumber numerical keys.
            $books += self::GetBooks($chunk, $field);
        }
        return $books;
    }

    $bs = new static();
    $bs->addOrQuery($codes, $field);

    $results = $bs->getResults(0, $count);

    return $results['books'];
}
|
|
|
|
/**
 * Return the facet values of a field, restricted to documents whose
 * 'visible' field is 1.
 *
 * The facet limit is set to 2000 and the facet minimum count to 2; no
 * document is requested (rows = 0).
 *
 * @param string $field field to facet on
 * @return mixed the entry of $field in the facets returned by getResults()
 * @throws WebException
 */
public static function GetTerms($field)
{
    $search = new BookSearch();
    $search->addFilterQuery(1, 'visible');
    $search->addFacetField($field);
    $search->setFacetLimits(2000, 2);

    $response = $search->getResults(0, 0, true);
    $fieldFacets = $response['facets']['facets'];

    return $fieldFacets[$field];
}
|
|
|
|
|
|
/**
 * Return the range facet of a field, restricted to documents whose
 * 'visible' field is 1.
 *
 * The range goes from 0 to 250 * 60 with a gap of 30; no document is
 * requested (rows = 0).
 *
 * @param string $field field used for range faceting
 * @return mixed the entry of $field in the facets returned by getResults()
 * @throws WebException
 */
public static function GetTermsRange($field)
{
    $search = new BookSearch();
    $search->addFilterQuery(1, 'visible');

    $search->setFacetRangeField($field);
    $rangeEnd = 250 * 60;
    $search->setFacetRange(0, $rangeEnd, 30);

    // to avoid useless calculation, only set this 'normal' facet
    $search->addFacetField('visible');

    $response = $search->getResults(0, 0, true);
    $fieldFacets = $response['facets']['facets'];

    return $fieldFacets[$field];
}
|
|
}
|