You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

328 lines
10 KiB
PHP

<?php
namespace BSR\Lib\Search;
use BSR\Lib\Configuration;
use BSR\Lib\Exception\WebException;
use BSR\Lib\Logger;
// Set the HTTP output character encoding to UTF-8; this runs as a side effect as soon as the file is loaded.
mb_http_output('UTF-8');
/**
 * Small query builder / executor on top of the PECL Solr client.
 *
 * Query parts ('q') and filter query parts ('fq') are accumulated through the
 * add*() methods and only sent to Solr when getResults() is called.
 */
class BookSearch
{
    /** @var \SolrClient */
    private $client;

    /** @var \SolrQuery */
    private $query;

    /** @var array parts of the query, parameter 'q' */
    private $queryParts = array();

    /** @var array parts of the filter query, parameter 'fq' */
    private $filterQueryParts = array();

    /**
     * Create the Solr client from the 'solr.*' configuration entries.
     *
     * @param bool $edismax when true, a \SolrDisMaxQuery switched to the eDisMax
     *                      parser is used, otherwise a plain \SolrQuery
     */
    public function __construct($edismax = true)
    {
        $options = array(
            'hostname' => Configuration::get('solr.server'),
            'port'     => Configuration::get('solr.port'),
            'login'    => Configuration::get('solr.username'),
            'password' => Configuration::get('solr.password'),
            'path'     => Configuration::get('solr.path'),
        );
        $this->client = new \SolrClient($options);

        if ($edismax) {
            $this->query = new \SolrDisMaxQuery();
            $this->query->useEDisMaxQueryParser();
        } else {
            $this->query = new \SolrQuery();
        }

        // most options like search fields, sorting, etc are already set
        // as default in the Solr config and thus should be set only on a
        // per request basis when needed

        /* if sometime we need to set the fields explicitly, those should be the ones we want :
        $this->query->addField('id, code, isbn');
        $this->query->addField('editor, editorTown, year, producer, producerCode, availabilityDate, collection');
        $this->query->addField('title, author, reader, summary');
        $this->query->addField('jeunesse, genre, genreCode, motsMatieres, cdu');
        $this->query->addField('media, mediaType, cover, samples, zip, zip_size');
        */
    }

    /**
     * Change the servlet used by the client for search requests.
     *
     * @param string $handler
     */
    public function setHandler($handler)
    {
        $this->client->setServlet(\SolrClient::SEARCH_SERVLET_TYPE, $handler);
    }

    /**
     * Add a query part of the form field:("a" OP "b" OP "c").
     *
     * Every text is escaped; nothing is added when $texts is empty.
     *
     * @param array  $texts
     * @param string $field
     * @param string $operator boolean operator placed between the texts (e.g. 'OR', 'AND')
     */
    public function addCompoundQuery(array $texts, $field, $operator)
    {
        if (count($texts) === 0) {
            return;
        }

        // a string callable is always resolved from the global namespace
        $texts = array_map(array('SolrUtils', 'escapeQueryChars'), $texts);

        // the operator is surrounded by spaces on both sides: "a" OR "b"
        $glue = '" ' . $operator . ' "';
        $query = sprintf('%s:("%s")', $field, implode($glue, $texts));

        // the texts are already escaped and the field is already part of the query
        $this->addQuery($query, null, false);
    }

    /**
     * Add a query part matching any of the given texts in the given field.
     *
     * @param array  $texts
     * @param string $field
     */
    public function addOrQuery(array $texts, $field)
    {
        $this->addCompoundQuery($texts, $field, 'OR');
    }

    /**
     * Add a query part matching all of the given texts in the given field.
     *
     * @param array  $texts
     * @param string $field
     */
    public function addAndQuery(array $texts, $field)
    {
        $this->addCompoundQuery($texts, $field, 'AND');
    }

    /**
     * Add a part to the main query; all parts are joined with AND in getResults().
     *
     * The pair field 'mediaType' / text 'noCDS' is special-cased and becomes
     * the negative clause '-mediaType:CDS'.
     *
     * @param string      $queryText
     * @param string|null $queryField when empty, the text is used as is
     * @param bool        $escape     escape Solr special characters in $queryText
     */
    public function addQuery($queryText, $queryField = null, $escape = true)
    {
        if ($escape) {
            $queryText = \SolrUtils::escapeQueryChars($queryText);
        }

        // compare as strings: avoids strlen(null) and loose int/string comparisons
        $field = (string) $queryField;

        if ($field === 'mediaType' && (string) $queryText === 'noCDS') {
            $queryText = '-mediaType:CDS';
        } elseif ($field !== '') {
            $queryText = sprintf('%s:"%s"', $field, $queryText);
        }

        $this->queryParts[] = $queryText;
    }

    /**
     * Add a filter query of the form field:"text".
     *
     * @param string|int $text
     * @param string     $field
     * @param bool       $escape escape Solr special characters in $text
     */
    public function addFilterQuery($text, $field, $escape = true)
    {
        if ($escape) {
            // callers also pass integers (e.g. 1 for 'visible')
            $text = \SolrUtils::escapeQueryChars((string) $text);
        }
        $this->filterQueryParts[] = sprintf('%s:"%s"', $field, $text);
    }

    /**
     * Add a range filter query of the form field:[min TO max].
     *
     * The bounds are inserted as given, without escaping.
     *
     * @param string $field
     * @param mixed  $min lower bound, '*' for open
     * @param mixed  $max upper bound, '*' for open
     */
    public function addRange($field, $min = '*', $max = '*')
    {
        $this->filterQueryParts[] = sprintf('%s:[%s TO %s]', $field, $min, $max);
    }

    /**
     * @param string $field
     * @param int    $order one of the \SolrQuery::ORDER_* constants
     */
    public function addSortField($field, $order = \SolrQuery::ORDER_DESC)
    {
        $this->query->addSortField($field, $order);
    }

    /**
     * @param string $field
     */
    public function addFacetField($field)
    {
        $this->query->addFacetField($field);
    }

    /**
     * Set the 'facet.range' parameter.
     *
     * @param string $field
     */
    public function setFacetRangeField($field)
    {
        $this->query->setParam('facet.range', $field);
    }

    /**
     * Set the facet limit and/or the facet minimum count; null leaves a value untouched.
     *
     * @param int|null $limit
     * @param int|null $count
     */
    public function setFacetLimits($limit = null, $count = null)
    {
        if ($limit !== null) {
            $this->query->setFacetLimit($limit);
        }
        if ($count !== null) {
            $this->query->setFacetMinCount($count);
        }
    }

    /**
     * Set the 'facet.range.start', 'facet.range.end' and 'facet.range.gap' parameters.
     *
     * @param mixed $start
     * @param mixed $end
     * @param mixed $gap
     */
    public function setFacetRange($start, $end, $gap)
    {
        $this->query->setParam('facet.range.start', $start);
        $this->query->setParam('facet.range.end', $end);
        $this->query->setParam('facet.range.gap', $gap);
    }

    /**
     * Run the accumulated query and return the normalized response.
     *
     * The result has the keys 'count' (numFound, 0 when absent), 'books'
     * (documents indexed by their 'id') and 'facets', itself an array with
     * the keys 'facets', 'highlighting' and 'spelling'.
     *
     * @param int $start
     * @param int $count
     * @param bool $facets activate faceting ?
     * @param bool $spellcheck activate spellcheck ?
     * @param bool $highlight activate highlighting ?
     * @return array
     * @throws WebException
     */
    public function getResults($start = 0, $count = 15, $facets = false, $spellcheck = false, $highlight = false)
    {
        $query = count($this->queryParts) === 0
            ? '*:*'
            : implode(' AND ', $this->queryParts);

        // the SolrQuery object is kept between calls: do not add the same
        // filter query again when getResults() is called more than once
        $existing = $this->query->getFilterQueries();
        if (!is_array($existing)) {
            $existing = array();
        }
        foreach ($this->filterQueryParts as $fq) {
            if (!in_array($fq, $existing, true)) {
                $this->query->addFilterQuery($fq);
                $existing[] = $fq;
            }
        }

        $this->query->setQuery($query);
        $this->query->setStart($start);
        $this->query->setRows($count);
        $this->query->setParam('facet', $facets ? 'true' : 'false');
        $this->query->setParam('hl', $highlight ? 'true' : 'false');
        $this->query->setParam('spellcheck', $spellcheck ? 'true' : 'false');

        try {
            $results = $this->client->query($this->query)->getArrayResponse();
        } catch (\SolrException $e) {
            throw new WebException ("SolrError", $e->getMessage(), -700);
        }

        // documents, indexed by id
        $books = array();
        if (isset($results['response']['docs']) && is_array($results['response']['docs'])) {
            foreach ($results['response']['docs'] as $doc) {
                $books[$doc['id']] = $doc;
            }
        }

        // highlighting: drop '_fr' from the field names and keep only the
        // first element of each highlighted field
        $highlighting = array();
        if (isset($results['highlighting'])) {
            foreach ($results['highlighting'] as $key => $fields) {
                $data = array();
                foreach ($fields as $name => $values) {
                    $data[str_replace('_fr', '', $name)] = reset($values);
                }
                $highlighting[$key] = $data;
            }
        }

        // spellcheck: word => suggestions; entries without a 'suggestion' key are skipped
        $spelling = array();
        if (isset($results['spellcheck']['suggestions'])) {
            foreach ($results['spellcheck']['suggestions'] as $word => $entry) {
                if (is_array($entry) && isset($entry['suggestion'])) {
                    $spelling[$word] = $entry['suggestion'];
                }
            }
        }

        $facetData = array();
        if (isset($results['facet_counts']['facet_fields'])) {
            foreach ($results['facet_counts']['facet_fields'] as $name => $values) {
                $facetData[$name] = $values;
            }
        }
        if (isset($results['facet_counts']['facet_ranges'])) {
            // a gap without a decimal point means the range keys are integers
            $gap = (string) $this->query->getParam('facet.range.gap');
            $integer = strpos($gap, '.') === false;

            foreach ($results['facet_counts']['facet_ranges'] as $name => $range) {
                $counts = (isset($range['counts']) && is_array($range['counts']))
                    ? $range['counts']
                    : array();

                if (!$integer) {
                    $facetData[$name] = $counts;
                    continue;
                }

                $facetData[$name] = array();
                foreach ($counts as $key => $value) {
                    $facetData[$name][intval($key)] = $value;
                }
            }
        }

        return array(
            'count' => isset($results['response']['numFound']) ? $results['response']['numFound'] : 0,
            'facets' => array(
                'facets' => $facetData,
                'highlighting' => $highlighting,
                'spelling' => $spelling,
            ),
            'books' => $books,
        );
    }

    /**
     * Return a list of suggested titles for the given text
     *
     * Suggestions are deduplicated on their term (tags stripped) and sorted by
     * decreasing weight; the weight gets a bonus of up to 100 the closer the
     * text is to the beginning of the term.
     *
     * @param string $text
     * @return array
     * @throws WebException
     */
    public function suggest($text)
    {
        $this->query->setQuery($text);
        $this->query->setStart(0);
        $this->query->setRows(0);
        $this->query->setParam('suggest', 'true');

        try {
            $results = $this->client->query($this->query)->getArrayResponse();
        } catch (\SolrException $e) {
            // same as in getResults(): covers client-side and server-side errors
            throw new WebException ("SolrError", $e->getMessage(), -700);
        }

        $text = mb_strtolower($text, 'UTF-8');

        $suggestions = array();
        if (isset($results['suggest']) && is_array($results['suggest'])) {
            foreach ($results['suggest'] as $suggester) {
                // the suggester is expected to index its results by the lowercased text
                if (!isset($suggester[$text]['suggestions']) || !is_array($suggester[$text]['suggestions'])) {
                    continue;
                }

                foreach ($suggester[$text]['suggestions'] as $s) {
                    if (!is_array($s) || !isset($s['term'])) {
                        continue;
                    }
                    if (!isset($s['weight'])) {
                        $s['weight'] = 0;
                    }

                    $s['term'] = strip_tags($s['term']);

                    // position and length are both counted in characters, not bytes
                    $lowered = mb_strtolower($s['term'], 'UTF-8');
                    $length = mb_strlen($lowered, 'UTF-8');
                    $pos = mb_strpos($lowered, $text, 0, 'UTF-8');

                    if ($pos !== false && $length > 0) {
                        // increase weight of proposition that have the words at the beginning
                        $s['weight'] += (int) ((1 - $pos / $length) * 100);
                    }

                    $suggestions[$s['term']] = $s;
                }
            }
        }

        // explicit comparison: a float difference would be truncated to int by usort
        usort($suggestions, function ($a, $b) {
            if ($a['weight'] == $b['weight']) {
                return 0;
            }
            return ($a['weight'] < $b['weight']) ? 1 : -1;
        });

        return $suggestions;
    }

    /**
     * Retrieve books from Solr based on their code (NoticeNr).
     *
     * @param array $codes
     * @param string $field the field to use for the search
     * @return array Books information
     * @throws WebException
     */
    public static function GetBooks(array $codes, $field = 'code')
    {
        $count = count($codes);
        if ($count === 0) {
            // nothing to look for: an empty query would be sent as '*:*' with 0 rows
            return array();
        }

        // it is faster to do multiple small request to Solr rather than one big so separate
        // in chunks if we are above the limit. 15 was found by testing and seems to be a sweet spot
        $limit = 15;
        if ($count > $limit) {
            $books = array();
            foreach (array_chunk($codes, $limit) as $chunk) {
                // if we use array_merge here the numerical keys will be lost
                $books += self::GetBooks($chunk, $field);
            }
            return $books;
        }

        $bs = new static();
        $bs->addOrQuery($codes, $field);
        $results = $bs->getResults(0, $count);

        return $results['books'];
    }

    /**
     * Return the facet values of the given field among the visible books.
     *
     * @param string $field
     * @return array empty when the response holds no facet for the field
     * @throws WebException
     */
    public static function GetTerms($field)
    {
        $s = new BookSearch();
        $s->addFilterQuery(1, 'visible');
        $s->addFacetField($field);
        $s->setFacetLimits(2000, 2);
        $results = $s->getResults(0, 0, true);

        return isset($results['facets']['facets'][$field])
            ? $results['facets']['facets'][$field]
            : array();
    }

    /**
     * Return the range facet of the given field among the visible books,
     * from 0 to 250 * 60 with a gap of 30.
     *
     * @param string $field
     * @return array empty when the response holds no facet for the field
     * @throws WebException
     */
    public static function GetTermsRange($field)
    {
        $s = new BookSearch();
        $s->addFilterQuery(1, 'visible');
        $s->setFacetRangeField($field);
        $s->setFacetRange(0, 250 * 60, 30);
        // to avoid useless calculation, only set this 'normal' facet
        $s->addFacetField('visible');
        $results = $s->getResults(0, 0, true);

        return isset($results['facets']['facets'][$field])
            ? $results['facets']['facets'][$field]
            : array();
    }
}