You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

229 lines
6.9 KiB
PHP

<?php
namespace BSR\Lib\Search;
use BSR\Lib\Configuration;
use BSR\Lib\Exception\WebException;
// Side effect at load time: sets the mbstring HTTP output character encoding to UTF-8
// as soon as this file is included.
mb_http_output('UTF-8');
/**
 * Small query builder / executor on top of the Solr PHP extension.
 *
 * Query fragments are accumulated with addQuery(), addOrQuery() and addRange()
 * and joined with AND when getResults() is called. Sorting and faceting options
 * are forwarded directly to the underlying Solr query object.
 */
class BookSearch
{
    /** @var \SolrClient */
    private $client;

    /** @var \SolrQuery */
    private $query;

    /** @var string[] query fragments, joined with ' AND ' by getResults() */
    private $queryParts = array();

    /**
     * Build the Solr client from the 'solr.*' configuration values and prepare
     * an (Extended) DisMax query object.
     */
    public function __construct()
    {
        $options = array(
            'hostname' => Configuration::get('solr.server'),
            'port'     => Configuration::get('solr.port'),
            'login'    => Configuration::get('solr.username'),
            'password' => Configuration::get('solr.password'),
            'path'     => Configuration::get('solr.path'),
        );

        $this->client = new \SolrClient($options);
        $this->query = new \SolrDisMaxQuery();

        // use the Extended DisMax Query parser
        $this->query->useEDisMaxQueryParser();

        // most options like search fields, sorting, etc are already set
        // as default in the Solr config and thus should be set only on a
        // per request basis when needed
    }

    /**
     * Add a fragment matching any of the given values on one field:
     * field:("a" OR "b" OR ...). Does nothing when $texts is empty.
     *
     * @param array  $texts values to look for; each one is escaped
     * @param string $field field to search in (used as is, not escaped)
     */
    public function addOrQuery(array $texts, $field)
    {
        if (count($texts) > 0) {
            $texts = array_map(array('SolrUtils', 'escapeQueryChars'), $texts);
            $query = sprintf('%s:("%s")', $field, implode('" OR "', $texts));
            // values are already escaped and the field is already part of the fragment
            $this->addQuery($query, null, false);
        }
    }

    /**
     * Add a query fragment.
     *
     * @param string      $queryText  text to search for
     * @param string|null $queryField when not empty, the fragment becomes field:"text"
     * @param bool        $escape     whether to escape Solr special characters in $queryText
     */
    public function addQuery($queryText, $queryField = null, $escape = true)
    {
        if ($escape) {
            $queryText = \SolrUtils::escapeQueryChars($queryText);
        }

        // cast first: passing null to strlen() is deprecated as of PHP 8.1
        if (strlen((string) $queryField) > 0) {
            $queryText = "$queryField:\"$queryText\"";
        }

        $this->queryParts[] = $queryText;
    }

    /**
     * Add a range fragment: field:[min TO max].
     *
     * NOTE(review): $min and $max are inserted without escaping so that the
     * '*' wildcard keeps working; callers should only pass trusted values.
     *
     * @param string $field
     * @param mixed  $min lower bound, '*' for open ended
     * @param mixed  $max upper bound, '*' for open ended
     */
    public function addRange($field, $min = '*', $max = '*')
    {
        $this->queryParts[] = sprintf('%s:[%s TO %s]', $field, $min, $max);
    }

    /**
     * Add a sort criterion to the query.
     *
     * @param string $field
     * @param int    $order one of the \SolrQuery::ORDER_* constants
     */
    public function addSortField($field, $order = \SolrQuery::ORDER_DESC)
    {
        $this->query->addSortField($field, $order);
    }

    /**
     * Request facet counts for the given field.
     *
     * @param string $field
     */
    public function addFacetField($field)
    {
        $this->query->addFacetField($field);
    }

    /**
     * Set the facet limit and/or the facet minimum count. A null argument
     * leaves the corresponding setting untouched.
     *
     * @param int|null $limit
     * @param int|null $count
     */
    public function setFacetLimits($limit = null, $count = null)
    {
        if (!is_null($limit)) {
            $this->query->setFacetLimit($limit);
        }
        if (!is_null($count)) {
            $this->query->setFacetMinCount($count);
        }
    }

    /**
     * Run the accumulated query (or '*:*' when no fragment was added).
     *
     * @param int $start offset of the first document to return
     * @param int $count maximum number of documents to return
     * @return array with the keys:
     *               'count'  => number of matches reported by Solr (0 when absent),
     *               'facets' => array('facets' => ..., 'highlighting' => ..., 'spelling' => ...),
     *               'books'  => documents indexed by their 'id' field
     * @throws WebException
     */
    public function getResults($start = 0, $count = 15)
    {
        if (count($this->queryParts) == 0) {
            $query = '*:*';
        } else {
            $query = implode(' AND ', $this->queryParts);
        }

        $this->query->setQuery($query);
        $this->query->setStart($start);
        $this->query->setRows($count);

        $results = $this->executeQuery();

        $books = array();
        if (isset($results['response']['docs']) && is_array($results['response']['docs'])) {
            foreach ($results['response']['docs'] as $r) {
                $books[$r['id']] = $r;
            }
        }

        $highlighting = array();
        if (isset($results['highlighting'])) {
            foreach ($results['highlighting'] as $k => $h) {
                $data = array();
                foreach ($h as $f => $v) {
                    // drop the '_fr' marker from the field name and keep only the first snippet
                    $data[str_replace('_fr', '', $f)] = reset($v);
                }
                $highlighting[$k] = $data;
            }
        }

        $spelling = array();
        if (isset($results['spellcheck']['suggestions']) && is_array($results['spellcheck']['suggestions'])) {
            $spelling = array_values($results['spellcheck']['suggestions']);
        }

        $facets = array();
        if (isset($results['facet_counts']['facet_fields']) && is_array($results['facet_counts']['facet_fields'])) {
            $facets = $results['facet_counts']['facet_fields'];
        }

        return array(
            // guarded like the other sections so an incomplete response does not raise a notice
            'count' => isset($results['response']['numFound']) ? $results['response']['numFound'] : 0,
            'facets' => array(
                'facets' => $facets,
                'highlighting' => $highlighting,
                'spelling' => $spelling,
            ),
            'books' => $books,
        );
    }

    /**
     * Return a list of suggested titles for the given text
     *
     * Suggestions are de-duplicated by term and sorted by decreasing weight.
     * Suggesters that have no entry for the lower-cased text are skipped.
     *
     * @param string $text
     * @return array list of suggestions (each one an array with at least 'term' and 'weight')
     * @throws WebException
     */
    public function suggest($text)
    {
        $this->query->setQuery($text);
        $this->query->setStart(0);
        $this->query->setRows(0);
        $this->query->setParam('suggest', 'true');
        $this->query->setParam('facet', 'false');
        $this->query->setParam('hl', 'false');
        $this->query->setParam('spellcheck', 'false');

        $results = $this->executeQuery();

        $text = mb_strtolower($text, 'UTF-8');
        $suggestions = array();

        if (isset($results['suggest']) && is_array($results['suggest'])) {
            foreach ($results['suggest'] as $suggester) {
                // a suggester may have no entry for this text: skip it instead of failing
                if (!isset($suggester[$text]['suggestions']) || !is_array($suggester[$text]['suggestions'])) {
                    continue;
                }

                foreach ($suggester[$text]['suggestions'] as $s) {
                    $s = (array) $s;
                    if (!isset($s['term'])) {
                        continue;
                    }
                    if (!isset($s['weight'])) {
                        $s['weight'] = 0;
                    }

                    $s['term'] = strip_tags($s['term']);
                    $length = strlen($s['term']);

                    // an empty needle or an empty term cannot be positioned (and would
                    // respectively trigger a warning or a division by zero)
                    $pos = false;
                    if ($text !== '' && $length > 0) {
                        $pos = strpos(mb_strtolower($s['term'], 'UTF-8'), $text);
                    }

                    if ($pos !== false) {
                        // increase weight of proposition that have the words at the beginning
                        $s['weight'] += (int) ((1 - $pos / $length) * 100);
                    }

                    $suggestions[$s['term']] = $s;
                }
            }
        }

        // explicit comparison rather than a subtraction, which is unsafe when the
        // difference is a float or overflows
        usort($suggestions, function ($a, $b) {
            if ($a['weight'] == $b['weight']) {
                return 0;
            }
            return ($a['weight'] < $b['weight']) ? 1 : -1;
        });

        return $suggestions;
    }

    /**
     * Retrieve books from Solr based on their code (NoticeNr).
     *
     * @param array $codes
     * @param string $field the field to use for the search
     * @return array Books information, indexed by document id
     * @throws WebException
     */
    public static function GetBooks(array $codes, $field = 'code')
    {
        // nothing to look for: avoid sending a match-all query just to get no rows back
        if (count($codes) == 0) {
            return array();
        }

        // it is faster to do multiple small request to Solr rather than one big so separate
        // in chunks if we are above the limit. 15 was found by testing and seems to be a sweet spot
        $limit = 15;

        $books = array();
        foreach (array_chunk($codes, $limit) as $chunk) {
            $bs = new static();
            $bs->addOrQuery($chunk, $field);
            $results = $bs->getResults(0, count($chunk));

            // if we use array_merge here the numerical keys (book code) will be lost
            $books += $results['books'];
        }

        return $books;
    }

    /**
     * Send the current query to Solr and return the decoded response.
     *
     * Catches the common base class of the Solr extension exceptions so that
     * both client side and server side failures are reported as a WebException.
     *
     * @return array
     * @throws WebException
     */
    private function executeQuery()
    {
        try {
            return $this->client->query($this->query)->getArrayResponse();
        } catch (\SolrException $e) {
            throw new WebException("SolrError", $e->getMessage(), -700);
        }
    }
}