/home/preegmxb/byeaglytics-co.com/administrator/components/com_finder/src/Indexer/Helper.php
<?php
/**
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 *
 * @copyright   (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
 * @license     GNU General Public License version 2 or later; see LICENSE.txt
 */

namespace Joomla\Component\Finder\Administrator\Indexer;

\defined('_JEXEC') or die;

use Exception;
use Joomla\CMS\Component\ComponentHelper;
use Joomla\CMS\Factory;
use Joomla\CMS\Language\Multilanguage;
use Joomla\CMS\Plugin\PluginHelper;
use Joomla\CMS\Table\Table;
use Joomla\Registry\Registry;
use Joomla\String\StringHelper;

/**
 * Helper class for the Finder indexer package.
 *
 * @since  2.5
 */
class Helper
{
	/**
	 * Converts raw input of a given format into plain text.
	 *
	 * The work is delegated to the parser that Parser::getInstance() returns
	 * for the requested format.
	 *
	 * @param   string  $input   The raw input.
	 * @param   string  $format  The format of the input. [optional]
	 *
	 * @return  string  The parsed input.
	 *
	 * @since   2.5
	 * @throws  Exception on invalid parser.
	 */
	public static function parse($input, $format = 'html')
	{
		// Look up the parser responsible for this format.
		$parser = Parser::getInstance($format);

		// Let the parser turn the raw input into plain text.
		return $parser->parse($input);
	}

	/**
	 * Splits a text string into Token objects.
	 *
	 * Single-term tokens and derived multi-term tokens are memoised per
	 * requested language for the duration of the request; each language
	 * bucket is capped at 1024 entries.
	 *
	 * @param   string   $input   The input to tokenize.
	 * @param   string   $lang    The language of the input.
	 * @param   boolean  $phrase  Flag to indicate whether input could be a phrase. [optional]
	 *
	 * @return  Token[]  An array of Token objects.
	 *
	 * @since   2.5
	 */
	public static function tokenize($input, $lang, $phrase = false)
	{
		static $cache = [], $tuplecount;
		static $multilingual;
		static $defaultLanguage;

		// Read the configured maximum phrase length the first time it is needed.
		if (!$tuplecount)
		{
			$tuplecount = ComponentHelper::getParams('com_finder')->get('tuplecount', 1);
		}

		// Resolve the fallback language once.
		if ($multilingual === null)
		{
			$multilingual = Multilanguage::isEnabled();
			$configured   = ComponentHelper::getParams('com_finder')->get('language_default', '');

			if ($configured == '')
			{
				$fallbackCode = '*';
			}
			elseif ($configured == '-1')
			{
				$fallbackCode = self::getDefaultLanguage();
			}
			else
			{
				$fallbackCode = $configured;
			}

			/*
			 * The fallback language is always labelled '*'. A clone is used so
			 * that relabelling it does not modify the instance handed out by
			 * Language::getInstance().
			 */
			$defaultLanguage = clone Language::getInstance($fallbackCode);
			$defaultLanguage->language = '*';
		}

		// Use the fallback unless the site is multilingual and a concrete language was requested.
		$language = (!$multilingual || $lang == '*') ? $defaultLanguage : Language::getInstance($lang);

		if (!isset($cache[$lang]))
		{
			$cache[$lang] = [];
		}

		// Split the input into terms and drop the falsy ones. Note that this also discards the term "0".
		$terms  = array_values(array_filter($language->tokenise($input)));
		$tokens = [];

		if ($phrase === true && count($terms) > 1)
		{
			/*
			 * Phrase mode: the terms are kept together as one token and no
			 * multi-word combinations are derived. This requires more than
			 * one term.
			 */
			$tokens[] = new Token($terms, $language->language, $language->spacer);
		}
		else
		{
			// One token per term, reusing a cached token when available.
			foreach ($terms as $term)
			{
				if (!isset($cache[$lang][$term]))
				{
					$cache[$lang][$term] = new Token($term, $language->language);
				}

				$tokens[] = $cache[$lang][$term];
			}

			// Derive phrase tokens of 2 to $tuplecount consecutive words.
			if ($tuplecount > 1)
			{
				// Only the single-term tokens collected so far act as building blocks.
				$singles = count($tokens);

				for ($start = 0; $start < $singles; $start++)
				{
					$parts = [$tokens[$start]->term];

					for ($next = $start + 1; $next < $start + $tuplecount && $next < $singles; $next++)
					{
						$parts[] = $tokens[$next]->term;
						$key     = implode('::', $parts);

						if (!isset($cache[$lang][$key]))
						{
							$tuple = new Token($parts, $language->language, $language->spacer);
							$tuple->derived = true;
							$cache[$lang][$key] = $tuple;
						}

						$tokens[] = $cache[$lang][$key];
					}
				}
			}
		}

		/*
		 * Keep the cache from filling up the memory. The entries that were
		 * cached first are kept and everything cached later is dropped, on
		 * the assumption that the most common words appear early in the text.
		 */
		if (count($cache[$lang]) > 1024)
		{
			$cache[$lang] = array_slice($cache[$lang], 0, 1024, true);
		}

		return $tokens;
	}

	/**
	 * Reduces a token to its base word using the stemmer of a language.
	 *
	 * The default stemmer is used when the site is not multilingual or when
	 * the language is '*'; otherwise the stemmer of the given language is used.
	 *
	 * @param   string  $token  The token to stem.
	 * @param   string  $lang   The language of the token.
	 *
	 * @return  string  The root token.
	 *
	 * @since   2.5
	 */
	public static function stem($token, $lang)
	{
		static $multilingual;
		static $defaultStemmer;

		// Resolve the default stemmer once.
		if ($multilingual === null)
		{
			$multilingual = Multilanguage::isEnabled();
			$configured   = ComponentHelper::getParams('com_finder')->get('language_default', '');

			if ($configured == '')
			{
				$code = '*';
			}
			elseif ($configured == '-1')
			{
				$code = self::getDefaultLanguage();
			}
			else
			{
				$code = $configured;
			}

			$defaultStemmer = Language::getInstance($code);
		}

		$useDefault = !$multilingual || $lang == '*';
		$stemmer    = $useDefault ? $defaultStemmer : Language::getInstance($lang);

		return $stemmer->stem($token);
	}

	/**
	 * Method to add a content type to the database.
	 *
	 * The rows of #__finder_types are loaded once and kept in a static cache
	 * keyed by title. A type that has to be inserted is added to that cache as
	 * well, so asking for the same title again within the same request returns
	 * the stored id instead of issuing a second INSERT.
	 *
	 * @param   string  $title  The type of content. For example: PDF
	 * @param   string  $mime   The mime type of the content. For example: PDF [optional]
	 *
	 * @return  integer  The id of the content type.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	public static function addContentType($title, $mime = null)
	{
		static $types;

		$db    = Factory::getDbo();
		$query = $db->getQuery(true);

		// Check if the types are loaded.
		if (empty($types))
		{
			// Build the query to get the types.
			$query->select('*')
				->from($db->quoteName('#__finder_types'));

			// Get the types, indexed by their title.
			$db->setQuery($query);
			$types = $db->loadObjectList('title');
		}

		// Check if the type already exists.
		if (isset($types[$title]))
		{
			return (int) $types[$title]->id;
		}

		// Add the type.
		$query->clear()
			->insert($db->quoteName('#__finder_types'))
			->columns(array($db->quoteName('title'), $db->quoteName('mime')))
			->values($db->quote($title) . ', ' . $db->quote($mime));
		$db->setQuery($query);
		$db->execute();

		// Remember the new type so that a repeated call does not insert it again.
		$type        = new \stdClass;
		$type->id    = (int) $db->insertid();
		$type->title = $title;
		$type->mime  = $mime;

		$types[$title] = $type;

		// Return the new id.
		return $type->id;
	}

	/**
	 * Tells whether a token is listed as a common term for a language.
	 *
	 * The common terms of each language are fetched through getCommonWords()
	 * on first use and kept in a static cache. When the site is not
	 * multilingual, or the language is '*', the configured default language
	 * is consulted instead of the given one.
	 *
	 * @param   string  $token  The token to test.
	 * @param   string  $lang   The language to reference.
	 *
	 * @return  boolean  True if common, false otherwise.
	 *
	 * @since   2.5
	 */
	public static function isCommon($token, $lang)
	{
		static $data, $default, $multilingual;

		// Resolve the default language key once.
		if ($multilingual === null)
		{
			$multilingual = Multilanguage::isEnabled();
			$configured   = ComponentHelper::getParams('com_finder')->get('language_default', '');

			if ($configured == '')
			{
				$default = '*';
			}
			elseif ($configured == '-1')
			{
				$default = self::getPrimaryLanguage(self::getDefaultLanguage());
			}
			else
			{
				$default = self::getPrimaryLanguage($configured);
			}
		}

		// Decide which language's list of common terms applies.
		$listKey = (!$multilingual || $lang == '*') ? $default : $lang;

		// Fetch the list on first use.
		if (!isset($data[$listKey]))
		{
			$data[$listKey] = self::getCommonWords($listKey);
		}

		// Strict comparison: the token has to match a listed term exactly.
		return in_array($token, $data[$listKey], true);
	}

	/**
	 * Loads the common terms stored for a language.
	 *
	 * Reads the "term" column of #__finder_terms_common for all rows whose
	 * "language" column equals the given value.
	 *
	 * @param   string  $lang  The language to use.
	 *
	 * @return  array  Array of common terms.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	public static function getCommonWords($lang)
	{
		$db    = Factory::getDbo();
		$query = $db->getQuery(true);

		// Select every common term recorded for the language.
		$query->select($db->quoteName('term'));
		$query->from($db->quoteName('#__finder_terms_common'));
		$query->where($db->quoteName('language') . ' = ' . $db->quote($lang));

		// Run the query and hand back the single result column.
		$db->setQuery($query);

		return $db->loadColumn();
	}

	/**
	 * Returns the default language of the site.
	 *
	 * The value is the "site" parameter of com_languages, with 'en-GB' as
	 * the fallback, and is remembered after the first lookup.
	 *
	 * @return  string  The default language string.
	 *
	 * @since   2.5
	 */
	public static function getDefaultLanguage()
	{
		static $lang;

		// Reuse the value from an earlier call.
		if (!empty($lang))
		{
			return $lang;
		}

		// com_languages holds the site default language; it is the best guess available.
		$lang = ComponentHelper::getParams('com_languages')->get('site', 'en-GB');

		return $lang;
	}

	/**
	 * Method to parse a language/locale key and return a simple language string.
	 *
	 * The intl Locale class is used when it is available. Otherwise the part
	 * of the key in front of the first hyphen is returned; a key without a
	 * hyphen (for example "en" or "*") is returned unchanged rather than
	 * being reduced to an empty value. Results are cached per key.
	 *
	 * @param   string  $lang  The language/locale key. For example: en-GB
	 *
	 * @return  string  The simple language string. For example: en
	 *
	 * @since   2.5
	 */
	public static function getPrimaryLanguage($lang)
	{
		static $data;

		// Only parse the identifier if necessary.
		if (!isset($data[$lang]))
		{
			if (is_callable(array('Locale', 'getPrimaryLanguage')))
			{
				// Get the language key using the Locale package.
				$data[$lang] = \Locale::getPrimaryLanguage($lang);
			}
			else
			{
				// Get the language key using string position.
				$separator = StringHelper::strpos($lang, '-');

				// Without a hyphen there is no region part to strip, so the key is already the language.
				$data[$lang] = $separator === false ? $lang : StringHelper::substr($lang, 0, $separator);
			}
		}

		return $data[$lang];
	}

	/**
	 * Lets the finder plugins add extra data to an item before it is indexed.
	 * This is how Comments, Tags, Labels, etc. are made available to Finder.
	 *
	 * The "finder" plugin group is imported and the onPrepareFinderContent
	 * event is triggered with the item passed by reference.
	 *
	 * @param   Result  $item  The item to index as a Result object.
	 *
	 * @return  boolean  Always true.
	 *
	 * @since   2.5
	 * @throws  Exception on database error.
	 */
	public static function getContentExtras(Result $item)
	{
		// Make sure the finder plugins are available.
		PluginHelper::importPlugin('finder');

		// Give every plugin the chance to extend the item.
		$app = Factory::getApplication();
		$app->triggerEvent('onPrepareFinderContent', [&$item]);

		return true;
	}

	/**
	 * Runs content text through the onContentPrepare event.
	 *
	 * The text is placed on a Content table object, optionally populated from
	 * the given item, and the event is triggered with the context
	 * 'com_finder.indexer'. The text as left by the plugins is returned.
	 *
	 * @param   string    $text    The content to process.
	 * @param   Registry  $params  The parameters object. Any other value is passed to the Registry constructor. [optional]
	 * @param   Result    $item    The item which get prepared. [optional]
	 *
	 * @return  string  The processed content.
	 *
	 * @since   2.5
	 */
	public static function prepareContent($text, $params = null, Result $item = null)
	{
		static $loaded;

		// Import the content plugins once.
		if (!$loaded)
		{
			PluginHelper::importPlugin('content');
			$loaded = true;
		}

		// Make sure the parameters are a Registry object.
		if (!($params instanceof Registry))
		{
			$params = new Registry($params);
		}

		// Build the stand-in content object that the plugins operate on.
		$content       = Table::getInstance('Content');
		$content->text = $text;

		if ($item)
		{
			// Copy the item's properties first, then its elements.
			$content->bind((array) $item);
			$content->bind($item->getElements());

			if (!empty($item->context))
			{
				$content->context = $item->context;
			}
		}

		// Fire the onContentPrepare event.
		$app = Factory::getApplication();
		$app->triggerEvent('onContentPrepare', ['com_finder.indexer', &$content, &$params, 0]);

		return $content->text;
	}
}