<?php
 
/***************************************************************************
 
 *   lib_dictionary.php (ver. 1.1.1)
 
 *   Copyright (c) 2007 David Frendin ([email protected])
 
 *
 
 *   This program is free software; you can redistribute it and/or modify
 
 *   it under the terms of the GNU General Public License as published by
 
 *   the Free Software Foundation; either version 2 of the License, or
 
 *   (at your option) any later version. See the GNU General Public License
 
 *   for more details.
 
 *
 
 ***************************************************************************/
 
 
/***************************************************************************
 
 *   Description:
 
 *   This library checks if a word is misspelled, and makes 3 (or fewer/more) suggestions
 
 *   as to which word the user meant.
 
 *
 
 *   This spell lexicon uses mysql to store/load dictionary data and similar_text / metaphone
 
 *   to determine if a dictionary word might be a suitable correction.
 
 *
 
 *   The lib_dictionary library does _not_ require pspell or aspell, or any external
 
 *   applications or dictionaries.
 
 *
 
 *   Credits to:
 
 *   Myself for writing it, Reza Saleh ([email protected]) for inspiration and english wordlist,
 
 *   Oxymoron (php portalen) for optimization and you for reading it!
 
 *
 
 ***************************************************************************/
 
 
//
// dictclass
// Database-backed spell checker. Words live in DICTIONARY_TABLE (columns used
// here: word, metaphone, len, metaphoner) and are reached through the global
// $db object, which must provide sql_query(), sql_numrows() and
// sql_fetchrowset().
//
class dictclass
{
    // true when the most recent load_dictionary() call found candidate rows
    public $is_loaded = false;

    // candidate rows fetched by the most recent load_dictionary() call
    public $dictionary = array();


    //
    // spell_phrase
    // highlights every word of $phrase that is not found in the dictionary.
    // Words are the space-separated tokens of $phrase with everything except
    // A-Z, a-z and 0-9 removed (which makes it unsuitable for non-english text).
    // # returns: the phrase with unknown words wrapped in a red/bold <span>;
    //            when $debug_data is true the elapsed time is appended.
    //
    function spell_phrase($phrase, $debug_data = false)
    {
        $started = $debug_data ? microtime(true) : 0;

        $span_open  = "<span style=\"color: #aa0000; font-weight: bold;\">";
        $span_close = "</span>";

        // lowercased word => true when misspelled; avoids one query per repetition
        $misspelled = array();

        $tokens = explode(" ", $phrase);
        foreach ($tokens as $idx => $token)
        {
            // strip quotation marks, commas, dots etc. (also removes any markup characters)
            $word = preg_replace('/[^A-Za-z0-9]/', '', $token);
            if ($word === '' || $word === null)
                continue;   // nothing left to look up (e.g. "--" or a double space)

            $key = strtolower($word);
            if (!isset($misspelled[$key]))
                $misspelled[$key] = ($this->does_word_exist($word) === -1);

            // Replace inside this token only: a phrase-wide replace would wrap
            // repeated words twice and touch substrings of other words.
            if ($misspelled[$key])
                $tokens[$idx] = str_replace($word, $span_open . $word . $span_close, $token);
        }
        $phrase = implode(" ", $tokens);

        if ($debug_data)
        {
            $elapsed = microtime(true) - $started;
            // seconds * 1000 = milliseconds
            $phrase = $phrase . " <b>" . sprintf('%.1f', $elapsed * 1000) . "ms</b> (" . sprintf('%.2f', $elapsed) . "s)";
        }
        return $phrase;
    }


    //
    // checkhighest
    // scores every row of $this->dictionary against $word with similar_text()
    // and keeps the best ones. $max is the highest index returned, so at most
    // $max + 1 suggestions are produced (3 with the default).
    // # returns: -1 when no dictionary is loaded, otherwise an array sorted by
    //            descending similarity; each entry is array('word' => ..., 'p' => percent).
    //            Words with 0% similarity are never included, so the array may be empty.
    //
    function checkhighest($word, $max = 2)
    {
        if (empty($this->dictionary))
            return -1;

        $limit = max(0, (int) $max) + 1;
        $match = array();

        foreach ($this->dictionary as $lookup)
        {
            $p = 0;
            similar_text($lookup['word'], $word, $p);
            if ($p <= 0)
                continue;

            // Walk back past every weaker entry; equal scores keep first-seen order.
            $pos = count($match);
            while ($pos > 0 && $match[$pos - 1]['p'] < $p)
                $pos--;

            if ($pos >= $limit)
                continue;   // not good enough to make the list

            array_splice($match, $pos, 0, array(array('word' => $lookup['word'], 'p' => $p)));
            if (count($match) > $limit)
                array_pop($match);
        }
        return $match;
    }


    //
    // load_dictionary
    // does a search for possible correct words based on word length and metaphone,
    // and stores the rows found in $this->dictionary.
    // The length window is [strlen - $size - 1, strlen + $size], never below 1.
    // # returns: false if none were found, or an array containing all found rows
    //
    function load_dictionary($word, $size=1)
    {
        global $db;

        $mphone = metaphone($word);
        // for longer codes, drop the first and last character before matching
        if (strlen($mphone) > 3)
            $mphone = substr($mphone, 1, strlen($mphone)-2);
        $mphone = $this->sql_escape_value($mphone);

        $size = (int) $size;
        $min = strlen($word) - $size-1;
        $max = strlen($word) + $size;
        if ($min < 1)
            $min = 1;

        $sql = "SELECT * FROM " . DICTIONARY_TABLE . " WHERE len BETWEEN {$min} AND {$max} AND (metaphone LIKE '{$mphone}%' OR metaphoner LIKE concat(reverse('{$mphone}'), '%'))";
        $result = $db->sql_query($sql) or die("error");

        if ($db->sql_numrows($result) == 0)
        {
            $this->is_loaded = false;
            // drop rows from an earlier lookup so checkhighest() cannot reuse them
            $this->dictionary = array();
            return false;
        }

        $this->is_loaded = true;
        $this->dictionary = $db->sql_fetchrowset($result);
        return $this->dictionary;
    }


    //
    // does_word_exist
    // queries the db to determine if the word exists in the dictionary (if so, the word is correctly spelled).
    // The word is lowercased and escaped here, so callers must pass it unescaped.
    // # returns: -1 when not found, or a single-dimensional array holding the db record
    //
    function does_word_exist($word)
    {
        global $db;

        $sql = "SELECT * FROM " . DICTIONARY_TABLE . " WHERE ( word = '" . $this->sql_escape_value(strtolower($word)) . "' )";
        $result = $db->sql_query($sql) or die("sql could not connect [does_word_exist]");

        if ($db->sql_numrows($result) == 0)
            return -1;

        $rows = $db->sql_fetchrowset($result);
        return $rows[0];
    }


    //
    // add_from_file
    // adds new words from a file containing one word per line. Blank lines and
    // words already present are skipped. Progress is echoed as HTML.
    // # returns: false if the file could not be opened, otherwise the number of words inserted
    //
    function add_from_file($path)
    {
        global $db;

        set_time_limit(400);    //it takes time to process large wordlists

        $handle = @fopen($path, "r");
        if (!$handle)
            return false;

        $added = 0;
        // fgets() returns false at end of file, which also ends the loop
        while (($line = fgets($handle, 4096)) !== false)
        {
            $line = strtolower(trim($line));    // trim() also removes the line ending
            if ($line === '')
                continue;

            if ($this->does_word_exist($line) !== -1)
                continue;

            $mphone = metaphone($line);
            // metaphoner holds the reversed code: load_dictionary() matches it against reverse(...)
            $sql = "INSERT INTO " . DICTIONARY_TABLE . " (word, metaphone, len, metaphoner) VALUES ('"
                . $this->sql_escape_value($line) . "', '"
                . $this->sql_escape_value($mphone) . "', "
                . strlen($line) . ", '"
                . $this->sql_escape_value(strrev($mphone)) . "')";

            echo "adding: " . htmlspecialchars($line) . "<br>";
            $result = $db->sql_query($sql) or die("could not connect to db [add_from_file]");
            $added++;
        }
        fclose($handle);
        echo "added wordlist";

        return $added;
    }


    //
    // sql_escape_value
    // escapes a value for use inside a single-quoted SQL string literal.
    // Uses $db->sql_escape() when the db layer offers it, then the legacy
    // mysql extension when it is loaded, and addslashes() as a last resort.
    // # returns: the escaped string
    //
    private function sql_escape_value($value)
    {
        global $db;

        $value = (string) $value;
        if (is_object($db) && method_exists($db, 'sql_escape'))
            return $db->sql_escape($value);

        if (function_exists('mysql_real_escape_string'))
            return mysql_real_escape_string($value);

        return addslashes($value);
    }
}
 
?> 
 
 |