/* Logo Search packages:
   Sourcecode: webdruid version File versions  Download package

   sengine.c */

/*
    sengine.c - search engines management for The WebDruid

    Copyright (C) 2003-2004  Fabien Chevalier (fabien@juliana-multimedia.com)

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version, and provided that the above
    copyright and permission notice is included with all distributed
    copies of this or derived software.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/

/* ensure sys/types */
#ifndef _SYS_TYPES_H
#include <sys/types.h>
#endif

/* some systems need this */
#ifdef HAVE_MATH_H
#include <math.h>
#endif

/* SunOS 4.x Fix */
#ifndef CLK_TCK
#define CLK_TCK _SC_CLK_TCK
#endif

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <langinfo.h> /* nl_langinfo */
#include <iconv.h>    /* iconv_xxxxx */

#include "webdruid.h"
#include "sengine.h"
#include "linklist.h"
#include "hashtab.h" /* for find_url() */
#include "utils.h"
#include "lang.h"

/* internal structs definition */

/*
   One search engine definition, as loaded by sengine_parse().
   The engine host name is stored split in three parts,
   <dns_beg>.<dns_middle>.<dns_ext>; sengine_find() treats a leading '*'
   in dns_beg or dns_ext as "matches anything".
*/
struct lsengine {
   char   display_name[SE_MAXDNAME + 1];  /* value of the "name" keyword            */
   char   dns_ext[SE_MAXDNSEXT + 1];      /* text after the last '.' of the host    */
   char   dns_beg[SE_MAXDNSBEG + 1];      /* text before the first '.' of the host  */
   char   dns_middle[SE_MAXDNSMID + 1];   /* text between first and last '.';
                                             this is the sort / search key          */
   char  *querytag[SE_MAXQTAG];           /* malloc'ed query tags ("q=", ...);
                                             unused slots are NULL                  */
   char  *charsettag;                     /* malloc'ed charset tag, NULL if none    */
};

/* internal function prototypes */

SNODEPTR  new_snode(char *);         /* allocates a node holding a copy of
                                        the given search string           */

void sengines_dump();                /* debugging purpose: prints the
                                        engines table to stderr           */

void sengine_del_shtab(SNODEPTR      /* frees every node of the given
                                        hash table; NULL is accepted      */
                           *htab);

/* global variables */

int            n_sengines = 0;       /* number of search engines in table */
struct sengine *sp_table  = NULL;    /* search phrases table (not touched
                                        by the code in this file)         */

/* internal variables */

/* engine definitions; n_sengines entries, sorted by sengine_parse() */
struct lsengine *engines_table = NULL;

/*********************************************/
/* SENGINE_FIND - finds engine given hostname*/
/*********************************************/

/*
  Here we do a binary (dichotomic) search on the key
  'middlename', extracted from the hostname.

  Then we run through the neighbouring entries that share
  this middle name until we find (or do not find) a match.

  returns -1 if not found
*/

/*
  used internally by sengine_find

  Binary search of 'str' among the dns_middle keys of
  engines_table[l..r] (both bounds inclusive; the table must be
  sorted on dns_middle).

  returns the index of one matching entry (not necessarily the first
  one when several entries share the key), or -1 if there is none
*/

int sengine_bsearch(int l, int r, char *str)
{
   /* no table loaded: nothing can match, and nothing may be dereferenced */
   if(engines_table == NULL)
      return -1;

   /* an empty range (l > r, e.g. r == -1 when no engine is loaded)
      must be rejected before the first table access */
   while(l <= r)
   {
      int m = l + (r - l) / 2;   /* midpoint, immune to (l + r) overflow */
      int t = strcmp(str, engines_table[m].dns_middle);

      if(t == 0)
         return m;
      else if(t < 0)
         r = m - 1;
      else
         l = m + 1;
   }

   return -1;
}

/*********************************************/
/* SENGINE_FIND                              */
/*********************************************/

/*
 * find engine index given hostname
 *
 */

int sengine_find(char *referrer)
{
   char *cp1, *cp2, *cp3;
   int ret = -1; /* what will be returned */
   int idx1, idx2;
   struct lsengine * current_sengine;
   char *hostname;

   if(strcmp("http://", referrer) >= 0)
      return ret; /* we suppose we have a wrong referrer here*/

   hostname = referrer + 7; /* strip http:// */

   /* keep only hostname part */
   cp1 = hostname + strlen(hostname) - 1;
   cp3 = hostname;
   while((cp3 <= cp1) && (*cp3 != '/')) cp3++;
   if(cp3 == cp1) cp3 = hostname - 1; /* take the last '/' of http:// */
   *cp3 = 0; /* erase the '/' */

   /* find extension, middle, beginning of the hostname */
   cp2 = hostname + strlen(hostname) - 1;
   while((*cp2 != '.') && (cp2 > hostname)) cp2--;
   if(cp2 == hostname || cp2[1] == 0)
   {
      *cp3 = '/'; /* restore referrer */
      return ret; /* no point in name or point at the end of line */
   }

   /* Here we now have at least one point, so this loop will end... */
   cp1 = hostname;
   while(*cp1 != '.') cp1++;
   if( cp1 == hostname || cp1 == cp2)
   {
      *cp3 = '/'; /* restore referrer */
      return ret; /* nothing before first point or only one point in name */
   }

   /* we now split the string */
   *cp1++ = 0;
   *cp2++ = 0;

   /* now hostname points to the beginning of the dns name,
      cp1 points to the middle, and cp2 to the extension */

   idx1 = sengine_bsearch(0, n_sengines - 1, cp1);

   if(idx1 != -1)
   {
      /* move to first element with the selected middlename */
      idx2 = idx1;
      while((idx1 > 0)
         && (strcmp(engines_table[idx1 - 1].dns_middle, engines_table[idx1].dns_middle) == 0)) idx1--;

      /* move to last element with the selected middlename */
      while((idx2 < (n_sengines - 1))
         && (strcmp(engines_table[idx2 + 1].dns_middle, engines_table[idx2].dns_middle) == 0)) idx2++;

      /* iterate through all elements whith matching middlename */
      for(;(ret == -1) && (idx1 <= idx2); idx1++)
      {
         current_sengine = &engines_table[idx1];
         if(current_sengine->dns_beg[0] == '*' || strcmp(current_sengine->dns_beg, hostname) == 0)
         {
            if(current_sengine->dns_ext[0] == '*' || strcmp(current_sengine->dns_ext, cp2) == 0)
            ret = idx1;
         }
      }
   }

   /* restore hostname string as it was before */
   cp1[-1] = '.';
   cp2[-1] = '.';
   *cp3    = '/';

   return ret;
}

/*********************************************/
/* SENGINE_SEARCH_PHRASE                     */
/*********************************************/

/*
   given engine index (returned by sengine_find),
   returns the string the user typed).
   String must be saved by caller (it is allocated
   in static buffer)
*/

char *sengine_search_phrase(int sengine_index, const char *ptr)
{
   /* ptr should point to unescaped query string */
   static char   tmpbuf[MAXSRCH];
   char          tmpbuf2[MAXSRCH];
   /* take care of growth when converting to UTF-8 -- should be enough for most of the cases */
   static char   tmpbuf3[MAXSRCH*4];
   char          srch[80];
   unsigned char *cp1, *cp2, *cp3, *cps;
   int           sp_flg=0;
   int           ntry = 0;
   iconv_t       iconv_h;  /* iconv_handle for translation into UTF-8 */

   /* for each query tag q=, MT=, ...*/
   for(; engines_table[sengine_index].querytag[ntry] != NULL && ntry < SE_MAXQTAG; ntry++)
   {
      cps = engines_table[sengine_index].querytag[ntry];

      /* Try to find query variable */
      *srch ='?'; srch[sizeof(srch)-1] = '\0';
      strncpy(srch + 1,cps,sizeof(srch)-2);           /* First, try "?...   "   */
      if ((cp1=strstr(ptr,srch))==NULL)
      {
         srch[0]='&';                                 /* Next, try "&..."       */
         if ((cp1=strstr(ptr,srch))==NULL) continue;    /* If not found, split... */
      }
      cp2=tmpbuf;
      while (*cp1!='=' && *cp1!=0) cp1++; if (*cp1!=0) cp1++;
      while (*cp1!='&' && *cp1!=0)
      {
         if (*cp1=='"' || *cp1==',' || *cp1=='?')
            { cp1++; continue; }                         /* skip bad ones..    */
         else
         {
            if (*cp1=='+') *cp1=' ';                      /* change + to space  */
            if (sp_flg && *cp1==' ') { cp1++; continue; } /* compress spaces    */
            if (*cp1==' ') sp_flg=1; else sp_flg=0;       /* (flag spaces here) */
            *cp2++=*cp1++;                         /* normal character   */
         }
      }
      *cp2=0; cp2=tmpbuf;
      if (tmpbuf[0]=='?') tmpbuf[0]=' ';                  /* format fix ?       */

      /* Google specific part - beginning

         OK, OK...it has nothing to do here...but Google is so important nowadays
         that we will do it anyway...
         What are we about to do?
         Deal with google cache feature.
         If a user makes a search, then clicks on the cached links of the page he has found,
         and then follows the link to the original page, Google will send us as query:

         cache:xxxxxx:/location/to/page.html+search+keywords

         what we have to do is only to strip the beginning until we find a space (
         remenber + have already been converted to spaces).

      */

      if(strncmp("cache:", cp2, 6) == 0)
        while( *cp2!=0 && !isspace(*cp2) ) cp2++;

      /* Google specific part - End*/

      while( *cp2!=0 && isspace(*cp2) ) cp2++;            /* skip leading sps.  */
      if (*cp2==0) continue; /* empty string */

      /* any trailing spaces? */
      cp1=cp2+strlen(cp2)-1;
      while (cp1!=cp2) if (isspace(*cp1)) *cp1--='\0'; else break;

      /* strip invalid chars */
      cp1=cp2;
      while (*cp1!=0) { if ((*cp1<32)||(*cp1==127)) *cp1='_'; cp1++; }

      if(*cp2 == 0) /* ignore empty search strings */
        continue;

      /* cp2 will contain the localized string */

      /* check if the engine gave us its charset */
      if(engines_table[sengine_index].charsettag != NULL)
      {
         /* find charset */
         /* Try to find query variable */
         *srch ='&'; srch[sizeof(srch)-1] = '\0';
         /* First, try "&...   "   */
         strncpy(srch + 1,engines_table[sengine_index].charsettag,sizeof(srch)-2);
         if ((cp1=strstr(ptr,srch))==NULL)
         {
            srch[0]='?';                                 /* Next, try "?..."       */
            cp1=strstr(ptr,srch);
         }

         if(cp1 != NULL)
         {
            /* go to beginning of charset */
            cp1 += strlen(engines_table[sengine_index].charsettag) + 1;
            /* crop string */
            for(cp3=tmpbuf2; *cp1!=0;)
               if(*cp1=='&')
                  break;
               else
                  *cp3++=*cp1++;

            *cp3 = 0; /* close string */
            cp1 = tmpbuf2;
         }
      }

      /* if not NULL, cp1 contains the charset to use */

      if(cp1 == NULL || *cp1 == 0)
        cp1 = nl_langinfo(CODESET); /* if null, we default to current charset */

      if(strcasecmp("UTF-8", cp1) != 0) /* we have work to do */
      {
         /* create handle for convertion to utf-8 */
         iconv_h = iconv_open("UTF-8", cp1);
         /*printf("Converting UTF-8<-%s %s %s \n", cp1, engines_table[sengine_index].display_name, ptr);*/

         if(iconv_h == 0)
         {
            if(verbose)
            {
               fprintf(stderr, _("[sengine_search_phrase] Failed to convert search phrase from %s to UTF-8 (engine %s): Cannot open iconv handle\n"),
                  cp1, engines_table[sengine_index].display_name );
            }
            return cp2;
         }
         else
         {
            char *in = cp2;
            char *out = tmpbuf3;
            size_t in_size = strlen(cp2) + 1, out_size = MAXSRCH * 4;

            if(iconv(iconv_h, &in, &in_size, &out, &out_size) == -1)
            {
               if(verbose)
               {
                 if(errno == EILSEQ)
                 {
                    /*we encountered a multibyte character we cannot handle */
                    fprintf(stderr, _("Cannot convert search phrase from %s character set.\n"), cp1);
                    fprintf(stderr, _("Either the web browser was wrong or this system's default locale doesn't reflect this browser default.\n"));
                    fprintf(stderr, _("You might wish to tweak the current locale.\n"));
                 }
                 else
                 {
                 /* sth weird happened */
                    fprintf(stderr, _("[sengine_search_phrase] Failed to convert search phrase from %s to UTF-8 (engine %s) :%s\n"),
                       cp1, engines_table[sengine_index].display_name, strerror(errno) );
                 }
               }
            }
            else
               cp2 = tmpbuf3;

            /* close handle */
            iconv_close(iconv_h);
         }
      }

      /* to lower */
      strtolower_utf8(cp2);

      return cp2;
   }

   return NULL;
}

/*********************************************/
/* SENGINE_PARSE                             */
/*********************************************/

/*
  loads engine list from file

  returns:
     0 if ok
     1 if error

  note that this function prints its own error
  messages to stderr
*/

/*
  used internally by sengine_parse

  Cleans up, in place, one line read by fgets():
    - the trailing '\n' is removed, if there is one
    - leading and trailing whitespaces are stripped
    - everything from the first '#' on is dropped
    - the first word (up to the first whitespace) is lowercased

  returns a pointer inside 'buffer' to the cleaned line, or NULL when
  the line is blank or holds only a comment
*/

char * sanity_check_line(char *buffer)
{
   size_t len;
   char  *comment;
   char  *cp;

   /* remove '\n' -- only if it is really there: the last line of a
      file may lack it, and an empty string has no last character */
   len = strlen(buffer);
   if(len > 0 && buffer[len - 1] == '\n')
      buffer[--len] = 0;

   /* strip heading whitespaces (stops on the terminating 0) */
   while(isspace((unsigned char)*buffer)) buffer++;

   /* skip comments and blank lines */
   if(*buffer == '#' || *buffer == 0) return NULL;

   /* remove comments */
   if((comment = strchr(buffer, '#')) != NULL)
      *comment = 0;

   /* strip trailing whitespaces; buffer[0] is known not to be one */
   len = strlen(buffer);
   while(len > 1 && isspace((unsigned char)buffer[len - 1]))
      buffer[--len] = 0;

   /* to lower until we find a space */
   for(cp = buffer; *cp != 0 && !isspace((unsigned char)*cp); cp++)
      *cp = (char)tolower((unsigned char)*cp);

   return (*buffer ? buffer : NULL);
}

/*
  used internally by sengine_parse
*/

int qs_compare_sengines(const void* ptr1, const void* ptr2)
{
   return strcmp(((struct lsengine *)ptr1)->dns_middle,
            ((struct lsengine *)ptr2)->dns_middle);
}

/*
  used internally by sengine_parse
*/

int qs_compare_sengines2(const void* ptr1, const void* ptr2)
{
   struct lsengine * se1 = (struct lsengine *)ptr1;
   struct lsengine * se2 = (struct lsengine *)ptr2;
   int ret;

   static signed char cmptable[] = {
      0 , -1, -1,-1,
      +1, 0 , -1,-1,
      +1, +1, 0 ,-1,
      +1, +1, +1, 0
   };
   int y = 0; int x = 0;

   if(*se1->dns_beg == '*')
      y += 2;
   if(*se1->dns_ext == '*')
      y += 1;

   if(*se2->dns_beg == '*')
      x += 2;
   if(*se2->dns_ext == '*')
      x += 1;

   ret = cmptable[x + (y << 2)];
   if(ret == 0)
   {
      /* if we are here it means we may have two identical entries */
      ret = strcmp(se1->dns_beg, se2->dns_beg);
      if(ret == 0)
         return strcmp(se1->dns_ext, se2->dns_ext);
      else
         return ret;
   }
   else
      return ret;
}


int sengine_parse(const char* filename)
{
   FILE *fp;

   char buffer[BUFSIZE];
   char *clean_line; /* line stripped from its space and comments */
   char *cp1, *cp2; /* temporary character pointers */
   int  i;

   if ( (fp=fopen(filename,"r")) == NULL)
   {
      if (verbose)
         fprintf(stderr,"%s %s\n",_("Error: Unable to open search engines file"),filename);
      return 1;
   }

   while ( (fgets(buffer,BUFSIZE,fp)) != NULL)
   {
      char host_middle[SE_MAXDNSMID] = "";
      char host_ext[SE_MAXDNSEXT] = "";
      char host_beg[SE_MAXDNSBEG] = "";
      char display_name[SE_MAXDNAME] = "";
      char *querytag[SE_MAXQTAG];
      char *charsettag = NULL;

      /* check if not blank, not #...., and remove heading and trailing spaces */
      if((clean_line = sanity_check_line(buffer)) == NULL) continue;

      if(strcmp("<searchengine>", buffer) != 0)
      {
         if (verbose)
           fprintf(stderr,"%s %s\n",_("Error: expected <SearchEngine>, got"), buffer);
         return 1;
      }

      /* empty querytag array */
      for(i=0; i<SE_MAXQTAG; i++) querytag[i] = NULL;

      /* let's get all keywords & values */
      while ( (fgets(buffer,BUFSIZE,fp)) != NULL)
      {
         if((clean_line = sanity_check_line(buffer)) == NULL) continue;

         /* check end of block */
         if(strcmp("</searchengine>", buffer) == 0) break;

         /* name keyword ? */
         if(strcmp("name", clean_line) && isspace(clean_line[strlen("name")]))
         {
            clean_line += strlen("name");
            strip_spaces(&clean_line);
            strncpy(display_name, clean_line, SE_MAXDNAME);
            continue;
         }

         /* querytags keyword ? */
         if(strcmp("querytags", clean_line) && isspace(clean_line[strlen("querytags")]))
         {
            char *tag;
            int i;

            clean_line += strlen("querytags") + 1;

            i = 0; tag = strtok(clean_line, ",");

            for(; tag != NULL; tag = strtok(0, ","), i++)
            {
               if(i == SE_MAXQTAG)
               {
                  if (verbose)
                     fprintf(stderr,"%s %u\n",_("Error: too many querytags value, maximum is "), SE_MAXQTAG);
                  return 1;
               }

               strip_spaces(&tag);

               if(*tag == 0)
               {
                  if (verbose)
                     fprintf(stderr,"%s \n",_("Error: empy querytags value"));
                  return 1;
               }
               if(tag[strlen(tag) - 1] != '=')
               {
                  if (verbose)
                     fprintf(stderr,"%s \n",_("Error: invalid querytags value, should end with '='"));
                  return 1;
               }
               /* let's save tag */
               querytag[i] = malloc(strlen(tag) + 1);
               strncpy(querytag[i], tag, strlen(tag) + 1);
            }
            continue;
         }

         /* charsettag keyword ? */
         if(strcmp("charsettag", clean_line) && isspace(clean_line[strlen("charsettag")]))
         {
            clean_line += strlen("charsettag");
            strip_spaces(&clean_line);
            if(clean_line[strlen(clean_line) - 1] != '=')
            {
               if (verbose)
                  fprintf(stderr,"%s \n",_("Error: invalid charsettag value, should end with '='"));
               return 1;

            }
            charsettag = malloc(strlen(clean_line) + 1);
            strncpy(charsettag, clean_line, strlen(clean_line) + 1);
            continue;
         }

         /* hostname keyword ? */
         if(strcmp("hostname", clean_line) && isspace(clean_line[strlen("hostname")]))
         {
            clean_line += strlen("hostname");
            strip_spaces(&clean_line);

            /* find last point */
            cp1 = clean_line;
            cp2 = clean_line + strlen(clean_line) - 1;
            while(*cp2 != '.' && cp2 != cp1) cp2--;

            /* find first point */
            while(*cp1 != '.' && cp2 != cp1) cp1++;

            if(((cp2 - cp1) <= 1) || cp2[1] == 0 || cp1 == clean_line)
            {
               if (verbose)
                  fprintf(stderr,"%s %s\n",_("Error: invalid host name "), clean_line);
               return 1;
            }
            strncpy(host_middle, cp1 + 1, cp2 - cp1 - 1);
            strncpy(host_ext, cp2 + 1, SE_MAXDNSEXT - 1);
            host_ext[SE_MAXDNSEXT - 1] = 0;
            *cp1=  0;
            strncpy(host_beg, clean_line, SE_MAXDNSBEG - 1);
            host_beg[SE_MAXDNSBEG - 1] = 0;
            continue;
         }

         if (verbose)
            fprintf(stderr,"%s %s\n",_("Error: invalid line: "),clean_line);
         return 1;
      }

      /* check if we have all needed pieces of information */

      if(*host_beg == 0 || *host_ext == 0 || *host_middle == 0 ||
         *display_name == 0 || querytag[0] == 0)
      {
         if (verbose)
            fprintf(stderr,"%s %s\n",_("Error: search engine lacks some config lines: "), display_name);
         return 1;
      }

      /* let's add it to the list */
      n_sengines++;
      engines_table = realloc(engines_table, sizeof(struct lsengine) * n_sengines);
      strcpy(engines_table[n_sengines - 1].dns_beg, host_beg);
      strcpy(engines_table[n_sengines - 1].dns_middle, host_middle);
      strcpy(engines_table[n_sengines - 1].dns_ext, host_ext);
      strcpy(engines_table[n_sengines - 1].display_name, display_name);
      for(i=0; i < SE_MAXQTAG; i++)
         engines_table[n_sengines - 1].querytag[i] = querytag[i];
      engines_table[n_sengines - 1].charsettag = charsettag;
   }

   /* let's sort the list , key = dns_middle */
   qsort(engines_table, n_sengines, sizeof(struct lsengine), qs_compare_sengines);

   /* now do push the '*' at the back of the list */
   /* the goal is to have hostnames whith the same middle name ('foo') sorted as:
      xxx.foo.xxx  <-- lowest index
        *.foo.xxx
      xxx.foo.*
        *.foo.*    <-- highest index
   */

   for(i = 0; i < n_sengines;)
   {
      int j = 1; /* j countains the number of entries with same middle name */

      while(((i + j) < n_sengines)
         && (strcmp(engines_table[i].dns_middle, engines_table[i + j].dns_middle) == 0))
      {
         j++;
      }

      if(j > 1)
         qsort(engines_table + i, j, sizeof(struct lsengine), qs_compare_sengines2);

      /* jump to next middle name */
      i += j;
   }

   /* check for duplicate search engines
      --we do not want duplicates, this will break our searches */

   for(i = 1; i < n_sengines; i++)
   {
      if(strcmp(engines_table[i].dns_middle, engines_table[i - 1].dns_middle) == 0
         && strcmp(engines_table[i].dns_beg, engines_table[i - 1].dns_beg) == 0
         && strcmp(engines_table[i].dns_ext, engines_table[i - 1].dns_ext) == 0)
      {
         if (verbose)
            fprintf(stderr, _("Error:Duplicated search engines %s(%s.%s.%s) and %s(%s.%s.%s)\n")
                  ,engines_table[i].display_name, engines_table[i].dns_beg
                  ,engines_table[i].dns_middle
                  ,engines_table[i].dns_ext
                  ,engines_table[i - 1].display_name, engines_table[i - 1].dns_beg
                  ,engines_table[i - 1].dns_middle
                  ,engines_table[i - 1].dns_ext);
         return 1;
      }
   }

   fclose(fp);

   return 0;
}

/*********************************************/
/* SENGINE_MAKE_TABLE                        */
/*********************************************/

/*
   Allocates an array of n_sengines struct sengine, one per entry of
   engines_table and in the same (sorted) order. Each display_name
   points into engines_table (no copy), each hash table pointer is
   NULL and each count is 0.

   returns NULL if the allocation fails
*/

struct sengine *sengine_make_table()
{
   struct sengine *table = malloc(sizeof(struct sengine) * n_sengines);
   int idx;

   if(table == NULL)
   {
      if(verbose)
         fprintf(stderr,"[sengine_make_table] %s \n",_("Error: out of memory"));
      return NULL;
   }

   idx = 0;
   while(idx < n_sengines)
   {
      struct sengine *entry = &table[idx];

      entry->display_name = engines_table[idx].display_name;
      entry->s_htab       = NULL;
      entry->count        = 0;
      idx++;
   }

   return table;
}

/*********************************************/
/* SENGINES_FREE_TABLE                       */
/*********************************************/

/* frees given sengine table */

void sengine_free_table(struct sengine *table[])
{
   if(*table != NULL)
   {
      int i;
      /* for each engine */
      for(i = 0; i < n_sengines; i++)
      {
         sengine_del_shtab((*table)[i].s_htab);
         free((*table)[i].s_htab);
      }

      free(*table);
      *table = NULL;
   }
}

/*********************************************/
/* NEW_SNODE - Search str node creation      */
/*********************************************/

/*
   Allocates a search string node holding a private copy of 'str',
   with its count set to 1 (the other fields are left for the caller
   to fill in).

   A string of MAXSRCHH characters or more is truncated IN PLACE to
   MAXSRCHH - 1 characters first, with a warning when verbose is set.

   returns NULL if memory runs out
*/

SNODEPTR new_snode(char *str)
{
   SNODEPTR node;
   char     *copy;
   size_t   len = strlen(str);

   if (len >= MAXSRCHH)
   {
      if (verbose)
      {
         fprintf(stderr,"[new_snode] %s (%d)",_("Warning: String exceeds storage size"),(int)len);
         if (debug_mode)
            fprintf(stderr,":\n--> %s",str);
         fprintf(stderr,"\n");
      }
      str[MAXSRCHH-1]=0;
      len = MAXSRCHH-1;
   }

   copy = malloc(len + 1);
   if (copy == NULL)
      return (SNODEPTR)NULL;
   memcpy(copy, str, len + 1);   /* includes the terminating 0 */

   node = malloc(sizeof(struct snode));
   if (node == NULL)
   {
      free(copy);
      return node;
   }

   node->string = copy;
   node->count  = 1;
   return node;
}

/*********************************************/
/* SENGINE_ADD_SEARCH_PHRASE                 */
/*********************************************/

/*
   Records 'count' hits of search phrase 'sp' leading to 'url' for
   engine table[index]. The engine's hash table is created (zeroed)
   on first use.

   returns  0 if ok (the engine count is then increased by 'count'),
            1 if the phrase could not be stored,
           -1 if the hash table could not be allocated
*/

int sengine_add_search_phrase(char *sp, char *url, u_long count, struct sengine table[], int index)
{
   struct sengine *engine = &table[index];

   /* create hash table for given engine if needed, already cleared */
   if(engine->s_htab == NULL)
   {
      engine->s_htab = calloc(MAXSPHASH, sizeof(SNODEPTR));
      if(engine->s_htab == NULL)
      {
         if(verbose)
            fprintf(stderr,"[sengine_add_search_phrase] %s\n",_("Error: out of memory"));
         return -1;
      }
   }

   /* add node */
   if(sengine_put_snode(sp, url, count, engine->s_htab) != 0)
      return 1;

   /* added: update count */
   engine->count += count;
   return 0;
}

/*********************************************/
/* SENGINE_PUT_SNODE                         */
/*********************************************/

/*
   insert/update search node

   If 'sp' is already in 'htab', its count is increased by 'count'
   and 'url' is appended to its url list unless already there.
   Otherwise a new node is created at the head of its bucket.
   Empty strings and strings starting with a space are ignored.

   returns 0 if ok (or ignored), 1 if the node could not be created
*/

int sengine_put_snode(char *sp, char *url, u_long count, SNODEPTR *htab)
{
   SNODEPTR node;
   u_long bucket = hash(sp) & (MAXSPHASH - 1);

   if (sp[0]==0 || sp[0]==' ') return 0;     /* skip bad search strs */

   /* look for the phrase in its bucket chain */
   for (node = htab[bucket]; node != NULL; node = node->next)
   {
      LISTPTR known;

      if (strcmp(node->string,sp) != 0)
         continue;

      /* found... bump counter && add url in list if needed */
      node->count += count;
      known = node->urls;
      while (known != NULL && strcmp((char *)known->item, url) != 0)
         known = known->next;
      if (known == NULL) /* not found, add it */
         add_list(&node->urls, url, strlen(url) + 1);
      return 0;
   }

   /* not found (or empty bucket): new node becomes the head of the chain */
   node = new_snode(sp);
   if (node == NULL)
      return 1;

   node->count = count;
   node->urls  = new_list(url, strlen(url) + 1);
   node->next  = htab[bucket];
   htab[bucket] = node;

   return 0;
}

/*********************************************/
/* SENGINE_DEL_SHTAB                         */
/*********************************************/

/*
  delete search str hash table: frees every node (string, url list
  and the node itself) of each of the MAXSPHASH buckets and resets
  the buckets to NULL. The bucket array itself is not freed.

  if argument is NULL, does nothing

*/

void sengine_del_shtab(SNODEPTR *htab)
{
   int bucket;

   if(htab == NULL)
      return;

   for (bucket = 0; bucket < MAXSPHASH; bucket++)
   {
      SNODEPTR node = htab[bucket];

      while (node != NULL)
      {
         SNODEPTR following = node->next;   /* saved before node is freed */

         free (node->string);
         del_list(&node->urls, 1);
         free (node);
         node = following;
      }
      htab[bucket] = NULL;
   }
}

/*********************************************/
/* SENGINE_FREE_INTERNAL_TABLE              */
/*********************************************/

/*
   Frees engines_table together with the query tags and charset tag
   owned by each of its n_sengines entries, then resets engines_table
   to NULL so that no dangling pointer is left behind.

   NOTE: n_sengines is deliberately left untouched, because
   sengine_free_table() uses it to walk the tables it frees.
*/

void sengine_free_internal_table()
{
   int i, j;

   if(engines_table == NULL)
      return;

   for(i=0; i < n_sengines; i++)
   {
      /* bound first: querytag[SE_MAXQTAG] does not exist and must not
         be read when every slot is in use */
      for(j=0; (j < SE_MAXQTAG) && (engines_table[i].querytag[j] != NULL); j++)
      {
         free(engines_table[i].querytag[j]);
      }
      free(engines_table[i].charsettag);
   }
   free(engines_table);
   engines_table = NULL;
}

/*********************************************/
/* SENGINE_DUMP - debugging purpose          */
/*********************************************/

void sengines_dump()
{
   int i, j;

   for(i = 0; i < n_sengines; i++)
   {
      fprintf(stderr, "display_name  -%s-\n", engines_table[i].display_name);
      fprintf(stderr, "dns -%s-%s-%s-\n", engines_table[i].dns_beg,
         engines_table[i].dns_middle, engines_table[i].dns_ext);
      for(j = 0; j < SE_MAXQTAG && engines_table[i].querytag[j]; j++)
      {
         fprintf(stderr, "tag --%s--\n", engines_table[i].querytag[j]);
      }
   }
}


/* Generated by  Doxygen 1.6.0   Back to index */