Logo Search packages:      
Sourcecode: webdruid version File versions  Download package

dot_output.c

/*
    dot_output.c - output functions using the dot utility

    Copyright (C) 2003-2004  Fabien Chevalier (fabien@juliana-multimedia.com)

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version, and provided that the above
    copyright and permission notice is included with all distributed
    copies of this or derived software.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/

#include "config.h"

/* local includes */

/* ensure sys/types */
#ifndef _SYS_TYPES_H
#include <sys/types.h>
#endif


/* some systems need this */
#ifdef HAVE_MATH_H
#include <math.h>
#endif

/* SunOS 4.x Fix */
#ifndef CLK_TCK
#define CLK_TCK _SC_CLK_TCK
#endif

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "webdruid.h"
#include "linklist.h"
#include "hashtab.h"
#include "utils.h"
#include "lang.h"

/* Graphviz color names picked at random for the edges of the graphs.
   Every entry must stay readable over a white background. */

const char * gv_colors[] = {
   "red",   "brown", "orange", "darkgoldenrod",
   "green", "cyan",  "blue",   "magenta"
};


/* number of entries in gv_colors */
#define GV_NCOLORS (sizeof(gv_colors) / sizeof(gv_colors[0]))

/* local prototypes */

/* sets urlindex to -1 on every node reachable from um_htab */
void clear_url_index();
/* stores index in urlnode->urlindex and writes the matching
   "url<index> [...]" node line to fp */
void write_dot_url(UNODEPTR urlnode, int index, FILE *fp);
/* not defined in this file; used below to launch dot with DOT_TIMEOUT.
   NOTE(review): presumably returns non-zero on failure -- confirm
   against its definition */
int safe_run(const char *path, char *const argv[], int timeout);

/* qsort callbacks; both receive pointers to PNODEPTR array elements */
int qs_compare_count(const void* nodeptr1, const void* nodeptr2);
int qs_compare_from(const void* nodeptr1, const void* nodeptr2);


/*********************************************/
/* WRITE_MONTH_PATH_GRAPH - does what it says*/
/*********************************************/

/*
   Draws the "top paths of the month" graph.

   Paths are read from gp_htab.  Only paths that have been followed
   more than once are drawn.  When the table holds more than
   path_graph_max_paths entries, only the path_graph_max_paths entries
   with the highest counts are considered.

   file must be of the form xxxx

   A temporary xxxx.dot is written, dot is run on it to produce
   xxxx.png (the graph itself), and xxxx.dot is then removed.
   Generation of xxxx.cmap (client map, to be embedded in HTML) is
   currently disabled.

   Returns the value returned by safe_run() for the dot invocation,
   or 1 when the dot file cannot be opened or the work array cannot
   be allocated.
*/

int write_month_path_graph(const char *file)
{
   char dot_filename[64];
   char buff[256];
   FILE *out_fp;
   char *dot_args[5]; /* argv for dot, filled once the dot file is complete */
   PNODEPTR pnode;
   int ret;
   int i;
   int total_items;

   /* compute filenames */
   snprintf(dot_filename, sizeof(dot_filename), "%s.dot", file);

   out_fp = open_out_file(dot_filename);
   if(out_fp == NULL)
      return 1;

   /* graph header */
   fprintf(out_fp, "digraph G {\n");
   fprintf(out_fp, "  fontname=\"%s\";\n", font_face);
   fprintf(out_fp, "  labelloc=t;\n");
   snprintf(buff, sizeof(buff), _("Top followed paths for %s"), hname);
   fprintf(out_fp, "  label=\"%s\";\n", buff);
   fprintf(out_fp, "  node[shape=ellipse, fontname=\"%s\"];\n", font_face);

   /* count the recorded paths */
   total_items = 0;
   for(i=0; i<MAXHASH; i++)
   {
      for(pnode = gp_htab[i]; pnode != NULL; pnode = pnode->next)
         total_items++;
   }

   if(total_items > 0)
   {
      PNODEPTR *nodearray = malloc(sizeof(PNODEPTR) * total_items);
      int n_items;
      int urlindex;
      int j;

      if(nodearray == NULL)
      {
         /* out of memory: drop the half-written dot file and report failure */
         fclose(out_fp);
         unlink(dot_filename);
         return 1;
      }

      /* flatten the hash table into nodearray */
      j = 0;
      for(i=0; i<MAXHASH; i++)
      {
         for(pnode = gp_htab[i]; pnode != NULL; pnode = pnode->next)
         {
            nodearray[j] = pnode;
            j++;
         }
      }

      /*
      Reset the url indexes.

      The index is the number given to an url before it is written
      in the dot file.  When an index is assigned to an url, a line
      like this is written in the dot file:

         urlxx [label="toto.html", ...];

      The url is then referred to as urlxx, e.g.:

         url0 [label="Home page", style="filled", URL="http://cthugha"];
         url1 [label="/pages/fr/", URL="http://cthugha/pages/fr/"];
         url0 -> url1;
      */
      clear_url_index();
      urlindex = 0;

      /* we want to display at most path_graph_max_paths paths:
         sorting is only needed when some entries have to be left out */
      if(total_items > path_graph_max_paths)
      {
         qsort(nodearray, total_items, sizeof(PNODEPTR), qs_compare_count);
         n_items = path_graph_max_paths;
      }
      else
      {
         n_items = total_items;
      }

      /* emit the dot lines */
      for(i=0; i<n_items; i++)
      {
         LISTPTR lptr;
         UNODEPTR prevurl = 0, url = 0;
         const char *color_str;

         pnode = nodearray[i];

         /* only paths used more than once are considered -- prevents
            generating mad things at the beginning of the month */
         if(pnode->count <= 1)
            continue;

         /* one color per path */
         color_str = gv_colors[random() % GV_NCOLORS];
         fprintf(out_fp, "  edge[color=%s, fontcolor=%s, fontname=\"%s\"];\n", color_str, color_str, font_face);

         for(lptr = pnode->path; lptr != NULL; lptr = lptr->next)
         {
            prevurl = url;
            /* NOTE(review): the result is dereferenced unchecked --
               confirm find_url_node cannot return NULL here */
            url = find_url_node((char *) lptr->item);
            if(url->urlindex == -1)
            {
               /* first time this url is met: declare its node */
               write_dot_url(url, urlindex, out_fp);
               urlindex++;
            }
            if(prevurl != 0)
            {
               fprintf(out_fp, "  url%u -> url%u [label=\"x%u\", weight=%u];\n",
                     prevurl->urlindex, url->urlindex, pnode->count, pnode->count);
            }
         }
      }
      free(nodearray);
   }

   /* end of the dot file */
   fprintf(out_fp, "}\n");
   fclose(out_fp);

   /* we can now run dot for png */
   snprintf(buff, sizeof(buff), "-o%s.png", file);

   dot_args[0] = "dot";
   dot_args[1] = "-Tpng";
   dot_args[2] = buff;
   dot_args[3] = dot_filename;
   dot_args[4] = 0;

   ret = safe_run(dot_location, dot_args, DOT_TIMEOUT);

   /* the cmap output (dot -Tcmap -o<file>.cmap) is UNUSED FOR NOW */

   unlink(dot_filename);

   return ret;
}

/*********************************************/
/* WRITE_MONTH_USERS_FLOW - does what it says*/
/*********************************************/

/*
   Draws the users flow graph.

   Entries are read from fm_htab.  For each entry the first item of
   its path is the url the users came from and the second item is the
   url they went to.  Entries sharing the same "from" url form a
   group: every edge of a group is labelled with its share of the
   group's hits (percentage) and its hit count, and a group's edges
   share one color.

   Only edges with at least min_hits hits are drawn.  min_hits is 2
   when the table holds no more than users_flow_max_edges entries;
   otherwise it is one more than the count of the first entry that
   does not fit, so that entries tied with it are all left out.

   file must be of the form xxxx.  A temporary xxxx.dot is written,
   dot is run on it to produce xxxx.png, and xxxx.dot is then removed.

   Returns the value returned by safe_run() for the dot invocation,
   or 1 when the dot file cannot be opened or the work array cannot
   be allocated.
*/

int write_month_users_flow(const char *file)
{
   int i;
   int urlindex;
   PNODEPTR pnode;
   char dot_filename[64];
   char buff[256];
   FILE *out_fp;
   char *dot_args[5]; /* argv for dot, filled once the dot file is complete */
   int ret;
   int total_items;

   /* compute filenames */
   snprintf(dot_filename, sizeof(dot_filename), "%s.dot", file);

   out_fp = open_out_file(dot_filename);
   if(out_fp == NULL)
      return 1;

   /* graph header */
   fprintf(out_fp, "digraph G {\n");
   fprintf(out_fp, "  fontname=\"%s\";\n", font_face);
   fprintf(out_fp, "  labelloc=t;\n");
   snprintf(buff, sizeof(buff), _("Users flow for %s"), hname);
   fprintf(out_fp, "  label=\"%s\";\n", buff);
   fprintf(out_fp, "  node[shape=ellipse, fontname=\"%s\"];\n", font_face);

   /* no url has been written to the dot file yet */
   clear_url_index();
   urlindex = 0;

   /* count items */
   total_items = 0;
   for(i=0; i<MAXHASH; i++)
   {
      for(pnode = fm_htab[i]; pnode != NULL; pnode = pnode->next)
         total_items++;
   }

   if(total_items > 0)
   {
      PNODEPTR *nodearray = malloc(sizeof(PNODEPTR) * total_items);
      int group_start;
      int j;
      int min_hits;

      if(nodearray == NULL)
      {
         /* out of memory: drop the half-written dot file and report failure */
         fclose(out_fp);
         unlink(dot_filename);
         return 1;
      }

      /* flatten the hash table into nodearray */
      j = 0;
      for(i=0; i<MAXHASH; i++)
      {
         for(pnode = fm_htab[i]; pnode != NULL; pnode = pnode->next)
         {
            nodearray[j] = pnode;
            j++;
         }
      }

      if(total_items > users_flow_max_edges)
      {
         /* Too many edges: after sorting by descending count, the entry
            at index users_flow_max_edges is the first one that does not
            fit.  There is no reason to draw some of the entries tied
            with it and not the others, so the threshold is set just
            above its count. */
         qsort(nodearray, total_items, sizeof(PNODEPTR), qs_compare_count);
         min_hits = nodearray[users_flow_max_edges]->count;
         min_hits++;
      }
      else
      {
         min_hits = 2;
      }

      /* sort nodearray -- key = 1st url of the path */
      qsort(nodearray, total_items, sizeof(PNODEPTR), qs_compare_from);

      /* walk the array one "from" url group at a time */
      group_start = 0;
      while(group_start < total_items)
      {
         const char *from_url = (const char *)nodearray[group_start]->path->item;
         const char *color_str;
         int total_hits = 0;
         int group_end = group_start;

         /* find the end of the group and sum its hits; the comparison
            uses strcmp so that it always agrees with the sort key of
            qs_compare_from, whether or not equal urls share storage */
         do
         {
            total_hits += nodearray[group_end]->count;
            group_end++;
         } while(group_end < total_items
                  && strcmp(from_url, (const char *)nodearray[group_end]->path->item) == 0);

         /* say what colors we will use */
         color_str = gv_colors[random() % GV_NCOLORS];
         fprintf(out_fp, "  edge[color=%s, fontcolor=%s, fontname=\"%s\"];\n", color_str, color_str, font_face);

         /* this loop finally produces the dot lines */
         for(j = group_start; j < group_end; j++)
         {
            UNODEPTR from;
            UNODEPTR to;

            if(nodearray[j]->count < min_hits)
               continue;

            /* NOTE(review): both results are dereferenced unchecked, and
               the path is assumed to hold at least two items -- confirm */
            from = find_url_node((char *) nodearray[j]->path->item);
            to = find_url_node((char *) nodearray[j]->path->next->item);
            if(from->urlindex == -1)
            {
               write_dot_url(from, urlindex, out_fp);
               urlindex++;
            }
            if(to->urlindex == -1)
            {
               write_dot_url(to, urlindex, out_fp);
               urlindex++;
            }
            fprintf(out_fp, "  url%u -> url%u [label=\"%.00f%%\\n(%u hits)\", weight=%u];\n",
                     from->urlindex, to->urlindex,
                     PCENT(nodearray[j]->count, total_hits),
                     nodearray[j]->count, nodearray[j]->count);
         }

         group_start = group_end;
      }

      free(nodearray);
   }

   /* end of the dot file */
   fprintf(out_fp, "}\n");
   fclose(out_fp);

   /* we can now run dot for png */
   snprintf(buff, sizeof(buff), "-o%s.png", file);

   dot_args[0] = "dot";
   dot_args[1] = "-Tpng";
   dot_args[2] = buff;
   dot_args[3] = dot_filename;
   dot_args[4] = 0;

   ret = safe_run(dot_location, dot_args, DOT_TIMEOUT);

   /* the cmap output (dot -Tcmap -o<file>.cmap) is UNUSED FOR NOW */

   unlink(dot_filename);

   return ret;
}

/*********************************************/
/* CLEAR_URLINDEX                            */
/*********************************************/

/*
  Resets the urlindex of every node in um_htab to -1,
  which marks the url as "not yet written to the dot file".

  The index is the number given to an url before it
  is written in the dot file.

  When an index is assigned to an url, a line like
  this is written in the dot file:

  urlxx [label="toto.html", ...];

  The index is then used to refer to the url as
  urlxx

  sample:

    url0 [label="Home page", style="filled", URL="http://cthugha"];
    url1 [label="/pages/fr/", URL="http://cthugha/pages/fr/"];
    url0 -> url1 [label="62%\n(171 hits)"];

*/

void clear_url_index()
{
   int bucket;

   for(bucket = 0; bucket < MAXHASH; bucket++)
   {
      UNODEPTR node;

      /* walk the chain hanging off this bucket */
      for(node = um_htab[bucket]; node != NULL; node = node->next)
         node->urlindex = -1;
   }
}

/*********************************************/
/* WRITE_DOT_URL                             */
/*********************************************/

/* writes str to fp so that it can sit inside a double-quoted
   dot attribute: '"' and '\' are preceded by a backslash */

static void write_dot_escaped(FILE *fp, const char *str)
{
   for(; *str != '\0'; str++)
   {
      if(*str == '"' || *str == '\\')
         fputc('\\', fp);
      fputc(*str, fp);
   }
}

/* writes a new 'dot' node named url{index}
   in the opened 'dot' file pointed to by fp,
   and records index in urlnode->urlindex.

   The node links to http://hname<url>, or https://hname<url> when
   use_https is non-zero.  The url "/" is the home page: it is
   labelled with _("Home page"), drawn filled, and links to the bare
   host.  Any other url is labelled with its own string, which is
   escaped because it may contain characters that would otherwise end
   the quoted attribute.  hname is written as is. */

void write_dot_url(UNODEPTR urlnode, int index, FILE *fp)
{
   const char *scheme = (use_https != 0) ? "https" : "http";

   urlnode->urlindex = index;

   if(strcmp(urlnode->string, "/") != 0)
   {
      /* not home page */
      fprintf(fp, "  url%u [label=\"", urlnode->urlindex);
      write_dot_escaped(fp, urlnode->string);
      fprintf(fp, "\", URL=\"%s://%s", scheme, hname);
      write_dot_escaped(fp, urlnode->string);
      fprintf(fp, "\"];\n");
   }
   else
   {
      /* home page */
      fprintf(fp, "  url%u [label=\"%s\", style=\"filled\", URL=\"%s://%s\"];\n",
            urlnode->urlindex, _("Home page"), scheme, hname);
   }
}

/*********************************************/
/* QS_COMPARE_XXXX quicksort callbacks       */
/*********************************************/


/* ascending order */

int qs_compare_from(const void* nodeptr1, const void* nodeptr2)
{
   return strcmp((const char *)((*(PNODEPTR *)nodeptr1)->path->item),
                 (const char *)((*(PNODEPTR *)nodeptr2)->path->item));
}


/* descending order */

int qs_compare_count(const void* nodeptr1, const void* nodeptr2)
{
  int val1 = (*(PNODEPTR *)nodeptr1)->count;
  int val2 = (*(PNODEPTR *)nodeptr2)->count;

   if(val1 == val2)
      return 0;
   else if (val1 < val2)
      return 1;
   else
      return -1;
}



Generated by  Doxygen 1.6.0   Back to index