/* VIGENERE.C, Text autocorrelation program
   J.W.Stumpel, 18 February 1989, revised 2 October 1990, 10 March 1997
 
   Purpose: this program cryptanalyzes Vigenere-type ciphertexts.
 
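   Usage: VIGENERE [ciphertext]
          VIGENERE -p ciphertext

   With -p the key length and keyword found are reported on stderr and
   must be confirmed (or replaced) interactively before decryption.
   Without a ciphertext argument the text is read from standard input.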
 
   Output can be redirected into a text file.
 
   Non-alphabetic characters in the input-file are ignored.
 
   Exception: the character * can be used to indicate unknown or missing
   letters in the ciphertext.
 
   The * characters are stored, and count as part of the "distance"
   between letters.

   A text file STANDARD.FRQ consisting of 26 integers (separated by
   return/linefeed), proportional to the frequencies of letters in the
   plaintext language,  must be present on the disk.
 
   Max. text length (this version): 4000 characters  */


#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef __TURBOC__
#include <alloc.h>
#endif

/* Tunable limits.  KEYMAX is the exclusive upper bound of the key-length
   search in main and also the size of main's keyword buffer. */
#define KEYMIN 3                /* minimum key length                     */
#define KEYMAX 19               /* maximum key length + 1                 */
#define BUFMAX 4000             /* maximum ciphertext length              */
#define NSYMBOLS 26             /* number of letters in alphabet          */
#define LINE_LENGTH 65          /* length of plaintext lines              */

/* Helpers defined after main:
   findkey - key letter whose shift best matches a column's letter counts
             to the standard frequencies
   phi     - autocorrelation value of one frequency count
   count   - letter counts of every period-th symbol, starting at phase */
char findkey (int *, int *);
float phi (int *);
void count (char *, int, int *, int, int);

#ifndef __TURBOC__
/* strupr is defined in turbo c, not in gcc */
/* Convert the NUL-terminated string s to upper case in place.
   Returns s, so the call can be nested in an expression. */
char *
strupr (char *s)
{
  char *p;

  /* toupper requires a value representable as unsigned char (or EOF);
     a plain char may be negative, hence the cast. */
  for (p = s; *p != '\0'; p++)
    *p = (char) toupper ((unsigned char) *p);
  return s;
}
#endif

/* Read characters from stdin until a 'y' or 'n' (in either case) is seen.
   Returns 1 for 'y' and 0 for 'n'.  End of input is treated as 'y' so the
   program accepts its own suggestion instead of looping forever. */
static int
ask_yes_no (void)
{
  int c;

  while ((c = getchar ()) != EOF)
    {
      c = tolower (c);
      if (c == 'y')
        return 1;
      if (c == 'n')
        return 0;
    }
  return 1;
}

/* Program entry point.

   Reads the standard letter frequencies from "standard.frq", loads the
   ciphertext (from the named file, or from stdin), picks the key length
   with the highest average autocorrelation, derives the most probable
   keyword and writes the decrypted text to stdout in lines of
   LINE_LENGTH characters.  With "-p file" the findings are shown on
   stderr and the user may override the key length and the keyword.

   Exit status: 0 on success, 1 if the ciphertext file cannot be opened,
   3 if the frequency file is missing or incomplete, 4 if memory for the
   ciphertext cannot be allocated. */
int
main (int argc, char *argv[])
{
  int FQ[NSYMBOLS + 1], i, j, letters, key_length, bestkey;
  int standard[NSYMBOLS + 1], k;
  int prompt = 0;
  char *buff, *text, keyword[KEYMAX];
  float max_correlation;
  FILE *STFRQ = fopen ("standard.frq", "r");
  FILE *CIPHER;

  if ((argc >= 3) && (!strcmp (argv[1], "-p")))
    prompt = 1;
  if (argc == 2 + prompt)
    {
      CIPHER = fopen (argv[1 + prompt], "r");
      if (CIPHER == NULL)
        {
          fprintf (stderr, "File \"%s\" not found\n", argv[1 + prompt]);
          if (STFRQ != NULL)
            fclose (STFRQ);
          return 1;
        }
    }
  else
    {
      /* Any other argument count: take the ciphertext from stdin */
      CIPHER = stdin;
    }

/* Read the standard letter frequencies from file */

  if (STFRQ == NULL)
    {
      fprintf (stderr, "File of standard letter frequencies missing\n");
      if (CIPHER != stdin)
        fclose (CIPHER);
      return 3;
    }

  for (j = 1; j <= NSYMBOLS; j++)
    {
      /* Every one of the NSYMBOLS values is needed by findkey; a short or
         malformed file would otherwise leave standard[] uninitialized. */
      if (fscanf (STFRQ, "%d", &standard[j]) != 1)
        {
          fprintf (stderr,
                   "File of standard letter frequencies is incomplete\n");
          fclose (STFRQ);
          if (CIPHER != stdin)
            fclose (CIPHER);
          return 3;
        }
    }
  fclose (STFRQ);

/* Get the ciphertext into memory: letters are stored as 1..NSYMBOLS,
   the placeholder '*' is stored unchanged, everything else is dropped */

  buff = malloc (BUFMAX);
  if (buff == NULL)
    {
      fprintf (stderr, "Out of memory\n");
      if (CIPHER != stdin)
        fclose (CIPHER);
      return 4;
    }
  text = buff;
  letters = 0;
  while ((letters < BUFMAX) && ((k = getc (CIPHER)) != EOF))
    {
      if ((k == '*') || isalpha (k))
        {
          *text++ = (char) ((k == '*') ? k : (toupper (k) - 64));
          letters++;
        }
    }
  if (CIPHER != stdin)
    fclose (CIPHER);

/* try keylengths from KEYMIN to KEYMAX */

  max_correlation = 0;
  /* Start from KEYMIN rather than 0: if no length scores above zero (very
     short text, or no repeated letters in any column) a key length of 0
     would make the decryption loop index keyword[] out of bounds. */
  bestkey = KEYMIN;
  for (key_length = KEYMIN; key_length < KEYMAX; key_length++)
    {
      float correlation = 0;

      for (j = 0; j < key_length; j++)
        {
          count (buff, letters, FQ, j, key_length);
          correlation += phi (FQ);
        }
      correlation /= key_length;
      if (prompt)
        fprintf (stderr,
                 "\nkeylength=%2d   corr=%.4f", key_length, correlation);
      if (correlation > max_correlation)
        {
          bestkey = key_length;
          max_correlation = correlation;
        }
    }

  if (prompt)
    {
      fprintf (stderr,
               "\n\nbest length=%d    corr=%.4f", bestkey, max_correlation);
      fprintf (stderr, "\nProceed with this keylength (y/n) ? ");
      if (!ask_yes_no ())
        {
          for (;;)
            {
              int wanted, got;

              fprintf (stderr, "\nInput new key length: ");
              got = scanf ("%d", &wanted);
              if (got == EOF)
                break;          /* no more input: keep the computed length */
              /* keyword[] holds KEYMAX chars including the terminator */
              if ((got == 1) && (wanted >= 1) && (wanted < KEYMAX))
                {
                  bestkey = wanted;
                  break;
                }
              if (got != 1)
                {
                  /* discard the token that is not a number */
                  int c;
                  while (((c = getchar ()) != EOF) && !isspace (c))
                    ;
                }
              fprintf (stderr, "\nKey length must be between 1 and %d",
                       KEYMAX - 1);
            }
        }
    }

/* derive one key letter per column */

  for (j = 0; j < bestkey; j++)
    {
      count (buff, letters, FQ, j, bestkey);
      keyword[j] = findkey (FQ, standard);
    }
  keyword[j] = 0;

  if (prompt)
    {
      fprintf (stderr, "\nMost probable keyword is: %s", keyword);
      fprintf (stderr, "\nProceed with this keyword (y/n) ? ");
      if (!ask_yes_no ())
        {
          char entry[64];

          for (;;)
            {
              fprintf (stderr, "\nInput new keyword: ");
              /* bounded read: the input can never overrun entry[] */
              if (scanf ("%63s", entry) != 1)
                break;          /* no more input: keep computed keyword */
              if (strlen (entry) == (size_t) bestkey)
                {
                  /* bestkey < KEYMAX, so the copy fits into keyword[] */
                  strupr (entry);
                  memcpy (keyword, entry, (size_t) bestkey + 1);
                  break;
                }
              fprintf (stderr, "\nKeyword must have %d characters",
                       bestkey);
            }
        }
    }

/* print plaintext */

  text = buff;
  i = 0;
  j = 0;
  while (letters--)
    {
      k = *text++;
      if (k != '*')
        {
          /* k is 1..26 and keyword[j] an ASCII capital: 129 = 65 + 64
             turns (cipher index - key index) back into a capital letter,
             wrapping around the alphabet when the result falls below 'A' */
          k += 129 - keyword[j];
          if (k < 65)
            k += NSYMBOLS;
        }
      putchar (k);
      if (++j == bestkey)
        j = 0;
      if (++i == LINE_LENGTH)
        {
          i = 0;
          printf ("\n");
        }
    }
  free (buff);
  printf ("\n");
  fprintf (stderr, "\n");
  return 0;
}                               /* End of main program */


float
phi (int *f)
/* Perform autocorrelation on a frequency count. f[0] already contains
   the sum of f[1]..f[NSYMBOLS].
   Returns (sum of f[i]^2 - f[0]) / f[0]^2, or 0 when the count is empty
   (f[0] <= 0), so that a column without letters never yields a
   division by zero. */
{
  int i;
  long n = 0;
  long total = f[0];

  if (total <= 0)
    return 0.0f;
  for (i = 1; i <= NSYMBOLS; i++)
    n += (long) f[i] * f[i];
  n -= total;
  /* square the total in floating point: an int product can overflow
     where int is narrower than long */
  return (float) n / ((float) total * (float) total);
}


void
count (char *buffer, int length, int *frequ, int phase, int period)
/* Count letter frequencies per component (or phase).
   buffer  - stored ciphertext: values 1..NSYMBOLS for letters, '*' for
             unknown positions
   length  - number of symbols in buffer
   frequ   - output array of NSYMBOLS + 1 ints; frequ[1..NSYMBOLS] receive
             the letter counts and frequ[0] their sum
   phase   - index of the first symbol to look at
   period  - distance between the symbols looked at
   All counts are cleared first.  Symbols outside 1..NSYMBOLS (including
   '*') are skipped; a negative phase or a non-positive period leaves all
   counts at zero. */
{
  int j;

  for (j = 0; j <= NSYMBOLS; j++)
    frequ[j] = 0;
  if ((phase < 0) || (period <= 0))
    return;
  /* Walk by index instead of by pointer so that no pointer beyond the end
     of the buffer is ever formed. */
  for (j = phase; j < length; j += period)
    {
      int symbol = (unsigned char) buffer[j];

      if ((symbol >= 1) && (symbol <= NSYMBOLS))
        {
          frequ[symbol]++;
          frequ[0]++;
        }
    }
}

char
findkey (int *frequ, int *standard)
/* Find the key letter of one column.
   For each of the NSYMBOLS cyclic shifts, the column counts
   frequ[1..NSYMBOLS] are multiplied element-wise with the plain language
   frequencies standard[1..NSYMBOLS] and summed.  The first shift with the
   largest sum wins; shift 0 is returned when no sum is positive.
   The result is 65 (ASCII 'A') plus the winning shift. */
{
  int shift, winner = 0;
  long top_score = 0;

  for (shift = 0; shift < NSYMBOLS; shift++)
    {
      long score = 0;
      int letter;

      /* letter runs 0-based; the +1 maps back onto the 1-based tables and
         the modulo wraps the shifted position around the alphabet */
      for (letter = 0; letter < NSYMBOLS; letter++)
        score += (long) standard[letter + 1]
          * frequ[(letter + shift) % NSYMBOLS + 1];

      if (score <= top_score)
        continue;
      top_score = score;
      winner = shift;
    }
  return 65 + winner;
}
