#!/usr/bin/perl
# ########################################################################
# keyextract.pl is a feature extractor for keystroke monitoring
# Copyright (C) 2008 Deian Stefan (stefan at cooper dot edu)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
# ########################################################################
use strict;
use warnings;
use xWord;
use xKey;
use Getopt::Long;

# ########################################################################
# TODO: track_key keeps track of the words and the features we are
# interested in. What it does not do is fix things up when a user presses
# backspace, nor adjust the timing of the next key;
# this might have to be added as a little hack.

# TODO: possibly keep track of all the windows (in case the user
# moves the mouse and types at the same time), and not just the window
# of the 1st pressed key

# ########################################################################
# setting up.....

# Print the command-line usage table to the currently selected output
# handle. Takes no arguments.
#
# The table mirrors the option specification handed to GetOptions:
# every option except help expects a value, including iobuf, which is
# declared as an integer option (0 = autoflush, non-zero = buffered).
sub help {
   my $rule=("-"x80)."\n";

   print "$0 [--option=<value>] -".
         "Feature Extractor taking input from XTrapDei\n".
         $rule.
         "option\t\tvalue\t\t\tDescription\n".
         $rule.
         "tee|t\t\tfilename\t\tPrint to stdout and file filename\n".
         "file|f\t\tfilename\t\tPrint only to file filename\n".
         "debug\t\tlevel\t\t\tSet the debug level (0 for no debugging)\n".
         "buff|b\t\tsize\t\t\tSet the max buffer size\n".
         "db|d\t\tfilename\t\tPath to the database of words\n".
         "kl\t\tfilename\t\tKeylogger filename or stdin\n".
         "iobuf|i\t\t0|1\t\t\tBuffered I/O (default off)\n".
         "help|h|?\t\t\t\tThis message\n".
         $rule;
}

# Package globals shared with the required helper files below; they are
# declared here and read later as option defaults.
# NOTE(review): presumably constants.pl assigns them -- confirm.
our ($KEYLOGGER,$DATABASE,$BUF_SIZ,$DEBUG);
require "constants.pl";        #names (e.g. KEYLOGGER, DATABASE, etc.)
require "xtrapdei_special.pl"; #key mappings, nice prints
require "db_tools.pl";         #database tools

# Defaults that the --kl and --db command-line options may override.
my $keylog=$KEYLOGGER; #name of the keylogger
my $db_fname=$DATABASE; #database name


# --tee target: features are written to this file as well as to stdout.
my $tee_fname='';
# --file target: features are written to this file only.
my $out_fname='';
# 0 means the output handles are switched to autoflush; set via --iobuf.
my $io_buf=0;
# Input handle for keylogger lines; stays STDIN unless a pipe to the
# keylogger is opened on it later in the script.
my $KL=\*STDIN;

# Parse the command line. Every option overrides a default set above;
# --help prints the usage table and exits successfully. A false return
# from GetOptions means it has already reported a malformed or unknown
# option, so stop here instead of running with a half-parsed setup.
GetOptions("tee|t=s" => \$tee_fname,
           "file|f=s" => \$out_fname,
           "debug=i" => \$DEBUG,
           "buff|b=i" => \$BUF_SIZ,
           "db|d=s" => \$db_fname,
           "kl=s" => \$keylog,
           "iobuf|i=i" => \$io_buf,
           "help|?|h" => sub { help(); exit(0); })
   or die "Invalid command line options (try --help)\n";

# The debug level is compared numerically all over the script; make sure
# it is a number even when neither constants.pl nor --debug supplied one.
$DEBUG=0 unless defined $DEBUG;

# Open the feature output file. --tee takes precedence over --file when
# both are given. The three-argument open keeps characters in the file
# name (leading '>', '|', trailing spaces) from being read as an open mode.
my $feat_fname=$tee_fname||$out_fname;
if($feat_fname) {
   open(OUT_F,'>',$feat_fname) or die "Could not open file \'$feat_fname\' ".
      "for writing: $!";
   # Unbuffered output so that a consumer sees each feature immediately.
   select((select(OUT_F), $| = 1)[0]) if $io_buf==0;
}

die "Invalid buffer size" if !defined $BUF_SIZ or $BUF_SIZ<=0;



# Key tables shared as package globals.
# NOTE(review): presumably filled in by xtrapdei_special.pl -- confirm.
# %specialsyms is consulted by get_inv_case for keys that are not a single
# letter; %specialsyms_print maps a key name to the text used when
# building strings from the buffer.
our %specialsyms;
our %specialsyms_print;

# Buffer of xKey objects, appended to on every KeyPress event.
my @winbuf=();
# Overwritten with the third value returned by db_import at start-up.
my $winbuf_siz=undef; #size of circular buffer
# Only referenced from commented-out code in try_extract.
my $winbuf_last_str="";
# Shortest/longest prefix length tried against the database; both are
# replaced by the values db_import returns at start-up.
my ($winbuf_win_siz_min,$winbuf_win_siz_max)=($BUF_SIZ,0); #size of windows


# ########################################################################
# helper functions

# Debug aid: dump the key buffer as a table on the selected output handle.
# Does nothing unless the debug level is at least 3. Only the first
# $winbuf_siz slots are visited and undefined slots are skipped; each
# entry prints itself through its own print method.
sub print_all {
   return unless $DEBUG>=3;

   my $rule="\n ".("-"x78)."\n";

   print $rule;
   printf("%20s\tPRESS\t\tRELEASE\n","key");
   print $rule;
   foreach my $slot (0 .. $winbuf_siz-1) {
      next unless defined $winbuf[$slot];
      printf "[%2d]",$slot;
      $winbuf[$slot]->print;
   }
   print $rule;
}

# Build the text currently held at the front of the key buffer.
#
# Walks the buffer from the oldest entry, appending the printable form of
# each key (the %specialsyms_print entry when one exists, the raw key
# otherwise). The walk stops at the first undefined slot, or right after
# the first key that has no release information yet -- that key is still
# included in the result.
#
# Returns the assembled string (empty when the buffer is empty).
sub get_str {
   my @pieces=();

   foreach my $entry (@winbuf) {
      last unless defined $entry;

      my $name=$entry->key;
      push(@pieces,
           exists $specialsyms_print{$name} ? $specialsyms_print{$name}
                                            : $name);

      last unless defined $entry->r;
   }

   return join("",@pieces);
}

# Assemble an xWord from the leading keys of the buffer.
#
# Parameter: the string the buffered keys are expected to spell.
#
# Keys are consumed from the front of the buffer, at most length($str) of
# them, stopping early at an undefined slot or at a key without release
# information. Each consumed key is added to the word together with its
# printable form.
#
# Returns the xWord when the consumed keys spell exactly $str, otherwise
# undef (with a diagnostic when the debug level is at least 1).
sub creat_word {
   my $str=shift;
   my $word=xWord->new;
   my $built="";
   my $limit=length($str);

   my $pos=0;
   while($pos<=$#winbuf and $pos<$limit) {
      last unless defined $winbuf[$pos] and defined $winbuf[$pos]->r;

      my $name=$winbuf[$pos]->key;
      my $ch=exists $specialsyms_print{$name} ? $specialsyms_print{$name}
                                              : $name;
      $built.=$ch;
      $word->add_key($winbuf[$pos],$ch);
      $pos++;
   }

   return $word if $str eq $built;

   print "creat_word: str \'$str\' and s \'$built\' do not match!\n"
      if $DEBUG>=1;
   return undef;
}


# Return the "other case" counterpart of a key.
#
# Parameter: the key as a string.
#
# A single lowercase ASCII letter is upcased and a single uppercase ASCII
# letter is downcased. Anything else is looked up in %specialsyms; when no
# entry exists the key is returned unchanged.
sub get_inv_case {
   my $ch=shift;

   return uc($ch) if $ch =~ m/^[a-z]$/; # exactly one lowercase letter
   return lc($ch) if $ch =~ m/^[A-Z]$/; # exactly one uppercase letter

   return exists $specialsyms{$ch} ? $specialsyms{$ch} : $ch;
}
# ########################################################################
# Try to recognise a database word at the front of the key buffer.
#
# Nothing is attempted unless get_str yields a non-empty string and the
# last buffer index equals $winbuf_siz. Prefixes of that string are then
# tried from $winbuf_win_siz_max characters down to $winbuf_win_siz_min;
# the loop condition also requires the prefix length not to exceed the
# string length. The first prefix found by db_lookup ends the search,
# whether or not an xWord can be built for it.
#
# On success all but the last key of the matched word are removed from
# the front of the buffer.
#
# Returns "@" followed by the word's features and a newline, or undef when
# nothing was extracted.
sub try_extract {
   my $typed=get_str();

   unless(length($typed) and $#winbuf==$winbuf_siz) { return undef; }

   my $features=undef;

   CANDIDATE:
   for(my $len=$winbuf_win_siz_max;
       $len>=$winbuf_win_siz_min and $len<=length($typed);
       $len--) {
      my $prefix=substr($typed,0,$len);
      printf("substr(%s,0,%2d) = %s\n",$typed,$len,$prefix)
         if $DEBUG>=3;

      next CANDIDATE unless db_lookup($prefix);

      my $word=creat_word($prefix);

      print "\n\'$prefix\' is in the database!\n".
         "Getting vector timing information... " if $DEBUG>=2;

      unless(defined $word) {
         print "FAILED!\n" if $DEBUG>=2;
         last CANDIDATE;
      }

      print "OK!\n" if $DEBUG>=2;
      $features="\@".($word->get_features)."\n";

      # Drop the matched word from the buffer, keeping its final key.
      shift(@winbuf) for 1 .. length($prefix)-1;

      last CANDIDATE;
   }

   return $features;
}
# ########################################################################

# ########################################################################

# Feed one keyboard event into the key buffer and emit any feature line
# that becomes available.
#
# Parameters:
#   $win   - window value handed to xKey->new
#   $ch    - key of the event
#   $t     - time of the event
#   $event - event name; only names matching KeyPress or KeyRelease are
#            acted upon, anything else is ignored
#
# Extracted features go to OUT_F when --tee or --file was given and to the
# selected output handle unless --file was given.
sub track_key {
   my ($win,$ch,$t,$event) = @_;

   # Common tail of both branches: attempt an extraction, print the result.
   my $emit=sub {
      my $feat=try_extract();
      return unless defined $feat;
      print OUT_F $feat if $tee_fname||$out_fname;
      print $feat if (!$out_fname);
   };

   if($event=~/KeyPress/) {
      my $k=xKey->new($win);
      $k->ch($ch);
      $k->key_act($event,$t);
      push(@winbuf, $k);

      $emit->();

      # Keeping every pressed key would waste memory without being useful,
      # so the oldest entry is dropped once the last index reaches
      # $winbuf_siz.
      shift(@winbuf) if $#winbuf==$winbuf_siz;
      return;
   }

   return unless $event=~/KeyRelease/;

   # Find the most recently added key that is still waiting for a release
   # and matches this event. The inverse-case comparison covers e.g.
   # Shift+O where Shift is released first, so the release arrives as 'o'.
   my $pending=undef;
   foreach my $i (reverse 0 .. $#winbuf) {
      next unless defined $winbuf[$i];
      next if defined $winbuf[$i]->r;
      if(($winbuf[$i]->key eq $ch) or
            ($winbuf[$i]->key eq get_inv_case($ch))) {
         $pending=$i;
         last;
      }
   }

   # Record the release information on the matching key, if any.
   $winbuf[$pending]->key_act($event,$t) if defined $pending;

   $emit->();
}

# ########################################################################

# Must autoflush if the features are to reach the classifier immediately.
select((select(STDOUT), $| = 1)[0]) if $io_buf==0; 
#select((select($KL), $| = 1)[0]) if $io_buf==0; 

# Load the word database. db_import hands back the shortest and longest
# prefix lengths to try plus the size used for the key buffer.
($winbuf_win_siz_min,
 $winbuf_win_siz_max,$winbuf_siz)=db_import($db_fname,$BUF_SIZ,0);


db_print() if $DEBUG>=2;

# Unless the keylogger name mentions stdin, run it as a command and read
# its output through a pipe (this re-opens the handle $KL refers to).
# The three-argument '-|' form always treats $keylog as a command to read
# from, so characters in the name can no longer change the open mode.
if(!($keylog=~m/stdin/i))  {
   open($KL,'-|',$keylog) or die "Could not open pipe to $keylog: $!\n";
}

# Main loop: read '|'-separated keylogger lines and feed every key event
# to track_key. Fields used (0-based): 0 window, 1 event, 5 char, 10 time.
my $ln=0; #line number
while(my $line=<$KL>) {
   $ln++;
   chomp($line);

   my @keylog_line=split(/\|/, $line);

   # Blank lines and lines without a second field are not events.
   next unless defined $keylog_line[1] and $keylog_line[1] =~ m/Key/;

   my ($win,$event,$ch,$t) = @keylog_line[0,1,5,10];

   # A truncated key event would hand undefined values to track_key.
   unless(defined $ch and defined $t) {
      warn "line $ln: incomplete key event, skipped\n" if $DEBUG>=1;
      next;
   }

   # Strip the "name=" prefixes; a field without one is passed through.
   if($win =~ /Window=(.*)/) { $win=$1; }
   if($ch =~ /char=(.*)/) { $ch=$1; }
   if($t =~ /time=([^]]*)/) { $t=$1; }
   if($event =~ /Event=(.*)/) { $event=$1; }

   track_key($win,$ch,$t,$event);
   print_all();
}

close($KL);
# Write errors on a buffered handle can surface only at close time.
if($tee_fname||$out_fname) {
   close(OUT_F) or warn "Could not close output file: $!\n";
}

