Commit 120352af authored by unknown

New versions of mail_to_db.pl and pmail.pl.

Added email threading capabilities to both
programs.


tests/mail_to_db.pl:
  Changes to mail_to_db.pl
  - Removed the optional --table option. Future releases may require
    more than one table, at which point it is better to have a fixed
    table name.
  - Fixed a division-by-zero error in the report, which occurred if the
    table was created but no mails were inserted.
  - Added fields message_id and in_reply_to.
tests/pmail.pl:
  Changed pmail:
  
  New option: --thread. Prints all subsequent replies in the thread.
  New option: --message_id. Prints message_id and number of replies found.
  Both options work recursively: the search covers not only direct replies
  to the mail found, but also replies to those replies, and so on, until
  the whole thread has been found.
  
  Clean up: Localized variables and moved code into functions.
parent 20542236
...@@ -17,7 +17,7 @@ use DBI; ...@@ -17,7 +17,7 @@ use DBI;
use Getopt::Long; use Getopt::Long;
$| = 1; $| = 1;
$VER = "2.6"; $VER = "3.0";
$opt_help = 0; $opt_help = 0;
$opt_version = 0; $opt_version = 0;
...@@ -26,7 +26,6 @@ $opt_host = undef(); ...@@ -26,7 +26,6 @@ $opt_host = undef();
$opt_port = undef(); $opt_port = undef();
$opt_socket = undef(); $opt_socket = undef();
$opt_db = "mail"; $opt_db = "mail";
$opt_table = "mails";
$opt_user = undef(); $opt_user = undef();
$opt_password = undef(); $opt_password = undef();
$opt_max_mail_size = 65536; $opt_max_mail_size = 65536;
...@@ -97,7 +96,7 @@ sub main ...@@ -97,7 +96,7 @@ sub main
print "the my.cnf file. This command is available from the latest MySQL\n"; print "the my.cnf file. This command is available from the latest MySQL\n";
print "distribution.\n"; print "distribution.\n";
} }
GetOptions("help","version","host=s","port=i","socket=s","db=s","table=s", GetOptions("help","version","host=s","port=i","socket=s","db=s",
"user=s","password=s","max_mail_size=i","create","test", "user=s","password=s","max_mail_size=i","create","test",
"no_path","debug","stop_on_error","stdin") "no_path","debug","stop_on_error","stdin")
|| die "Wrong option! See $progname --help\n"; || die "Wrong option! See $progname --help\n";
...@@ -123,7 +122,6 @@ sub main ...@@ -123,7 +122,6 @@ sub main
|| die "Couldn't connect: $DBI::errstr\n"; || die "Couldn't connect: $DBI::errstr\n";
die "You must specify the database; use --db=" if (!defined($opt_db)); die "You must specify the database; use --db=" if (!defined($opt_db));
die "You must specify the table; use --table=" if (!defined($opt_table));
create_table($dbh) if ($opt_create); create_table($dbh) if ($opt_create);
...@@ -218,9 +216,9 @@ sub main ...@@ -218,9 +216,9 @@ sub main
print "Total number of mails:\t\t\t\t"; print "Total number of mails:\t\t\t\t";
print $mail_inserted + $ignored; print $mail_inserted + $ignored;
print " (OK: "; print " (OK: ";
print sprintf("%.1f", (($mail_inserted / ($mail_inserted+$ignored)) * 100)); print sprintf("%.1f", ($mail_inserted + $ignored) ? (($mail_inserted / ($mail_inserted+$ignored)) * 100) : 0.0);
print "% Ignored: "; print "% Ignored: ";
print sprintf("%.1f", (($ignored / ($mail_inserted + $ignored)) * 100)); print sprintf("%.1f", ($mail_inserted + $ignored) ? (($ignored / ($mail_inserted + $ignored)) * 100) : 0);
print "%)\n"; print "%)\n";
print "################################ End Report ##################################\n"; print "################################ End Report ##################################\n";
exit(0); exit(0);
...@@ -232,13 +230,15 @@ sub main ...@@ -232,13 +230,15 @@ sub main
sub create_table sub create_table
{ {
my ($dbh) = @_; my ($dbh)= @_;
my ($sth, $query); my ($sth, $query);
$query = <<EOF; $query= <<EOF;
CREATE TABLE $opt_table CREATE TABLE my_mail
( (
mail_id MEDIUMINT UNSIGNED NOT NULL auto_increment, mail_id MEDIUMINT UNSIGNED NOT NULL auto_increment,
message_id VARCHAR(255),
in_reply_to VARCHAR(255),
date DATETIME NOT NULL, date DATETIME NOT NULL,
time_zone VARCHAR(20), time_zone VARCHAR(20),
mail_from VARCHAR(120) NOT NULL, mail_from VARCHAR(120) NOT NULL,
...@@ -250,6 +250,8 @@ CREATE TABLE $opt_table ...@@ -250,6 +250,8 @@ CREATE TABLE $opt_table
file VARCHAR(64) NOT NULL, file VARCHAR(64) NOT NULL,
hash INTEGER NOT NULL, hash INTEGER NOT NULL,
KEY (mail_id), KEY (mail_id),
KEY (message_id),
KEY (in_reply_to),
PRIMARY KEY (mail_from, date, hash)) PRIMARY KEY (mail_from, date, hash))
TYPE=MyISAM COMMENT='' TYPE=MyISAM COMMENT=''
EOF EOF
...@@ -277,7 +279,7 @@ sub process_mail_file ...@@ -277,7 +279,7 @@ sub process_mail_file
chop if (substr($_, -1, 1) eq "\r"); chop if (substr($_, -1, 1) eq "\r");
if ($type ne "message") if ($type ne "message")
{ {
if (/^Reply-To: (.*)/i) if (/^Reply-To:\s*(.*)/i)
{ {
$type = "reply"; $type = "reply";
$values{$type} = $1; $values{$type} = $1;
...@@ -302,14 +304,27 @@ sub process_mail_file ...@@ -302,14 +304,27 @@ sub process_mail_file
$type = "subject"; $type = "subject";
$values{$type} = $1; $values{$type} = $1;
} }
elsif (/^Message-Id:\s*(.*)/i)
{
$type = "message_id";
s/^\s*(<.*>)\s*/$1/;
$values{$type} = $1;
}
elsif (/^In-Reply-To:\s*(.*)/i)
{
$type = "in_reply_to";
s/^\s*(<.*>)\s*/$1/;
$values{$type} = $1;
}
elsif (/^Date: (.*)/i) elsif (/^Date: (.*)/i)
{ {
date_parser($1, \%values, $file_name); date_parser($1, \%values, $file_name);
$type = "rubbish"; $type = "rubbish";
} }
elsif (/^[\w\W-]+:\s/) # Catch those fields that we don't or can't handle (yet)
elsif (/^[\w\W-]+:/)
{ {
$type = "rubbish"; $type = "rubbish";
} }
elsif ($_ eq "") elsif ($_ eq "")
{ {
...@@ -319,6 +334,10 @@ sub process_mail_file ...@@ -319,6 +334,10 @@ sub process_mail_file
else else
{ {
s/^\s*/ /; s/^\s*/ /;
if ($type eq 'message_id' || $type eq 'in_reply_to')
{
s/^\s*(<.*>)\s*/$1/;
}
$values{$type} .= $_; $values{$type} .= $_;
} }
} }
...@@ -421,8 +440,10 @@ sub update_table ...@@ -421,8 +440,10 @@ sub update_table
goto restart; # Some mails may have duplicated messages goto restart; # Some mails may have duplicated messages
} }
$q = "INSERT INTO $opt_table ("; $q = "INSERT INTO my_mail (";
$q.= "mail_id,"; $q.= "mail_id,";
$q.= "message_id,";
$q.= "in_reply_to,";
$q.= "date,"; $q.= "date,";
$q.= "time_zone,"; $q.= "time_zone,";
$q.= "mail_from,"; $q.= "mail_from,";
...@@ -435,6 +456,12 @@ sub update_table ...@@ -435,6 +456,12 @@ sub update_table
$q.= "hash"; $q.= "hash";
$q.= ") VALUES ("; $q.= ") VALUES (";
$q.= "NULL,"; $q.= "NULL,";
$q.= (defined($values->{'message_id'}) ?
$dbh->quote($values->{'message_id'}) : "NULL");
$q.= ",";
$q.= (defined($values->{'in_reply_to'}) ?
$dbh->quote($values->{'in_reply_to'}) : "NULL");
$q.= ",";
$q.= "'" . $values->{'date'} . "',"; $q.= "'" . $values->{'date'} . "',";
$q.= (defined($values->{'time_zone'}) ? $q.= (defined($values->{'time_zone'}) ?
$dbh->quote($values->{'time_zone'}) : "NULL"); $dbh->quote($values->{'time_zone'}) : "NULL");
...@@ -575,7 +602,6 @@ Options: ...@@ -575,7 +602,6 @@ Options:
--port=# TCP/IP port to be used with connection. --port=# TCP/IP port to be used with connection.
--socket=... MySQL UNIX socket to be used with connection. --socket=... MySQL UNIX socket to be used with connection.
--db=... Database to be used. --db=... Database to be used.
--table=... Table name for mails.
--user=... Username for connecting. --user=... Username for connecting.
--password=... Password for the user. --password=... Password for the user.
--stdin Read mails from stdin. --stdin Read mails from stdin.
......
#!/usr/bin/perl #!/usr/bin/perl -w
# #
# Prints mails to standard output # Prints mails to standard output
# #
...@@ -9,21 +9,25 @@ ...@@ -9,21 +9,25 @@
use DBI; use DBI;
use Getopt::Long; use Getopt::Long;
$VER="1.5"; $VER="2.0";
@fldnms= ("mail_from","mail_to","cc","date","time_zone","file","sbj","txt"); @fldnms= ("mail_from","mail_to","cc","date","time_zone","file","sbj","txt");
$fields=8; my $fields= 0;
@mail= (@from,@to,@cc,@date,@time_zone,@file,@sbj,@txt); my $base_q= "";
my $mail_count= 0;
$opt_user= $opt_password= ""; $opt_user= $opt_password= "";
$opt_socket= "/tmp/mysql.sock"; $opt_socket= "/tmp/mysql.sock";
$opt_port= 3306; $opt_port= 3306;
$opt_db="mail"; $opt_db="mail";
$opt_table="mails"; $opt_table="my_mail";
$opt_help=$opt_count=0; $opt_help=$opt_count=0;
$opt_thread= 0;
$opt_host= "";
$opt_message_id= 0;
GetOptions("help","count","port=i","db=s","table=s","host=s","password=s", GetOptions("help","count","port=i","db=s","table=s","host=s","password=s",
"user=s","socket=s") || usage(); "user=s","socket=s", "thread","message_id") || usage();
if ($opt_host eq '') if ($opt_host eq '')
{ {
...@@ -39,81 +43,194 @@ if ($opt_help || !$ARGV[0]) ...@@ -39,81 +43,194 @@ if ($opt_help || !$ARGV[0])
#### Connect and parsing the query to MySQL #### Connect and parsing the query to MySQL
#### ####
$dbh= DBI->connect("DBI:mysql:$opt_db:$opt_host:port=$opt_port:mysql_socket=$opt_mysql_socket", $opt_user,$opt_password, { PrintError => 0}) $dbh= DBI->connect("DBI:mysql:$opt_db:$opt_host:port=$opt_port:mysql_socket=$opt_socket", $opt_user,$opt_password, { PrintError => 0})
|| die $DBI::errstr; || die $DBI::errstr;
if ($opt_count) main();
{
count_mails();
}
$fields=0; ####
$query = "select "; #### main
foreach $val (@fldnms) ####
sub main
{ {
if (!$fields) my ($row, $val, $q, $mail, $sth);
if ($opt_count)
{ {
$query.= "$val"; count_mails();
} }
else
$base_q= "SELECT ";
foreach $val (@fldnms)
{
if (!$fields)
{
$base_q.= "$val";
}
else
{
$base_q.= ",$val";
}
$fields++;
}
$base_q.= ",message_id" if ($opt_thread || $opt_message_id);
$base_q.= " FROM $opt_table";
$q= " WHERE $ARGV[0]";
$sth= $dbh->prepare($base_q . $q);
if (!$sth->execute)
{
print "$DBI::errstr\n";
$sth->finish;
die;
}
for (; ($row= $sth->fetchrow_arrayref); $mail_count++)
{ {
$query.= ",$val"; for ($i= 0; $i < $fields; $i++)
{
if ($opt_message_id)
{
$mail[$fields][$mail_count]= $row->[$fields];
$mail[$fields][$mail_count].= "\nNumber of Replies: " . get_nr_replies($row->[$fields]);
}
$mail[$i][$mail_count]= $row->[$i];
}
if ($opt_thread)
{
get_mail_by_message_id($row->[$fields], $mail);
}
} }
$fields++; print_mails($mail);
} }
$query.= " from $opt_table where $ARGV[0] order by date desc";
#### ####
#### Send query and save result #### Function, which fetches mail by searching in-reply-to with
#### a given message_id. Saves the value (mail) in mail variable.
#### Returns the message id of the mail found and searches again
#### and saves, until no more mails are found with that message_id.
#### ####
$sth= $dbh->prepare($query); sub get_mail_by_message_id
if (!$sth->execute)
{ {
print "$DBI::errstr\n"; my ($message_id, $mail)= @_;
$sth->finish; my ($q, $query, $i, $row, $sth);
die;
} $q= " WHERE in_reply_to = \"$message_id\"";
for ($i=0; ($row= $sth->fetchrow_arrayref); $i++) $query= $base_q . $q;
{ $sth= $dbh->prepare($query);
for ($j=0; $j < $fields; $j++) if (!$sth->execute)
{
print "QUERY: $query\n$DBI::errstr\n";
$sth->finish;
die;
}
while (($row= $sth->fetchrow_arrayref))
{ {
$mail[$j][$i]= $row->[$j]; $mail_count++;
for ($i= 0; $i < $fields; $i++)
{
if ($opt_message_id)
{
$mail[$fields][$mail_count]= $row->[$fields];
$mail[$fields][$mail_count].= "\nNumber of Replies: " . get_nr_replies($row->[$fields]);
}
$mail[$i][$mail_count]= $row->[$i];
}
$new_message_id= $row->[$fields];
if (defined($new_message_id) && length($new_message_id))
{
get_mail_by_message_id($new_message_id, $mail);
}
} }
return;
} }
#### ####
#### Print to stderr #### Get number of replies for a given message_id
#### ####
for ($i=0; $mail[0][$i]; $i++) sub get_nr_replies
{ {
print "#" x 33; my ($message_id)= @_;
print " " . ($i+1) . ". Mail "; my ($sth, $sth2, $q, $row, $row2, $nr_replies);
print "#" x 33;
print "\nFrom: $mail[0][$i]\n"; $nr_replies= 0;
print "To: $mail[1][$i]\n"; $q= "SELECT COUNT(*) FROM my_mail WHERE in_reply_to=\"$message_id\"";
print "Cc: $mail[2][$i]\n"; $sth= $dbh->prepare($q);
print "Date: $mail[3][$i]\n"; if (!$sth->execute)
print "Timezone: $mail[4][$i]\n"; {
print "File: $mail[5][$i]\n"; print "QUERY: $q\n$DBI::errstr\n";
print "Subject: $mail[6][$i]\n"; $sth->finish;
print "Message:\n$mail[7][$i]\n"; die;
} }
print "#" x 20; while (($row= $sth->fetchrow_arrayref))
print " Summary: "; {
if ($i == 1) if (($nr_replies= $row->[0]))
{ {
print "$i Mail "; $q= "SELECT message_id FROM my_mail WHERE in_reply_to=\"$message_id\"";
print "matches the query "; $sth2= $dbh->prepare($q);
if (!$sth2->execute)
{
print "QUERY: $q\n$DBI::errstr\n";
$sth->finish;
die;
}
while (($row2= $sth2->fetchrow_arrayref))
{
# There may be several replies to the same mail. Also the
# replies to the 'parent' mail may contain several replies
# and so on. Thus we need to calculate it recursively.
$nr_replies+= get_nr_replies($row2->[0]);
}
}
return $nr_replies;
}
} }
else
####
#### Print mails
####
sub print_mails
{ {
print "$i Mails "; my ($mail)= @_;
print "match the query "; my ($i);
}
print "#" x 20; for ($i=0; $mail[0][$i]; $i++)
print "\n"; {
print "#" x 33;
print " " . ($i+1) . ". Mail ";
print "#" x 33;
print "\n";
if ($opt_message_id)
{
print "Msg ID: $mail[$fields][$i]\n";
}
print "From: $mail[0][$i]\n";
print "To: $mail[1][$i]\n";
print "Cc:" . (defined($mail[2][$i]) ? $mail[2][$i] : "") . "\n";
print "Date: $mail[3][$i]\n";
print "Timezone: $mail[4][$i]\n";
print "File: $mail[5][$i]\n";
print "Subject: $mail[6][$i]\n";
print "Message:\n$mail[7][$i]\n";
}
print "#" x 20;
print " Summary: ";
if ($i == 1)
{
print "$i Mail ";
print "matches the query ";
}
else
{
print "$i Mails ";
print "match the query ";
}
print "#" x 20;
print "\n";
}
#### ####
#### Count mails that matches the query, but don't show them #### Count mails that matches the query, but don't show them
...@@ -121,6 +238,8 @@ print "\n"; ...@@ -121,6 +238,8 @@ print "\n";
sub count_mails sub count_mails
{ {
my ($sth);
$sth= $dbh->prepare("select count(*) from $opt_table where $ARGV[0]"); $sth= $dbh->prepare("select count(*) from $opt_table where $ARGV[0]");
if (!$sth->execute) if (!$sth->execute)
{ {
...@@ -154,15 +273,21 @@ sub usage ...@@ -154,15 +273,21 @@ sub usage
Usage: pmail [options] "SQL where clause" Usage: pmail [options] "SQL where clause"
Options: Options:
--help show this help --help show this help
--count Shows how many mails matches the query, but not the mails. --count Shows how many mails matches the query, but not the mails.
--db= database to use (Default: $opt_db) --db= database to use (Default: $opt_db)
--table= table to use (Default: $opt_table) --host= Hostname which to connect (Default: $opt_host)
--host= Hostname which to connect (Default: $opt_host) --socket= Unix socket to be used for connection (Default: $opt_socket)
--socket= Unix socket to be used for connection (Default: $opt_socket) --password= Password to use for mysql
--password= Password to use for mysql --user= User to be used for mysql connection, if not current user
--user= User to be used for mysql connection, if not current user --port= mysql port to be used (Default: $opt_port)
--port= mysql port to be used (Default: $opt_port) --thread Will search for possible replies to emails found by the search
criteria. Replies, if found, will be displayed right after the
original mail.
--message_id Display message_id on top of each mail. Useful when searching
email threads with --thread. On the second line is the number
of replies to the same thread, starting counting from that
mail (excluding possible parent mails).
"SQL where clause" is the end of the select clause, "SQL where clause" is the end of the select clause,
where the condition is expressed. The result will where the condition is expressed. The result will
be the mail(s) that matches the condition and be the mail(s) that matches the condition and
...@@ -176,18 +301,20 @@ sub usage ...@@ -176,18 +301,20 @@ sub usage
- Subject - Subject
- Message text - Message text
The field names that can be used in the where clause are: The field names that can be used in the where clause are:
Field Type Field Type
- mail_from varchar(120) - message_id varchar(255) # Use with --thread and --message_id
- date datetime - in_reply_to varchar(255) # Internally used by --thread
- sbj varchar(200) - mail_from varchar(120)
- txt mediumtext - date datetime
- cc text - sbj varchar(200)
- mail_to text - txt mediumtext
- time_zone varchar(6) - cc text
- reply varchar(120) - mail_to text
- file varchar(32) - time_zone varchar(6)
- hash int(11) - reply varchar(120)
An example of the pmail: - file varchar(32)
- hash int(11)
An example of pmail:
pmail "txt like '%libmysql.dll%' and sbj like '%delphi%'" pmail "txt like '%libmysql.dll%' and sbj like '%delphi%'"
NOTE: the txt field is NOT case sensitive! NOTE: the txt field is NOT case sensitive!
EOF EOF
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment