Here's a short code snippet showing how to parse the From/To/Subject headers from an .eml file, as well as any URLs located in the message. The regex for URLs isn't perfect; there are a million ways to write a URL regex, so pick your poison from the web. This is just an example. Reposted from here.
using System;
using System.IO;
using System.Text.RegularExpressions;
namespace parse.eml
{
    /// <summary>
    /// Reads one .eml file and extracts its From, To and Subject header lines
    /// plus every URL found anywhere in the file's text.
    /// </summary>
    class Email
    {
        // Intentionally simplistic URL pattern (letters and dots only, and a
        // trailing slash is required). Swap in a stricter pattern if needed.
        const string UrlPattern = @"https?://([a-zA-Z\.]+)/";

        readonly string _path, _to, _from, _subject, _urls;

        /// <summary>
        /// Loads the file at <paramref name="path"/> and parses it immediately.
        /// </summary>
        /// <param name="path">Path of the .eml file to read.</param>
        /// <remarks>
        /// A header that cannot be found is stored as an empty string instead of
        /// raising an exception, so one malformed message does not abort a batch.
        /// Exceptions raised while opening or reading the file still propagate.
        /// </remarks>
        public Email(string path)
        {
            _path = path;

            // Dispose the reader deterministically so the file handle is released.
            string fc;
            using (StreamReader reader = new StreamReader(path))
            {
                fc = reader.ReadToEnd();
            }

            _from = FindHeaderLine(fc, "From");
            _to = FindHeaderLine(fc, "To");
            _subject = FindHeaderLine(fc, "Subject");

            // Each URL is followed by a single space, including the last one.
            System.Text.StringBuilder urls = new System.Text.StringBuilder();
            foreach (Match m in Regex.Matches(fc, UrlPattern))
            {
                urls.Append(m.Value).Append(' ');
            }
            _urls = urls.ToString();
        }

        /// <summary>
        /// Returns the first line of the form "<paramref name="name"/>: value",
        /// including the "name: " prefix, or an empty string when no such line exists.
        /// </summary>
        /// <param name="content">Full text of the message file.</param>
        /// <param name="name">Header name; must be a plain token with no regex metacharacters.</param>
        /// <returns>The matched line without its line terminator, or <c>string.Empty</c>.</returns>
        /// <remarks>
        /// The match is anchored to the start of a line so that "To" does not match
        /// inside "Delivered-To" or "Reply-To". The value stops at CR or LF because
        /// '.' in .NET regular expressions would otherwise swallow the '\r' of a CRLF
        /// line ending. Folded continuation lines are not joined, and the whole file
        /// is searched, so a matching line in the body is used if the header is absent.
        /// </remarks>
        static string FindHeaderLine(string content, string name)
        {
            Match m = Regex.Match(content, "^" + name + @": ([^\r\n]+)", RegexOptions.Multiline);
            return m.Success ? m.Value : string.Empty;
        }

        /// <summary>
        /// Writes the file path followed by the To, From, Subject and URL values,
        /// each on its own tab-indented line, to standard output.
        /// </summary>
        public void show()
        {
            Console.WriteLine(
                "{0}\n\t{1}\n\t{2}\n\t{3}\n\t{4}",
                _path, _to, _from, _subject, _urls);
        }
    }

    /// <summary>
    /// Console entry point: parses and prints every *.eml file in the current directory.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            foreach (string f in Directory.GetFiles(".", "*.eml"))
            {
                Email e = new Email(f);
                e.show();
            }
        }
    }
}