Demo源码如下:
Demo下载地址:http://download.csdn.net/detail/zxcvbnm32123/5830571
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- using System.IO;
- using System.Text.RegularExpressions;
namespace _12提取html中的所有的Email地址
{
    /// <summary>
    /// Console demo: extracts every e-mail address from the file "1.htm" with a
    /// regular expression, prints each match together with its capture groups,
    /// and reports how many addresses belong to a set of common mail providers.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads "1.htm" from the current directory, prints every e-mail address
        /// found in it, then prints per-provider statistics and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string html = File.ReadAllText("1.htm");

            // Capture groups (extracted with parentheses):
            //   1: user name   2: first domain label   3: last ".label" of the domain
            // The quantifier inside group 3 must be "[a-zA-Z0-9]+": with a single
            // character class there, "user@163.com" would be cut off at "user@163.c".
            string regEmail = @"([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9]+)(\.[a-zA-Z0-9]+)+";
            MatchCollection mc = Regex.Matches(html, regEmail);

            // Usage counters for the common mail providers:
            // 163, 126, gmail, qq, sohu, sina, yahoo, hotmail.
            int count_163 = 0;
            int count_126 = 0;
            int count_gmail = 0;
            int count_qq = 0;
            int count_sohu = 0;
            int count_sina = 0;
            int count_yahoo = 0;
            int count_hotmail = 0;

            foreach (Match match in mc)
            {
                #region MyRegion
                // match.Groups[0].Value holds the same text as match.Value:
                // the complete e-mail string that was matched.
                Console.WriteLine(match.Value);
                Console.WriteLine(match.Groups[0].Value); // 0: complete address
                Console.WriteLine(match.Groups[1].Value); // 1: user name
                Console.WriteLine(match.Groups[2].Value); // 2: domain name
                Console.WriteLine(match.Groups[3].Value); // 3: last ".label" captured by the repeated group
                #endregion

                Console.WriteLine(match.Value); // print every e-mail address

                // Groups[0] is the whole match, so real capture groups start at index 1.
                // ToLowerInvariant keeps the comparison culture-independent; a
                // culture-sensitive ToLower() maps 'I' to a dotless 'ı' under Turkish
                // cultures, so "SINA"/"GMAIL"/"HOTMAIL" would never be counted.
                switch (match.Groups[2].Value.ToLowerInvariant())
                {
                    case "163":
                        count_163++;
                        break;
                    case "126":
                        count_126++;
                        break;
                    case "gmail":
                        count_gmail++;
                        break;
                    case "qq":
                        count_qq++;
                        break;
                    case "sohu":
                        count_sohu++;
                        break;
                    case "sina":
                        count_sina++;
                        break;
                    case "yahoo":
                        count_yahoo++;
                        break;
                    case "hotmail":
                        count_hotmail++;
                        break;
                }
            }

            Console.WriteLine("=============统计信息============");
            Console.WriteLine("邮箱总数:{0}", mc.Count);
            Console.WriteLine("网易163邮箱用户数:{0}", count_163);
            Console.WriteLine("网易126邮箱用户数:{0}", count_126);
            Console.WriteLine("gmail邮箱用户数:{0}", count_gmail);
            Console.WriteLine("QQ邮箱用户数:{0}", count_qq);
            Console.WriteLine("sohu邮箱用户数:{0}", count_sohu);
            Console.WriteLine("sina邮箱用户数:{0}", count_sina);
            Console.WriteLine("yahoo邮箱用户数:{0}", count_yahoo);
            Console.WriteLine("hotmail邮箱用户数:{0}", count_hotmail);

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
}
“1.htm”截图如下:
通过调试控制,查看获取的全部网页字符串,截图如下:
输出结果如下:
Demo下载地址:http://download.csdn.net/detail/zxcvbnm32123/5830571