前端之家收集整理的这篇文章主要介绍了“正则表达式:模拟网络爬虫小例子”,前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。
- package cn.zhengze;
-
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.io.FileReader;
- import java.io.IOException;
- import java.io.InputStreamReader;
- import java.util.ArrayList;
- import java.util.List;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
-
/**
 * Minimal "crawler" demo: scans a local HTML file line by line and prints
 * every substring that matches an e-mail-like regular expression.
 */
public class netbug {

    /**
     * Reads {@code mail.html} from the current working directory and prints
     * each e-mail-like match on its own line, in order of appearance.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        File file = new File("mail.html");
        // local-part '@' alphanumeric host, followed by 1..3 dot-separated
        // suffixes of 2-3 letters each (e.g. ".com", ".com.cn").
        String regex = "\\w+@[a-zA-Z0-9]+(\\.[a-zA-Z]{2,3}){1,3}";
        List<String> mailList = getMails(file, regex);
        for (String mail : mailList) {
            System.out.println(mail);
        }
    }

    /**
     * Collects every match of {@code regex} found in {@code file}.
     * Matching is done per line, so a match never spans a line break.
     *
     * @param file  text file to scan
     * @param regex regular expression to search for
     * @return all matches in order of appearance; empty if there are none
     * @throws IOException if the file cannot be opened or read
     */
    private static List<String> getMails(File file, String regex)
            throws IOException {
        // Compile once; the same pattern is applied to every line.
        Pattern pattern = Pattern.compile(regex);
        List<String> matches = new ArrayList<String>();

        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher matcher = pattern.matcher(line);
                while (matcher.find()) {
                    matches.add(matcher.group());
                }
            }
        } finally {
            // Always release the file handle, even if reading fails midway.
            reader.close();
        }
        return matches;
    }

}