我试图使用 Selenium 从指定的 HTML 页面中获取源代码,但不知道为什么,得到的内容与我们在浏览器中看到的源代码并不完全一致。
- private static void getHTMLSourceFromURL(String url,String fileName) {
- WebDriver driver = new FirefoxDriver();
- driver.get(url);
- try {
- Thread.sleep(5000); //the page gets loaded completely
- List<String> pageSource = new ArrayList<String>(Arrays.asList(driver.getPageSource().split("\n")));
- writeTextToFile(pageSource,originalFile);
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- System.out.println("quitting webdriver");
- driver.quit();
- }
- /**
- * creates file with fileName and writes the content
- *
- * @param content
- * @param fileName
- */
- private static void writeTextToFile(List<String> content,String fileName) {
- PrintWriter pw = null;
- String outputFolder = ".";
- File output = null;
- try {
- File dir = new File(outputFolder + '/' + "HTML Sources");
- if (!dir.exists()) {
- boolean success = dir.mkdirs();
- if (success == false) {
- try {
- throw new Exception(dir + " could not be created");
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- }
- output = new File(dir + "/" + fileName);
- if (!output.exists()) {
- try {
- output.createNewFile();
- } catch (IOException ioe) {
- ioe.printStackTrace();
- }
- }
- pw = new PrintWriter(new FileWriter(output,true));
- for (String line : content) {
- pw.print(line);
- pw.print("\n");
- }
- } catch (IOException ioe) {
- ioe.printStackTrace();
- } finally {
- pw.close();
- }
- }