Skip to content
Advertisement

Trying to download image from tumblr using java selenium

I’m trying to download images from Tumblr using Java and Selenium. I extracted the URL of each image from its src attribute and tried to download the image from that URL. But the saved images are not what I expected: they are in unsupported formats and smaller in size. How can I correct this? Please help.

This is my code:

/**
 * Opens the Tumblr page in Chrome, collects every {@code <img>} element that has an
 * {@code alt} attribute, and saves the content behind each image's {@code src} URL to
 * {@code path<N>.jpg} (N starts at 1 and increases per saved image).
 *
 * @param args unused
 * @throws InterruptedException if the fixed wait after page load is interrupted
 * @throws AWTException if the {@link Robot} cannot be created on this platform
 * @throws IOException if an image URL cannot be opened/read or a file cannot be written
 */
public static void main(String[] args) throws InterruptedException, AWTException, IOException {

    WebDriver driver = new ChromeDriver();
    try {
        driver.manage().window().maximize();
        driver.get("https://artist-childe-hassam.tumblr.com/");
        // Fixed pause after navigation; presumably gives the page time to render.
        Thread.sleep(5000);
        // Sends an END key press through the OS. Presumably meant to scroll to the bottom
        // of the page; this only reaches the browser if its window has keyboard focus.
        Robot robot = new Robot();
        robot.keyPress(KeyEvent.VK_END);
        robot.keyRelease(KeyEvent.VK_END);

        List<WebElement> list = driver.findElements(By.xpath("//img[@alt]"));
        int count = 1;
        for (WebElement element : list) {

            String srcs = element.getAttribute("src");
            String attribute = element.getAttribute("alt");
            System.out.println("title: " + attribute);
            System.out.println(" ");
            System.out.println("link " + srcs);

            // An <img> without a usable src would make new URL(...) throw and abort
            // the whole run; skip it instead.
            if (srcs == null || srcs.isEmpty()) {
                continue;
            }

            // Read the whole response into memory first so that a file is only created
            // once the download has completed. try-with-resources closes the stream
            // even when a read fails part-way through.
            byte[] response;
            try (InputStream in = new BufferedInputStream(new URL(srcs).openStream())) {
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                byte[] buf = new byte[1024];
                int n;
                while (-1 != (n = in.read(buf))) {
                    out.write(buf, 0, n);
                }
                response = out.toByteArray();
            }

            // NOTE(review): the ".jpg" extension is used regardless of what kind of
            // content the URL actually returned.
            try (FileOutputStream fos = new FileOutputStream("path" + count + ".jpg")) {
                fos.write(response);
            }
            count++;
        }
    } finally {
        // End the browser session even if a download fails.
        driver.quit();
    }
}
    

}

Advertisement

Answer

I’m not familiar with using BufferedInputStream to download an image from a URL; instead, I usually use curl to download from a URL. I have modified your code, and it’s working fine for me.

/**
 * Opens the Tumblr page in Chrome, collects every {@code <img>} element that has an
 * {@code alt} attribute, and hands each image's {@code src} URL to
 * {@code downloadFromUrl}, targeting {@code Path<N>.jpg} (N starts at 1 and increases
 * per image) with a 20-second timeout.
 *
 * @param args unused
 * @throws InterruptedException if the fixed wait after page load is interrupted
 * @throws AWTException if the {@link Robot} cannot be created on this platform
 * @throws IOException declared for callers; not thrown directly by the code shown here
 */
public static void main(String[] args) throws InterruptedException, AWTException, IOException {
        WebDriverManager.chromedriver().setup();
        WebDriver driver = new ChromeDriver();
        try {
            driver.manage().window().maximize();
            driver.get("https://artist-childe-hassam.tumblr.com/");
            // Fixed pause after navigation; presumably gives the page time to render.
            Thread.sleep(5000);
            // Sends an END key press through the OS. Presumably meant to scroll to the
            // bottom of the page; it only reaches the browser if its window has focus.
            Robot robot = new Robot();
            robot.keyPress(KeyEvent.VK_END);
            robot.keyRelease(KeyEvent.VK_END);

            List<WebElement> list = driver.findElements(By.xpath("//img[@alt]"));
            int count = 1;
            for (WebElement element : list) {
                String srcs = element.getAttribute("src");
                String attribute = element.getAttribute("alt");
                System.out.println("title: " + attribute);
                System.out.println(" ");
                System.out.println("link " + srcs);

                // Report a failed download instead of silently ignoring the result.
                boolean downloaded = downloadFromUrl(srcs, "Path" + count + ".jpg", Duration.ofSeconds(20));
                if (!downloaded) {
                    System.err.println("Failed to download " + srcs);
                }
                count++;
            }
        } finally {
            // End the browser session even if something above throws.
            driver.quit();
        }

    }

    /**
     * Downloads a URL to a local file by running the external {@code curl} program.
     *
     * <p>The command is started with separate arguments (no string concatenation), so
     * URLs or paths containing spaces are passed to curl intact. curl is run with
     * {@code --fail} so that an HTTP error status yields a non-zero exit code, which
     * this method checks before reporting success.
     *
     * @param url the address to download; {@code null} or empty makes the method return {@code false}
     * @param fileNameWithPath the file curl should write to; {@code null} or empty makes
     *        the method return {@code false}
     * @param timeoutDuration how long to wait for curl to finish (whole seconds are used);
     *        {@code null} means 5 minutes
     * @return {@code true} if curl finished within the timeout with exit code 0;
     *         {@code false} on invalid arguments, timeout, non-zero exit code, failure to
     *         start curl, or interruption
     */
    public static boolean downloadFromUrl(String url, String fileNameWithPath, Duration timeoutDuration) {
        if (url == null || url.isEmpty() || fileNameWithPath == null || fileNameWithPath.isEmpty()) {
            System.err.println("Cannot download: url and target file name must both be non-empty");
            return false;
        }
        if (timeoutDuration == null) {
            timeoutDuration = Duration.ofMinutes(5);
        }
        Process process = null;
        try {
            // inheritIO() connects curl's output to this process's console, so curl can
            // never block on an undrained pipe and its error messages stay visible.
            process = new ProcessBuilder("curl", "--fail", url, "--output", fileNameWithPath)
                    .inheritIO()
                    .start();
            System.out.println("Downloading file to " + fileNameWithPath + " ...");

            long startNanos = System.nanoTime();
            boolean finished = process.waitFor(
                    timeoutDuration.getSeconds(), java.util.concurrent.TimeUnit.SECONDS);
            long totalSeconds = java.util.concurrent.TimeUnit.NANOSECONDS
                    .toSeconds(System.nanoTime() - startNanos);

            if (!finished) {
                // Do not leave curl running in the background after giving up on it.
                process.destroyForcibly();
                System.err.println("Unable to download file even after "
                        + timeoutDuration.getSeconds() + " seconds of wait");
                return false;
            }

            int exitCode = process.exitValue();
            if (exitCode != 0) {
                System.err.println("curl exited with code " + exitCode + " while downloading " + url);
                return false;
            }

            System.out.println(fileNameWithPath + " got downloaded in seconds - " + totalSeconds);
            return true;
        } catch (IOException ex) {
            // Typically means the curl executable could not be started.
            ex.printStackTrace();
        } catch (InterruptedException ex) {
            if (process != null) {
                process.destroyForcibly();
            }
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            ex.printStackTrace();
        }
        return false;
    }

My downloaded images: enter image description here

Advertisement