Replacing URL Character Encodings in Java

Tags: , ,



I am trying to replace character encodings in URLs, to no avail. I have searched around the internet but have only found outdated libraries and answers.

import java.io.*;
import java.util.*;

/**
 * Scans a whitespace-tokenized HTML text file for {@code href="..."} attributes, prints each
 * link target with its percent-escapes decoded, and reports how many links were found.
 */
public class Chapter6 {

    /** Marker that introduces a link target inside a token. */
    private static final String HREF_PREFIX = "href=\"";

    /**
     * Reads {@code BCwebSiteHTML.txt} from the working directory, prints every decoded link
     * target, then prints the total number of links.
     *
     * @param args unused
     * @throws FileNotFoundException if the input file cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        // try-with-resources so the underlying file handle is always released.
        try (Scanner input = new Scanner(new File("BCwebSiteHTML.txt"))) {
            System.out.println("Total Count: " + countHref(input));
        }
    }

    /**
     * Counts the tokens that contain {@code href="} and prints the decoded target of each one.
     *
     * <p>The target is the text between {@code href="} and the next double quote in the same
     * token (or the end of the token when there is no closing quote). Because the scanner
     * splits on whitespace, a target that itself contains whitespace is only printed up to the
     * first whitespace character.
     *
     * @param s scanner positioned at the start of the text to inspect; consumed to the end
     * @return the number of tokens containing {@code href="}
     */
    public static int countHref(Scanner s) {
        int count = 0;
        while (s.hasNext()) {
            String token = s.next();
            int prefixAt = token.indexOf(HREF_PREFIX);
            if (prefixAt < 0) {
                continue;
            }
            count++;

            // Slice relative to where the prefix actually is, not at fixed offsets, so
            // tokens such as {@code href="x">text</a>} yield just {@code x}.
            int valueStart = prefixAt + HREF_PREFIX.length();
            int valueEnd = token.indexOf('"', valueStart);
            if (valueEnd < 0) {
                valueEnd = token.length();
            }

            // Decode after slicing so an escaped quote (%22) cannot be mistaken for the
            // closing quote of the attribute.
            System.out.println(fixHTML(token.substring(valueStart, valueEnd)));
        }
        return count;
    }

    /**
     * Decodes percent-escapes for printable ASCII characters ({@code %20} through {@code %7E}).
     *
     * <p>The input is processed in a single left-to-right pass, so text produced by one escape
     * is never decoded a second time ({@code "%2541"} becomes {@code "%41"}, not {@code "A"}).
     * Hex digits are accepted in either case. Every other character is copied unchanged,
     * including {@code +}, escapes outside the printable ASCII range, and malformed or
     * truncated escapes.
     *
     * @param data text that may contain percent-escapes; must not be {@code null}
     * @return {@code data} with printable-ASCII percent-escapes replaced by their characters
     */
    public static String fixHTML(String data) {
        int length = data.length();
        StringBuilder decoded = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            char current = data.charAt(i);
            int value = -1;
            if (current == '%' && i + 2 < length) {
                value = hexByte(data.charAt(i + 1), data.charAt(i + 2));
            }
            if (value >= 0x20 && value <= 0x7E) {
                decoded.append((char) value);
                i += 3;
            } else {
                decoded.append(current);
                i++;
            }
        }
        return decoded.toString();
    }

    /** Combines two hex digits into a byte value, or returns -1 if either is not a hex digit. */
    private static int hexByte(char high, char low) {
        int h = hexDigit(high);
        int l = hexDigit(low);
        return (h < 0 || l < 0) ? -1 : (h << 4) | l;
    }

    /** Returns the value of an ASCII hex digit (either case), or -1 if it is not one. */
    private static int hexDigit(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }
}

For example, https://fonts.googleapis.com/css?family=Noto+Serif%3A400%2C400i%2C700%2C700i&ver=5.3.4 would become https://fonts.googleapis.com/css?family=Noto+Serif:400,400i,700,700i&ver=5.3.4

The code shown above works, but there must be a more concise way of doing this, right? That is my question.

Answer

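Actually, you can use `java.net.URLDecoder` from the standard library. Be aware that it also converts `+` to a space, so the `Noto+Serif` in your example would come out as `Noto Serif`: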

// NOTE(review): the one-argument decode(String) overload appears to be deprecated and to rely
// on the platform default charset; prefer URLDecoder.decode(data, "UTF-8") — confirm in JDK docs.
data = URLDecoder.decode(data);


Source: stackoverflow