I’m trying to search through a file (File B) for matching strings from another file (File A). If the string is found in File A, then print the entire line(s) from File B and also update its progress to its corresponding JProgressBar(s) as the lines are being read.
The code below is working fine as expected, but the issue is performance. When dealing with large files, it takes about 15 minutes to scan just 5 thousand lines.
I’m really looking for a way to process large files for example 500K lines.
Please suggest if this can be enhanced to handle large files or which part of my code is causing the slowness.
import java.awt.BorderLayout; import java.awt.EventQueue; import java.awt.TextField; import javax.swing.JFrame; import javax.swing.JPanel; import javax.swing.border.EmptyBorder; import javax.swing.JFileChooser; import javax.swing.JProgressBar; import javax.swing.JTextArea; import javax.swing.JButton; import java.awt.Font; import javax.swing.JTextField; import javax.swing.JLabel; import javax.swing.JScrollPane; import java.awt.event.ActionListener; import java.awt.event.ActionEvent; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.LineNumberReader; import java.time.LocalDateTime; public class Test_MultiJProgressBars_MultiFileReads extends JFrame { private JPanel contentPane; private JTextField textField_File1; private JTextField textField_File2; private JProgressBar progressBar_F1; private JProgressBar progressBar_F2; private JTextArea textArea_File1; /** * Launch the application. */ public static void main(String[] args) { EventQueue.invokeLater(new Runnable() { public void run() { try { Test_MultiJProgressBars_MultiFileReads frame = new Test_MultiJProgressBars_MultiFileReads(); frame.setVisible(true); } catch (Exception e) { e.printStackTrace(); } } }); } /** * Create the frame. */ public void FileLineCount (JTextField TexFieldName, JProgressBar ProgressBarName) throws IOException { File FileX = new File (TexFieldName.getText()); FileReader Fr = new FileReader(FileX); LineNumberReader Lnr = new LineNumberReader(Fr); int lineNumber =0 ; while (Lnr.readLine() !=null) { lineNumber++; } // Setting Maximum Value on ProgressBar ProgressBarName.setMaximum(lineNumber); System.out.println("Total line in file : "+lineNumber); Lnr.close(); } public void ScanFileForMatches() { File My_Refernce_File = new File (textField_File1.getText()); File My_Source_File = new File (textField_File2.getText()); int F1_JP_v = 0; int F2_JP_v = 0; try { BufferedReader F1_br = new BufferedReader(new FileReader(My_Refernce_File)); String F1_br_Line; String F2_br_Line = null; while ((F1_br_Line = F1_br.readLine()) !=null) { //System.out.println("File 1 : "+F1_br_Line+"n"); F1_JP_v++; progressBar_F1.setValue(F1_JP_v); try { BufferedReader F2_br = new BufferedReader(new FileReader(My_Source_File)); while ((F2_br_Line = F2_br.readLine()) !=null) { F2_JP_v++; progressBar_F2.setValue(F2_JP_v); if (F1_br_Line.contains(F2_br_Line)) { System.out.println("MATCHED --- File 1:"+F1_br_Line+" File 2:"+F2_br_Line+"n"); textArea_File1.append(LocalDateTime.now()+" : SYSOUT : MATCHED --- File 1:= "+F1_br_Line"n"); } else { System.out.println("NOMATCH --- File 1:"+F1_br_Line+" File 2:"+F2_br_Line+"n"); } // Reset Progressbar after each Loop. progressBar_F2.setValue(0); } // Set Progressbar to last value in the loop. progressBar_F2.setValue(F2_JP_v); F2_br.close(); } catch (Exception e) { // TODO: handle exception } } F1_br.close(); } catch (Exception e) { // TODO: handle exception } } public Test_MultiJProgressBars_MultiFileReads() { setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); setBounds(100, 100, 799, 568); contentPane = new JPanel(); contentPane.setBorder(new EmptyBorder(5, 5, 5, 5)); setContentPane(contentPane); contentPane.setLayout(null); progressBar_F1 = new JProgressBar(); progressBar_F1.setStringPainted(true); progressBar_F1.setBounds(10, 96, 763, 50); contentPane.add(progressBar_F1); progressBar_F2 = new JProgressBar(); progressBar_F2.setStringPainted(true); progressBar_F2.setBounds(10, 169, 763, 50); contentPane.add(progressBar_F2); JScrollPane scrollPane = new JScrollPane(); scrollPane.setBounds(10, 264, 763, 109); contentPane.add(scrollPane); textArea_File1 = new JTextArea(); scrollPane.setViewportView(textArea_File1); JScrollPane scrollPane_1 = new JScrollPane(); scrollPane_1.setBounds(10, 409, 763, 110); contentPane.add(scrollPane_1); JTextArea textArea_FIle2 = new JTextArea(); scrollPane_1.setViewportView(textArea_FIle2); JButton btnStart = new JButton("SCAN"); btnStart.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent arg0) { // Call FileLineCount Method and setMaximum value on respective JPorgress Bars. try { FileLineCount(textField_File1,progressBar_F1); FileLineCount(textField_File2,progressBar_F2); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } // Call ScanFileForMatches to Scan files and Update JProgress Bars. Thread t1 = new Thread (new Runnable() { @Override public void run() { // TODO Auto-generated method stub //ScanFileForMatches(); ScanFileForMatches_TEST(); } }); t1.start(); } }); btnStart.setFont(new Font("Tahoma", Font.BOLD, 11)); btnStart.setBounds(684, 10, 89, 57); contentPane.add(btnStart); textField_File1 = new JTextField(); textField_File1.setBounds(10, 10, 486, 23); contentPane.add(textField_File1); textField_File1.setColumns(10); textField_File2 = new JTextField(); textField_File2.setBounds(10, 44, 486, 23); contentPane.add(textField_File2); textField_File2.setColumns(10); JButton btnFile_File1 = new JButton("File 1"); btnFile_File1.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent arg0) { JFileChooser JFC_File1 = new JFileChooser(); JFC_File1.showOpenDialog(null); File JFC_File1_Name = JFC_File1.getSelectedFile(); textField_File1.setText(JFC_File1_Name.getAbsolutePath()); } }); btnFile_File1.setBounds(506, 10, 89, 23); contentPane.add(btnFile_File1); JButton btnFile_File2 = new JButton("File 2"); btnFile_File2.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent arg0) { JFileChooser JFC_File2 = new JFileChooser(); JFC_File2.showOpenDialog(null); File JFC_File2_Name = JFC_File2.getSelectedFile(); textField_File2.setText(JFC_File2_Name.getAbsolutePath()); } }); btnFile_File2.setBounds(506, 44, 89, 23); contentPane.add(btnFile_File2); JLabel lblFile = new JLabel("File 1 Progress"); lblFile.setBounds(20, 78, 137, 14); contentPane.add(lblFile); JLabel lblFile_1 = new JLabel("File 2 Progress"); lblFile_1.setBounds(20, 150, 137, 14); contentPane.add(lblFile_1); JLabel lblFileLog = new JLabel("File 2 Log"); lblFileLog.setBounds(20, 384, 147, 14); contentPane.add(lblFileLog); JLabel lblFileLog_1 = new JLabel("File 1 Log"); lblFileLog_1.setBounds(20, 239, 147, 14); contentPane.add(lblFileLog_1); } }
Advertisement
Answer
Your current solution is lineary iterating through file1, and for each line lineary iterating through file2. This effectively results in a running time of O(F1*F2)
: The time it takes to run will scale quadratically by the numer of lines (F1 and F2) in your files. Plus file2 is put into memory each time it’s checked for a match, which is very expensive.
A better solution would be to read file2 into memory (Eg. an ArrayList) and sort it:
Collections.sort(file2);
Then file1 could be iterated as you currently do, and for each line use Binary Search to check if that String exists in file2:
for (String s1 : file1) int index = Collections.binarySearch(file2, s1);
Index would be non-negative if s1 is in file2.
This solution takes linearithmic time instead of quadratic and thus scales much better on larger inputs.
If you would like to improve the time it takes to sort, consider MSD Sort instead of Collections.sort
. Only a minor improvement, but hey, it counts.