import java.util.ArrayList; import java.awt.Desktop; import java.util.logging.Level; import java.util.logging.Logger; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.util.Random; import javafx.application.Application; import javafx.event.ActionEvent; import javafx.event.EventHandler; import javafx.geometry.Insets; import javafx.geometry.HPos; import javafx.scene.Scene; import javafx.scene.control.Button; import javafx.scene.control.CheckBox; import javafx.scene.layout.GridPane; import javafx.scene.layout.Pane; import javafx.scene.layout.HBox; import javafx.scene.layout.VBox; import javafx.scene.text.Text; import javafx.scene.control.TextField; import javafx.scene.control.ComboBox; import javafx.scene.control.RadioButton; import javafx.scene.control.ToggleGroup; import javafx.stage.FileChooser; import javafx.stage.Stage; /** * Taxonomic Matching Module * * Given two FASTA files, this module will filter each so that only entries representing a taxonomic group present in both will be kept; others will be put in a discard pile. * * @author Devin Camenares, PhD * * @version 5-17-16 * @since 1-19-16 */ /* Taxonomic Matching Module Copyright (C) 2016 Devin Camenares This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ public final class TaxonomyFilter extends Application { private Desktop desktop = Desktop.getDesktop(); /** * This method was written by CC and modified by DJC, based upon code from StackOverflow user Kip. * It takes a string and writes it to a specified file, appending said file. * The purpose of this method is to save information and results as they are generated, reducing memory load. * * @param content The string to be written * @param file The destination file for the string output */ private void writeFile(String content, File file){ try (FileWriter fw = new FileWriter(file, true)) { fw.write(content);// } catch (IOException ex) { Logger.getLogger(TaxonomyFilter.class.getName()).log(Level.SEVERE, null, ex); } } /** * This method was written by CC and modified by DJC, based upon code from StackOverflow user Kip. * It takes a string and writes it to a specified file, appending said file, with another line break included. * The purpose of this method is to save information and results as they are generated, reducing memory load. * * @param content The string to be written * @param file The destination file for the string output */ private void writeFileN(String content, File file){ try (FileWriter fw = new FileWriter(file, true)) { String contentN = content + System.lineSeparator() + ""; fw.write(contentN);// } catch (IOException ex) { Logger.getLogger(TaxonomyFilter.class.getName()).log(Level.SEVERE, null, ex); } } /** * This method was written by CC * * @param file The desired file to be opened. 
*/ private void openFile(File file) { try { desktop.open(file); } catch (IOException ex) { Logger.getLogger( TaxonomyFilter.class.getName()).log( Level.SEVERE, null, ex ); } } /** * A timestamp, used for generating unique file IDs */ final long timeUnique = System.currentTimeMillis(); // Initialize All Global Variables public static String flName1 = ""; public static String flName2 = ""; public static String flPath1 = ""; public static String flPath2 = ""; public static String exactO = "Exact"; public static String strainO = "Strain"; public static String subsO = "Subspecies"; public static String speciesO = "Species"; public static String genusO = "Genus"; public static String bestO = "Best Match"; public static String fastO = "Fast Match"; /** * Integer counter to track the number of sequences in the first collection */ public static int seqNum1 = 0; /** * Integer counter to track the number of sequences in the second collection */ public static int seqNum2 = 0; /** * A string container to keep track of the userChoice and take appropriate action */ public static String userChoice = ""; /** * Generates a directory name based on a 'random' timestamp */ public String dirName = Long.toString(timeUnique); /** * Container for first file information / path */ File file1 = new File(""); /** * Container for second file information / path */ File file2 = new File(""); @Override public void start(final Stage stage) { stage.setTitle("Taxonomy Filter"); // Initialize Filechooser, Buttons final FileChooser fileChooser = new FileChooser(); final Button openButton1 = new Button("Open Sequence File"); final Button openButton2 = new Button("Open Sequence File"); final Button processingButton = new Button("Process File"); final Button idButton = new Button("Generate New Job ID"); // Initialize Gridpane final GridPane inputGridPane = new GridPane(); // Set Master Style Strings String bigText = "-fx-font: 35px Tahoma"; // Initialize ComboBoxes. 
Contributed by Christopher Camenares, with Modification final ComboBox comboBox1; comboBox1 = new ComboBox<>(); comboBox1.getItems().addAll(genusO, speciesO, subsO, strainO, exactO, bestO, fastO); comboBox1.setMinWidth(100); comboBox1.setMinHeight(25); comboBox1.setValue(bestO); HBox comboBoxSection1; comboBoxSection1 = new HBox(); comboBoxSection1.setSpacing(10); comboBoxSection1.getChildren().addAll(comboBox1); // Initialize Texts. Contributed by Christopher Camenares, with Modification Text lbl1 = new Text(" Molecules "); lbl1.setStyle(bigText); Text lbl3 = new Text("No File Loaded"); Text lbl4 = new Text("No File Loaded"); Text lbl5 = new Text(System.lineSeparator() + "Output Parameters"); lbl5.setStyle(bigText); Text lbl6 = new Text("Match Stringency"); Text lbl7 = new Text("Job ID#: "); Text lbl8 = new Text("Job Status Pending"); final ToggleGroup group = new ToggleGroup(); RadioButton rb1 = new RadioButton("First Match"); rb1.setToggleGroup(group); RadioButton rb2 = new RadioButton("All Matches"); rb2.setToggleGroup(group); rb2.setSelected(true); RadioButton rb3 = new RadioButton("Random Match"); rb3.setToggleGroup(group); // Text Field. 
Contributed by Christopher Camenares TextField jobID; jobID = new TextField(); jobID.setText(dirName); jobID.setMinHeight(50); // Setup the Grid, Populate with items inputGridPane.add(lbl1, 1, 0); inputGridPane.setColumnSpan(lbl1, 2); inputGridPane.setHalignment(lbl1, HPos.CENTER); inputGridPane.add(openButton1, 0, 1); inputGridPane.setColumnSpan(openButton1, 2); inputGridPane.add(openButton2, 2, 1); inputGridPane.setColumnSpan(openButton2, 2); inputGridPane.add(lbl3, 0, 2); inputGridPane.setColumnSpan(lbl3, 2); inputGridPane.add(lbl4, 2, 2); inputGridPane.setColumnSpan(lbl4, 2); inputGridPane.add(lbl5, 1, 3); inputGridPane.setColumnSpan(lbl5, 2); inputGridPane.setHalignment(lbl5, HPos.CENTER); inputGridPane.add(lbl6, 1, 4); inputGridPane.add(comboBoxSection1, 2, 4); inputGridPane.add(lbl7, 1, 5); inputGridPane.add(jobID, 2, 5); inputGridPane.add(rb1, 2, 8); inputGridPane.add(rb2, 2, 9); inputGridPane.add(rb3, 2, 10); inputGridPane.add(processingButton, 1, 11); inputGridPane.setColumnSpan(processingButton, 2); inputGridPane.setHalignment(processingButton, HPos.CENTER); inputGridPane.add(idButton, 1, 6); inputGridPane.setColumnSpan(idButton, 2); inputGridPane.setHalignment(idButton, HPos.CENTER); inputGridPane.add(lbl8, 1, 7); inputGridPane.setColumnSpan(lbl8, 2); inputGridPane.setHalignment(lbl8, HPos.CENTER); inputGridPane.setHgap(6); inputGridPane.setVgap(6); final Pane rootGroup = new VBox(12); rootGroup.getChildren().addAll(inputGridPane); rootGroup.setPadding(new Insets(12, 12, 12, 12)); /** * Defines button action: generates new job ID */ idButton.setOnAction( new EventHandler() { @Override public void handle(final ActionEvent e) { final long timeButton = System.currentTimeMillis(); jobID.setText(Long.toString(timeButton)); } } ); openButton1.setOnAction( new EventHandler() { @Override public void handle(final ActionEvent e) { configureFileChooser(fileChooser); file1 = fileChooser.showOpenDialog(stage); if (file1 != null) { flName1 = file1.getName(); flPath1 
= file1.getPath(); lbl3.setText(flName1); } } }); openButton2.setOnAction( new EventHandler() { @Override public void handle(final ActionEvent e) { configureFileChooser(fileChooser); file2 = fileChooser.showOpenDialog(stage); if (file2 != null) { flName2 = file2.getName(); flPath2 = file2.getPath(); lbl4.setText(flName2); } } }); processingButton.setOnAction( new EventHandler() { @Override public void handle(final ActionEvent e) { if (file1 != null && file2 != null) { /** * Timestamp used to mark the beginning of processing time. */ final long timeStart = System.currentTimeMillis(); lbl8.setText("Processing."); // Get new name for directory dirName = jobID.getText(); // Get and set user choice for matching stringency userChoice = comboBox1.getValue(); /** * String container for reporting endtime information */ String repF = ""; /** * String container for holding sequence information for writing */ String seqF = ""; /** * Counter to determine how many sequences from 1st collection are kept */ int keep1 = 0; /** * Counter to determine how many sequences from 2nd collection are kept */ int keep2 = 0; /** * Counter to determine how many sequences from 1st collection are discarded */ int waste1 = 0; /** * Counter to determine how many sequences from 2nd collection are discarded */ int waste2 = 0; /** * A convenient way to add a double line break */ String dblSpace = System.lineSeparator() + System.lineSeparator(); // Make a new directory, specify file destinations File dir = new File(dirName); dir.mkdir(); // Results File String pathName = dirName + "\\TaxonomyFilter_result_keep_1.txt"; /** * File to which the 1st sequence collection matches will be written */ File file3 = new File(pathName); pathName = dirName + "\\TaxonomyFilter_result_keep_2.txt"; /** * File to which the 2nd sequence collection matches will be written */ File file4 = new File(pathName); pathName = dirName + "\\TaxonomyFilter_result_waste_1.txt"; /** * File to which the 1st sequence collection 
discards will be written */ File file5 = new File(pathName); pathName = dirName + "\\TaxonomyFilter_result_waste_2.txt"; /** * File to which the 2nd sequence collection discards will be written */ File file6 = new File(pathName); pathName = dirName + "\\TaxonomyFilter_report.txt"; /** * File to which the runtime information will be written */ File file7 = new File(pathName); // Begin processing routine / calculations lbl8.setText("Processing....."); /** * Array containing the Header line from each sequence in File1 */ ArrayList arr1Head = new ArrayList<>(); /** * Array containing the sequence body from each sequence in File1 */ ArrayList arr1Body = new ArrayList<>(); /** * Array containing the taxonomic information from each sequence in File1 */ ArrayList arr1Tax = new ArrayList<>(); /** * Array containing the Header line from each sequence in File2 */ ArrayList arr2Head = new ArrayList<>(); /** * Array containing the sequence body from each sequence in File2 */ ArrayList arr2Body = new ArrayList<>(); /** * Array containing the taxonomic information from each sequence in File2 */ ArrayList arr2Tax = new ArrayList<>(); /** * A switch to help determine that body sequence be added. 
*/ boolean addBody1 = false; boolean addBodyNoID = false; /** * A counter to track the position in the arrayList a sequence body should deposit */ int fileLine1 = -1; int fileLineNoID = -1; // Read the first file, store as array for reference try (BufferedReader br = new BufferedReader(new FileReader(file1))) { /** * The line being read from a file at the moment */ String line; while ((line = br.readLine()) != null) { if (line.contains(">")) { seqNum1++; addBody1 = true; addBodyNoID = false; if(line.contains("[") && line.contains("]")) { fileLine1++; String taxID = ""; taxID = line.replaceAll("(.*) \\[", ""); taxID = taxID.replaceAll("](.*)", ""); arr1Tax.add(taxID); arr1Head.add(line); arr1Body.add(""); } else { fileLineNoID++; String spacerNoID = ""; if (fileLineNoID > 0) { spacerNoID = System.lineSeparator() + System.lineSeparator();; } addBody1 = false; addBodyNoID = true; waste1++; writeFile(spacerNoID + line + System.lineSeparator(), file5); } } else if(addBody1 && !line.contains(">")) { String oldLine = arr1Body.get(fileLine1); oldLine += line; arr1Body.set(fileLine1, oldLine); } else if(addBodyNoID && !line.contains(">")) { writeFile(line, file5); } } } catch (FileNotFoundException ex) { Logger.getLogger(TaxonomyFilter.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(TaxonomyFilter.class.getName()).log(Level.SEVERE, null, ex); } // Read the second file, process each step try (BufferedReader br = new BufferedReader(new FileReader(file2))) { /** * The line being read from a file at the moment */ String line; String taxID = ""; String seqHead = ""; //Reinitialize values addBody1 = false; addBodyNoID = false; fileLine1 = -1; /** * Temporary container to hold or build the sequence body from lines */ String seqBody = ""; while ((line = br.readLine()) != null) { if (line.contains(">")) { seqNum2++; addBody1 = true; addBodyNoID = false; if(line.contains("[") && line.contains("]")) { fileLine1++; taxID = ""; taxID = 
line.replaceAll("(.*) \\[", ""); taxID = taxID.replaceAll("](.*)", ""); arr2Tax.add(taxID); arr2Head.add(line); arr2Body.add(""); } else { fileLineNoID++; String spacerNoID = ""; if (fileLineNoID > 0) { spacerNoID = System.lineSeparator() + System.lineSeparator();; } addBody1 = false; addBodyNoID = true; writeFile(spacerNoID + line + System.lineSeparator(), file6); } } else if(addBody1 && !line.contains(">")) { String oldLine = arr2Body.get(fileLine1); oldLine += line; arr2Body.set(fileLine1, oldLine); } else if(addBodyNoID && !line.contains(">")) { writeFile(line, file6); } } } catch (FileNotFoundException ex) { Logger.getLogger(TaxonomyFilter.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(TaxonomyFilter.class.getName()).log(Level.SEVERE, null, ex); } // Set parameters for matching algorithim String matchChoice = ""; // Search for matches!! ArrayList> arrMatch = new ArrayList<>(); boolean[] matched2 = new boolean[arr2Tax.size()]; for (int i = 0; i < arr1Tax.size(); i++) { ArrayList tempMatch = new ArrayList<>(); tempMatch.add(i); int extras = 0; for (int j = 0; j < arr2Tax.size(); j++) { if(matchTax(arr1Tax.get(i), arr2Tax.get(j))) { tempMatch.add(j); matched2[j] = true; } } arrMatch.add(tempMatch); } for (int i = 0; i < arrMatch.size(); i++) { if(arrMatch.get(i).size() > 1) { keep1++; if(group.getSelectedToggle() == rb1) { matchChoice = "First Match"; writeFile(arr1Head.get(arrMatch.get(i).get(0)) + System.lineSeparator() + arr1Body.get(arrMatch.get(i).get(0)) + dblSpace, file3); writeFile(arr2Head.get(arrMatch.get(i).get(1)) + System.lineSeparator() + arr2Body.get(arrMatch.get(i).get(1)) + dblSpace, file4); } else if(group.getSelectedToggle() == rb2) { matchChoice = "All Matches"; for (int j = 1; j < arrMatch.get(i).size(); j++) { writeFile(arr1Head.get(arrMatch.get(i).get(0)) + System.lineSeparator() + arr1Body.get(arrMatch.get(i).get(0)) + dblSpace, file3); writeFile(arr2Head.get(arrMatch.get(i).get(j)) + 
System.lineSeparator() + arr2Body.get(arrMatch.get(i).get(j)) + dblSpace, file4); } } else if(group.getSelectedToggle() == rb3) { matchChoice = "Random Match"; writeFile(arr1Head.get(arrMatch.get(i).get(0)) + System.lineSeparator() + arr1Body.get(arrMatch.get(i).get(0)) + dblSpace, file3); Random rand = new Random(); int randomSelection = rand.nextInt(arrMatch.get(i).size()); randomSelection = Math.max(randomSelection, 1); writeFile(arr2Head.get(arrMatch.get(i).get(randomSelection)) + System.lineSeparator() + arr2Body.get(arrMatch.get(i).get(randomSelection)) + dblSpace, file4); } } else { waste1++; writeFile(arr1Head.get(arrMatch.get(i).get(0)) + System.lineSeparator() + arr1Body.get(arrMatch.get(i).get(0)) + dblSpace, file5); } } //Find unmatched sequences from collection 2 for (int i = 0; i < matched2.length; i++) { if(!matched2[i]) { waste2++; writeFile(arr2Head.get(i) + System.lineSeparator() + arr2Body.get(i) + dblSpace, file6); } else { keep2++; } } final long timeEnd = System.currentTimeMillis(); repF += "Source #1 File: " + flPath1 + System.lineSeparator(); repF += System.lineSeparator(); repF += "Source #2 File: " + flPath2 + System.lineSeparator(); repF += System.lineSeparator(); repF += "Matching Stringency: " + comboBox1.getValue() + ", " + matchChoice + System.lineSeparator(); repF += System.lineSeparator(); repF += "Before Processing" + System.lineSeparator(); repF += "1st collection: " + Integer.toString(seqNum1) + " sequences" + System.lineSeparator(); repF += "2nd collection: " + Integer.toString(seqNum2) + " sequences" + System.lineSeparator(); repF += System.lineSeparator(); repF += "After Processing" + System.lineSeparator(); repF += "1st collection: " + Integer.toString(keep1) + " sequences retained, " + Integer.toString(waste1) + " sequences discarded" + System.lineSeparator(); repF += "2nd collection: " + Integer.toString(keep2) + " sequences retained, " + Integer.toString(waste2) + " sequences discarded" + System.lineSeparator(); repF += 
"Unique Taxonomic IDs, 1st collection: " + Integer.toString(uniqueArray(arr1Tax)) + System.lineSeparator(); repF += "Unique Taxonomic IDs, 2nd collection " + Integer.toString(uniqueArray(arr2Tax)) + System.lineSeparator(); repF += System.lineSeparator(); repF += Long.toString(timeEnd - timeStart) + " milliseconds of runtime"; // Write the result to the file, report success writeFile(repF, file7); lbl8.setText("Files Saved!"); openFile(dir); } else { lbl8.setText("Processing Error! Sorry..."); } } }); stage.setScene(new Scene(rootGroup)); stage.show(); } public static void main(String[] args) { Application.launch(args); } private static void configureFileChooser( final FileChooser fileChooser) { fileChooser.setTitle("View Files"); fileChooser.setInitialDirectory( new File(System.getProperty("user.home")) ); fileChooser.getExtensionFilters().addAll( new FileChooser.ExtensionFilter("Plain Text", "*.txt"), new FileChooser.ExtensionFilter("FASTA", "*.fasta"), new FileChooser.ExtensionFilter("Rich Text Format", "*.rtf"), new FileChooser.ExtensionFilter("All Files", "*.*") ); } /** * * @param str1 * @param str2 * @return */ private static boolean matchTax (String str1, String str2) { boolean taxMatch = false; String[] arr1; arr1 = str1.split(" "); String[] arr2; arr2 = str2.split(" "); int searchDepth = -1; int minArrSize = Math.min(arr1.length, arr2.length); int maxArrSize = Math.max(arr1.length, arr2.length); // Add code to deal with user choices switch (userChoice) { case "Best Match": // for (int m = 0; m < minArrSize; m++) { if (arr1[m].equals(arr2[m])) { taxMatch = true; } else { taxMatch = false; m = minArrSize; } } return taxMatch; case "Fast Match": // if (str1.contains(str2) || str2.contains(str1)) { taxMatch = true; return taxMatch; } break; case "Genus": searchDepth = 1; // break; case "Species": searchDepth = 2; // break; case "Subspecies": searchDepth = 3; // break; case "Strains": searchDepth = 4; // break; case "Exact": // if (str1.equals(str2)) { taxMatch 
= true; return taxMatch; } break; } searchDepth = Math.min(minArrSize, searchDepth); // Process if defined match indicated if (searchDepth > -1) { for (int i = 0; i < searchDepth; i++) { if (arr1[i].equals(arr2[i])) { taxMatch = true; } else { taxMatch = false; break; } } } return taxMatch; } private static int uniqueArray (ArrayList arr) { int uniqueT = 0; for (int i = 0; i < arr.size(); i++) { boolean unique = true; for (int j = 0; j < arr.size(); j++) { if(arr.get(i).equals(arr.get(j)) && i != j) { unique = false; j = arr.size(); } } if(unique) { uniqueT++; } } return uniqueT; }; }