import java.awt.Desktop;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import javafx.application.Application;
import javafx.event.ActionEvent;
import javafx.event.EventHandler;
import javafx.geometry.HPos;
import javafx.geometry.Insets;
import javafx.scene.Scene;
import javafx.scene.control.Button;
import javafx.scene.control.CheckBox;
import javafx.scene.control.ComboBox;
import javafx.scene.control.RadioButton;
import javafx.scene.control.TextField;
import javafx.scene.control.ToggleGroup;
import javafx.scene.layout.GridPane;
import javafx.scene.layout.HBox;
import javafx.scene.layout.Pane;
import javafx.scene.layout.VBox;
import javafx.scene.text.Text;
import javafx.stage.FileChooser;
import javafx.stage.Stage;
/**
* Taxonomic Matching Module
*
* Given two FASTA files, this module will filter each so that only entries representing a taxonomic group present in both will be kept; others will be put in a discard pile.
*
* @author Devin Camenares, PhD
*
* @version 5-17-16
* @since 1-19-16
*/
/*
Taxonomic Matching Module
Copyright (C) 2016 Devin Camenares
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
public final class TaxonomyFilter extends Application {

    /** Shared logger used for every reported I/O failure. */
    private static final Logger LOGGER = Logger.getLogger(TaxonomyFilter.class.getName());

    /** Platform line separator used in all output files. */
    private static final String LS = System.lineSeparator();

    /** Blank-line gap written after every sequence record. */
    private static final String RECORD_GAP = LS + LS;

    /** Strips everything up to and including the last {@code " ["} of a header line. */
    private static final Pattern TAX_PREFIX = Pattern.compile("(.*) \\[");

    /** Strips the first {@code "]"} and everything that follows it. */
    private static final Pattern TAX_SUFFIX = Pattern.compile("](.*)");

    /** Output mode: pair each kept sequence with its first match only. */
    private static final String MODE_FIRST = "First Match";

    /** Output mode: pair each kept sequence with every match. */
    private static final String MODE_ALL = "All Matches";

    /** Output mode: pair each kept sequence with one match chosen at random. */
    private static final String MODE_RANDOM = "Random Match";

    /** Status text shown when a job cannot be run or fails. */
    private static final String STATUS_ERROR = "Processing Error! Sorry...";

    /**
     * A timestamp, used for generating unique file IDs
     */
    final long timeUnique = System.currentTimeMillis();

    // Names and paths of the two selected source files (filled in by the open buttons).
    public static String flName1 = "";
    public static String flName2 = "";
    public static String flPath1 = "";
    public static String flPath2 = "";

    // Labels of the match-stringency options; also compared against in matchTax.
    public static String exactO = "Exact";
    public static String strainO = "Strain";
    public static String subsO = "Subspecies";
    public static String speciesO = "Species";
    public static String genusO = "Genus";
    public static String bestO = "Best Match";
    public static String fastO = "Fast Match";

    /**
     * Number of sequences (header lines) found in the first collection during the latest run
     */
    public static int seqNum1 = 0;

    /**
     * Number of sequences (header lines) found in the second collection during the latest run
     */
    public static int seqNum2 = 0;

    /**
     * The match stringency selected by the user; read by {@link #matchTax(String, String)}
     */
    public static String userChoice = "";

    /**
     * Output directory name; defaults to the start-up timestamp and is replaced by the job ID field
     */
    public String dirName = Long.toString(timeUnique);

    /**
     * Container for first file information / path
     */
    File file1 = new File("");

    /**
     * Container for second file information / path
     */
    File file2 = new File("");

    /**
     * In-memory view of one FASTA file: the records that carry a bracketed taxonomy tag,
     * plus counters for everything that was seen.
     */
    private static final class FastaCollection {
        /** Header line of each tagged record. */
        final List<String> heads = new ArrayList<>();
        /** Concatenated body lines of each tagged record. */
        final List<StringBuilder> bodies = new ArrayList<>();
        /** Taxonomy tag extracted from each tagged record's header. */
        final List<String> taxIds = new ArrayList<>();
        /** Number of header lines seen, tagged or not. */
        int total;
        /** Number of header lines without a taxonomy tag (sent straight to the discard file). */
        int untagged;

        /** @return the number of tagged records held in memory */
        int size() {
            return heads.size();
        }

        /**
         * Writes one tagged record as header, line break, body, blank-line gap.
         *
         * @param index position of the record
         * @param out   destination writer
         * @throws IOException if the write fails
         */
        void writeRecord(int index, BufferedWriter out) throws IOException {
            out.write(heads.get(index));
            out.write(LS);
            out.write(bodies.get(index).toString());
            out.write(RECORD_GAP);
        }
    }

    /**
     * This method was written by CC and modified by DJC, based upon code from StackOverflow user Kip.
     * It appends a string to the specified file. Failures are logged, not thrown.
     *
     * @param content The string to be written
     * @param file The destination file for the string output
     */
    private void writeFile(String content, File file) {
        try (FileWriter fw = new FileWriter(file, true)) {
            fw.write(content);
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Same as {@link #writeFile(String, File)} but adds a trailing line break.
     *
     * @param content The string to be written
     * @param file The destination file for the string output
     */
    private void writeFileN(String content, File file) {
        writeFile(content + LS, file);
    }

    /**
     * Asks the desktop environment to open a file or directory. This method was written by CC.
     * The desktop integration is looked up lazily so that an unsupported platform only disables
     * this convenience step instead of failing when the application object is constructed.
     *
     * @param file The desired file to be opened.
     */
    private void openFile(File file) {
        if (!Desktop.isDesktopSupported() || !Desktop.getDesktop().isSupported(Desktop.Action.OPEN)) {
            LOGGER.log(Level.WARNING, "Desktop open action is not supported; not opening {0}", file);
            return;
        }
        try {
            Desktop.getDesktop().open(file);
        } catch (IOException | IllegalArgumentException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Builds the single-window UI and wires up the button handlers.
     *
     * @param stage the primary stage supplied by the JavaFX runtime
     */
    @Override
    public void start(final Stage stage) {
        stage.setTitle("Taxonomy Filter");

        // Initialize Filechooser, Buttons
        final FileChooser fileChooser = new FileChooser();
        final Button openButton1 = new Button("Open Sequence File");
        final Button openButton2 = new Button("Open Sequence File");
        final Button processingButton = new Button("Process File");
        final Button idButton = new Button("Generate New Job ID");

        // Set Master Style Strings
        final String bigText = "-fx-font: 35px Tahoma";

        // Initialize ComboBoxes. Contributed by Christopher Camenares, with Modification
        final ComboBox<String> comboBox1 = new ComboBox<>();
        comboBox1.getItems().addAll(genusO, speciesO, subsO, strainO, exactO, bestO, fastO);
        comboBox1.setMinWidth(100);
        comboBox1.setMinHeight(25);
        comboBox1.setValue(bestO);
        final HBox comboBoxSection1 = new HBox();
        comboBoxSection1.setSpacing(10);
        comboBoxSection1.getChildren().add(comboBox1);

        // Initialize Texts. Contributed by Christopher Camenares, with Modification
        final Text lbl1 = new Text(" Molecules ");
        lbl1.setStyle(bigText);
        final Text lbl3 = new Text("No File Loaded");
        final Text lbl4 = new Text("No File Loaded");
        final Text lbl5 = new Text(LS + "Output Parameters");
        lbl5.setStyle(bigText);
        final Text lbl6 = new Text("Match Stringency");
        final Text lbl7 = new Text("Job ID#: ");
        final Text lbl8 = new Text("Job Status Pending");

        // Output-mode selector; "All Matches" is the default.
        final ToggleGroup group = new ToggleGroup();
        final RadioButton rb1 = new RadioButton(MODE_FIRST);
        rb1.setToggleGroup(group);
        final RadioButton rb2 = new RadioButton(MODE_ALL);
        rb2.setToggleGroup(group);
        rb2.setSelected(true);
        final RadioButton rb3 = new RadioButton(MODE_RANDOM);
        rb3.setToggleGroup(group);

        // Text Field. Contributed by Christopher Camenares
        final TextField jobID = new TextField();
        jobID.setText(dirName);
        jobID.setMinHeight(50);

        // Setup the Grid, Populate with items
        final GridPane inputGridPane = new GridPane();
        inputGridPane.add(lbl1, 1, 0);
        GridPane.setColumnSpan(lbl1, 2);
        GridPane.setHalignment(lbl1, HPos.CENTER);
        inputGridPane.add(openButton1, 0, 1);
        GridPane.setColumnSpan(openButton1, 2);
        inputGridPane.add(openButton2, 2, 1);
        GridPane.setColumnSpan(openButton2, 2);
        inputGridPane.add(lbl3, 0, 2);
        GridPane.setColumnSpan(lbl3, 2);
        inputGridPane.add(lbl4, 2, 2);
        GridPane.setColumnSpan(lbl4, 2);
        inputGridPane.add(lbl5, 1, 3);
        GridPane.setColumnSpan(lbl5, 2);
        GridPane.setHalignment(lbl5, HPos.CENTER);
        inputGridPane.add(lbl6, 1, 4);
        inputGridPane.add(comboBoxSection1, 2, 4);
        inputGridPane.add(lbl7, 1, 5);
        inputGridPane.add(jobID, 2, 5);
        inputGridPane.add(rb1, 2, 8);
        inputGridPane.add(rb2, 2, 9);
        inputGridPane.add(rb3, 2, 10);
        inputGridPane.add(processingButton, 1, 11);
        GridPane.setColumnSpan(processingButton, 2);
        GridPane.setHalignment(processingButton, HPos.CENTER);
        inputGridPane.add(idButton, 1, 6);
        GridPane.setColumnSpan(idButton, 2);
        GridPane.setHalignment(idButton, HPos.CENTER);
        inputGridPane.add(lbl8, 1, 7);
        GridPane.setColumnSpan(lbl8, 2);
        GridPane.setHalignment(lbl8, HPos.CENTER);
        inputGridPane.setHgap(6);
        inputGridPane.setVgap(6);

        final Pane rootGroup = new VBox(12);
        rootGroup.getChildren().add(inputGridPane);
        rootGroup.setPadding(new Insets(12, 12, 12, 12));

        // Generates a new job ID from the current time.
        idButton.setOnAction(event -> jobID.setText(Long.toString(System.currentTimeMillis())));

        // A cancelled dialog returns null; in that case the previous selection is kept so the
        // label and the stored file never disagree.
        openButton1.setOnAction(event -> {
            configureFileChooser(fileChooser);
            final File chosen = fileChooser.showOpenDialog(stage);
            if (chosen != null) {
                file1 = chosen;
                flName1 = chosen.getName();
                flPath1 = chosen.getPath();
                lbl3.setText(flName1);
            }
        });
        openButton2.setOnAction(event -> {
            configureFileChooser(fileChooser);
            final File chosen = fileChooser.showOpenDialog(stage);
            if (chosen != null) {
                file2 = chosen;
                flName2 = chosen.getName();
                flPath2 = chosen.getPath();
                lbl4.setText(flName2);
            }
        });

        processingButton.setOnAction(event -> {
            // The fields start out as new File(""), so a null check alone would let a job run
            // before any file has been chosen.
            if (!isReadableFile(file1) || !isReadableFile(file2)) {
                lbl8.setText(STATUS_ERROR);
                return;
            }
            // NOTE(review): the job runs inside this handler, so this intermediate text is
            // unlikely to be painted before the final status replaces it - confirm.
            lbl8.setText("Processing.....");

            dirName = jobID.getText();
            userChoice = comboBox1.getValue();

            final String matchChoice;
            if (group.getSelectedToggle() == rb1) {
                matchChoice = MODE_FIRST;
            } else if (group.getSelectedToggle() == rb3) {
                matchChoice = MODE_RANDOM;
            } else {
                matchChoice = MODE_ALL;
            }

            try {
                final File outputDir = runJob(matchChoice);
                lbl8.setText("Files Saved!");
                openFile(outputDir);
            } catch (IOException ex) {
                LOGGER.log(Level.SEVERE, "Taxonomy filtering failed", ex);
                lbl8.setText(STATUS_ERROR);
            }
        });

        stage.setScene(new Scene(rootGroup));
        stage.show();
    }

    /**
     * Reads both source files, pairs sequences whose taxonomy tags match under the current
     * {@link #userChoice}, and writes the keep / discard / report files into the job directory.
     *
     * <p>A second-collection sequence counts as retained when it matched at least one
     * first-collection sequence, even if the chosen output mode did not write it to the keep file.
     *
     * @param matchChoice one of the output-mode labels; anything other than "First Match" or
     *                    "Random Match" is treated as "All Matches"
     * @return the directory that received the output files
     * @throws IOException if the directory cannot be created or a source or result file
     *                     cannot be read or written; a failure writing the report file is
     *                     only logged
     */
    private File runJob(String matchChoice) throws IOException {
        final long timeStart = System.currentTimeMillis();

        final File dir = new File(dirName);
        if (!dir.isDirectory() && !dir.mkdirs()) {
            throw new IOException("Unable to create output directory " + dir.getAbsolutePath());
        }

        // Output files are opened once, in append mode, and resolved against the directory so the
        // path separator is correct on every platform.
        try (BufferedWriter keepOut1 = appendWriter(new File(dir, "TaxonomyFilter_result_keep_1.txt"));
             BufferedWriter keepOut2 = appendWriter(new File(dir, "TaxonomyFilter_result_keep_2.txt"));
             BufferedWriter wasteOut1 = appendWriter(new File(dir, "TaxonomyFilter_result_waste_1.txt"));
             BufferedWriter wasteOut2 = appendWriter(new File(dir, "TaxonomyFilter_result_waste_2.txt"))) {

            final FastaCollection first = readCollection(file1, wasteOut1);
            final FastaCollection second = readCollection(file2, wasteOut2);

            // Assigned rather than incremented, so repeated runs do not accumulate.
            seqNum1 = first.total;
            seqNum2 = second.total;

            int keep1 = 0;
            int keep2 = 0;
            // Untagged sequences were already written to the discard files while reading.
            int waste1 = first.untagged;
            int waste2 = second.untagged;

            final boolean[] matched2 = new boolean[second.size()];
            final Random rand = new Random();

            for (int i = 0; i < first.size(); i++) {
                final List<Integer> hits = new ArrayList<>();
                for (int j = 0; j < second.size(); j++) {
                    if (matchTax(first.taxIds.get(i), second.taxIds.get(j))) {
                        hits.add(j);
                        matched2[j] = true;
                    }
                }

                if (hits.isEmpty()) {
                    waste1++;
                    first.writeRecord(i, wasteOut1);
                    continue;
                }

                keep1++;
                switch (matchChoice) {
                    case MODE_FIRST:
                        first.writeRecord(i, keepOut1);
                        second.writeRecord(hits.get(0), keepOut2);
                        break;
                    case MODE_RANDOM:
                        first.writeRecord(i, keepOut1);
                        // Uniform over the matches.
                        second.writeRecord(hits.get(rand.nextInt(hits.size())), keepOut2);
                        break;
                    default:
                        // The first-collection record is repeated once per match so both keep
                        // files stay aligned record-for-record.
                        for (int j : hits) {
                            first.writeRecord(i, keepOut1);
                            second.writeRecord(j, keepOut2);
                        }
                        break;
                }
            }

            // Find unmatched sequences from collection 2
            for (int j = 0; j < matched2.length; j++) {
                if (matched2[j]) {
                    keep2++;
                } else {
                    waste2++;
                    second.writeRecord(j, wasteOut2);
                }
            }

            final long timeEnd = System.currentTimeMillis();

            final StringBuilder report = new StringBuilder();
            report.append("Source #1 File: ").append(flPath1).append(LS);
            report.append(LS);
            report.append("Source #2 File: ").append(flPath2).append(LS);
            report.append(LS);
            report.append("Matching Stringency: ").append(userChoice).append(", ").append(matchChoice).append(LS);
            report.append(LS);
            report.append("Before Processing").append(LS);
            report.append("1st collection: ").append(seqNum1).append(" sequences").append(LS);
            report.append("2nd collection: ").append(seqNum2).append(" sequences").append(LS);
            report.append(LS);
            report.append("After Processing").append(LS);
            report.append("1st collection: ").append(keep1).append(" sequences retained, ")
                    .append(waste1).append(" sequences discarded").append(LS);
            report.append("2nd collection: ").append(keep2).append(" sequences retained, ")
                    .append(waste2).append(" sequences discarded").append(LS);
            report.append("Unique Taxonomic IDs, 1st collection: ").append(uniqueArray(first.taxIds)).append(LS);
            report.append("Unique Taxonomic IDs, 2nd collection ").append(uniqueArray(second.taxIds)).append(LS);
            report.append(LS);
            report.append(timeEnd - timeStart).append(" milliseconds of runtime");

            writeFile(report.toString(), new File(dir, "TaxonomyFilter_report.txt"));
        }
        return dir;
    }

    /**
     * Reads one FASTA file. Records whose header contains both "[" and "]" are kept in memory
     * with their extracted taxonomy tag; all other records are streamed to {@code wasteOut}
     * in the same header / body / blank-line layout used for every other output record.
     * Lines before the first header are ignored.
     *
     * @param source   the FASTA file to read
     * @param wasteOut writer that receives records without a taxonomy tag
     * @return the tagged records and the sequence counters
     * @throws IOException if reading or writing fails
     */
    private static FastaCollection readCollection(File source, BufferedWriter wasteOut) throws IOException {
        final FastaCollection collection = new FastaCollection();
        StringBuilder openBody = null;   // body of the tagged record currently being read
        boolean discarding = false;      // true while an untagged record is being streamed out

        try (BufferedReader reader = new BufferedReader(new FileReader(source))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.contains(">")) {
                    if (discarding) {
                        // Close the previous untagged record before anything else is written.
                        wasteOut.write(RECORD_GAP);
                        discarding = false;
                    }
                    collection.total++;
                    if (line.contains("[") && line.contains("]")) {
                        String taxId = TAX_PREFIX.matcher(line).replaceAll("");
                        taxId = TAX_SUFFIX.matcher(taxId).replaceAll("");
                        openBody = new StringBuilder();
                        collection.heads.add(line);
                        collection.bodies.add(openBody);
                        collection.taxIds.add(taxId);
                    } else {
                        collection.untagged++;
                        openBody = null;
                        discarding = true;
                        wasteOut.write(line);
                        wasteOut.write(LS);
                    }
                } else if (openBody != null) {
                    openBody.append(line);
                } else if (discarding) {
                    wasteOut.write(line);
                }
            }
        }
        if (discarding) {
            wasteOut.write(RECORD_GAP);
        }
        return collection;
    }

    /**
     * Opens a buffered writer that appends to the given file.
     *
     * @param file destination file
     * @return a writer positioned at the end of the file
     * @throws IOException if the file cannot be opened for writing
     */
    private static BufferedWriter appendWriter(File file) throws IOException {
        return new BufferedWriter(new FileWriter(file, true));
    }

    /**
     * @param file candidate source file, may be {@code null}
     * @return {@code true} when the file exists, is a regular file and can be read
     */
    private static boolean isReadableFile(File file) {
        return file != null && file.isFile() && file.canRead();
    }

    /**
     * Application entry point.
     *
     * @param args command-line arguments, passed through to JavaFX
     */
    public static void main(String[] args) {
        launch(args);
    }

    /**
     * Sets the title, starting directory and extension filters of the file chooser.
     * The filter list is replaced rather than extended, so calling this before every
     * dialog does not pile up duplicate entries.
     *
     * @param fileChooser the chooser to configure
     */
    private static void configureFileChooser(final FileChooser fileChooser) {
        fileChooser.setTitle("View Files");
        final String home = System.getProperty("user.home");
        if (home != null) {
            final File homeDir = new File(home);
            if (homeDir.isDirectory()) {
                fileChooser.setInitialDirectory(homeDir);
            }
        }
        fileChooser.getExtensionFilters().setAll(
                new FileChooser.ExtensionFilter("Plain Text", "*.txt"),
                new FileChooser.ExtensionFilter("FASTA", "*.fasta"),
                new FileChooser.ExtensionFilter("Rich Text Format", "*.rtf"),
                new FileChooser.ExtensionFilter("All Files", "*.*")
        );
    }

    /**
     * Decides whether two taxonomy tags match under the stringency stored in {@link #userChoice}.
     *
     * <ul>
     * <li>Exact: the tags are equal.</li>
     * <li>Fast Match: either tag contains the other.</li>
     * <li>Best Match: every space-separated token agrees, up to the length of the shorter tag.</li>
     * <li>Genus / Species / Subspecies / Strain: the first 1 / 2 / 3 / 4 tokens agree (fewer
     *     when the shorter tag has fewer tokens).</li>
     * </ul>
     * The mode is compared against the option-label fields, so the selector and the matcher
     * cannot drift apart.
     *
     * @param str1 taxonomy tag from the first collection
     * @param str2 taxonomy tag from the second collection
     * @return {@code true} when the tags match; {@code false} otherwise, including for an
     *         unrecognised or {@code null} mode
     */
    private static boolean matchTax(String str1, String str2) {
        final String mode = userChoice;
        if (mode == null) {
            return false;
        }
        if (mode.equals(exactO)) {
            return str1.equals(str2);
        }
        if (mode.equals(fastO)) {
            return str1.contains(str2) || str2.contains(str1);
        }

        final String[] tokens1 = str1.split(" ");
        final String[] tokens2 = str2.split(" ");
        final int shared = Math.min(tokens1.length, tokens2.length);

        final int depth;
        if (mode.equals(bestO)) {
            depth = shared;
        } else if (mode.equals(genusO)) {
            depth = 1;
        } else if (mode.equals(speciesO)) {
            depth = 2;
        } else if (mode.equals(subsO)) {
            depth = 3;
        } else if (mode.equals(strainO)) {
            depth = 4;
        } else {
            return false;
        }

        final int limit = Math.min(shared, depth);
        if (limit == 0) {
            // Nothing to compare counts as no match.
            return false;
        }
        for (int i = 0; i < limit; i++) {
            if (!tokens1[i].equals(tokens2[i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Counts the entries whose value occurs exactly once in the list. A value that appears
     * two or more times contributes nothing to the count.
     *
     * @param arr the values to examine
     * @return how many values are present exactly once
     */
    private static int uniqueArray(List<String> arr) {
        final Map<String, Integer> occurrences = new HashMap<>();
        for (String value : arr) {
            occurrences.merge(value, 1, Integer::sum);
        }
        int singles = 0;
        for (int count : occurrences.values()) {
            if (count == 1) {
                singles++;
            }
        }
        return singles;
    }
}