import java.awt.Desktop; import java.util.logging.Level; import java.util.logging.Logger; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.FileWriter; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import javafx.application.Application; import javafx.event.ActionEvent; import javafx.event.EventHandler; import javafx.geometry.Insets; import javafx.scene.Scene; import javafx.scene.control.Button; import javafx.scene.layout.GridPane; import javafx.scene.layout.Pane; import javafx.scene.layout.VBox; import javafx.scene.control.TextField; import javafx.scene.control.Label; import javafx.stage.FileChooser; import javafx.stage.Stage; import java.net.URL; import java.net.URLConnection; import javafx.scene.control.ComboBox; import javafx.scene.layout.HBox; /** * Retrieve Nucleotide from DocSum * Given a DocSum file, this program will access the nucleotides sequences from NCBI and write them to a destination file. * @author Devin Camenares, PhD * * @version 5-17-16 * @since 5-12-16 */ public final class nucSeqfetch extends Application { private Desktop desktop = Desktop.getDesktop(); /** * This method was written by CC and modified by DJC, based upon code from StackOverflow user Kip. * It takes a string and writes it to a specified file, appending said file. * The purpose of this method is to save information and results as they are generated, reducing memory load. * * @param content The string to be written * @param file The destination file for the string output */ private void writeFile(String content, File file){ try (FileWriter fw = new FileWriter(file, true)) { fw.write(content);// } catch (IOException ex) { Logger.getLogger(nucSeqfetch.class.getName()).log(Level.SEVERE, null, ex); } } /** * This method was written by CC * * @param file The desired file to be opened. */ private void openFile(File file) { try { desktop.open(file); } catch (IOException ex) { Logger.getLogger( nucSeqfetch.class.getName()).log( Level.SEVERE, null, ex ); } } /** * A timestamp, used for generating unique file IDs */ final long timeUnique = System.currentTimeMillis(); /** * A directory name, created based upon timestamp. */ public String dirName = Long.toString(timeUnique); public static String rnaCat = "RNA"; public static String dnaCat = "DNA"; @Override public void start(final Stage stage) { stage.setTitle("Collection of Nucleotide Sequences from NCBI Doc Sums"); // Create Grid pane, FileChooser, and Button final GridPane inputGridPane = new GridPane(); final FileChooser fileChooser = new FileChooser(); final Button openButton = new Button("Open Doc Sum file"); final Button idButton = new Button("Generate New Job ID"); // Text Fields and Labels. Some of this framework contributed by Christopher Camenares Label lbl1 = new Label("Job ID#:"); lbl1.setMinHeight(50); lbl1.setMinWidth(250); Label lbl1A = new Label("Sequence Size Limit:"); lbl1A.setMinHeight(50); lbl1A.setMinWidth(250); Label lbl1B = new Label("Sequence Type:"); lbl1B.setMinHeight(50); lbl1B.setMinWidth(250); Label lbl2 = new Label("Awaiting File Selection"); lbl2.setMinHeight(50); lbl2.setMinWidth(100); TextField jobID = new TextField(); jobID.setText(dirName); jobID.setMinHeight(50); jobID.setMinWidth(200); TextField sizeLimit = new TextField(); sizeLimit.setText("50000"); sizeLimit.setMinHeight(50); sizeLimit.setMinWidth(200); final ComboBox comboBox1; comboBox1 = new ComboBox(); comboBox1.getItems().addAll(rnaCat, dnaCat); comboBox1.setMinWidth(100); comboBox1.setMinHeight(25); comboBox1.setValue(rnaCat); HBox comboBoxSection1; comboBoxSection1 = new HBox(); comboBoxSection1.setSpacing(10); comboBoxSection1.getChildren().addAll(comboBox1); GridPane.setConstraints(openButton, 0, 8); GridPane.setConstraints(idButton, 0, 2); GridPane.setConstraints(lbl1, 0, 0); GridPane.setConstraints(lbl1A, 0, 3); GridPane.setConstraints(lbl1B, 0, 5); GridPane.setConstraints(comboBoxSection1, 0, 6); GridPane.setConstraints(sizeLimit, 0, 4); GridPane.setConstraints(lbl2, 0, 7); GridPane.setConstraints(jobID, 0, 1); inputGridPane.setHgap(50); inputGridPane.setVgap(5); inputGridPane.getChildren().addAll(openButton, idButton, lbl1, lbl2, jobID, lbl1A, sizeLimit, lbl1B, comboBoxSection1); final Pane rootGroup = new VBox(12); rootGroup.getChildren().addAll(inputGridPane); rootGroup.setPadding(new Insets(12, 12, 12, 12)); /** * Defines button action: generates new job ID */ idButton.setOnAction( new EventHandler() { @Override public void handle(final ActionEvent e) { final long timeButton = System.currentTimeMillis(); jobID.setText(Long.toString(timeButton)); } } ); openButton.setOnAction( new EventHandler() { @Override public void handle(final ActionEvent e) { File file1 = fileChooser.showOpenDialog(stage); if (file1 != null) { /** * Beginning timestamp to track processing time */ final long timeStart = System.currentTimeMillis(); /** * Integer counter for number of sequences downloaded */ int seqCount = 0; // Pull directory name from jobID text dirName = jobID.getText(); /** * The specified sequence size limit. */ int sizeLimitI = Integer.parseInt(sizeLimit.getText()); /** * Condition if there is no specified sequence size limit */ boolean noLimit = false; if (sizeLimitI == 0) { noLimit = true; } /** * String container for output sequences */ String seqF = ""; /** * String container for runtime information output. */ String repF = "Job ID#: " + dirName + ", Runtime Information" + System.lineSeparator() + System.lineSeparator(); /** * String container for nucleotide accession number, as defined by ChrAccVer tags */ String nucID = ""; /** * String container for sequence start position, defined by ChrStart tags */ String nucStart = ""; /** * String container for sequence end position, defined by ChrStart tags */ String nucEnd = ""; /** * Boolean to determine if the fetch operation should be performed */ boolean grabSeq = false; /** * The directory created by the program, wherein files are written */ File dir = new File(dirName); dir.mkdir(); /** * String used to hold pathnames */ String pathName = dirName + "\\nucSeqfetch_result.txt"; /** * File to which results will be written during run. */ File file3 = new File(pathName); pathName = dirName + "\\nucSeqfetch_report.txt"; /** * File to which runtime information is written upon program conclusion. */ File file4 = new File(pathName); // Begin Block of Code for Processing File try (BufferedReader br = new BufferedReader(new FileReader(file1))) { String line; while ((line = br.readLine()) != null) { // process the line. if (line.contains("")) { line = line.replaceAll("(.*)", ""); line = line.replaceAll("", ""); nucID = line; } else if (line.contains("")) { line = line.replaceAll("(.*)", ""); line = line.replaceAll("", ""); nucStart = line; } else if (line.contains("")) { line = line.replaceAll("(.*)", ""); line = line.replaceAll("", ""); nucEnd = line; /** * Integer container for sequence start position */ int nucStartI = Integer.parseInt(nucStart); /** * Integer container for sequence end position */ int nucEndI = Integer.parseInt(nucEnd); if (nucStartI == 0) { nucStartI = 1; } if (nucEndI == 0) { nucEndI = 1; } /** * The total size of the sequence; calculated to prevent attempts on very large sequences. */ int seqSize = Math.abs(nucStartI - nucEndI); grabSeq = true; if (seqSize > sizeLimitI && !noLimit) { grabSeq = false; } } if (grabSeq) { /** * E-utility URL, built from nucleotide accession, start, and stop values. */ URL url = new URL("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=" + nucID + "&seq_start=" + nucStart + "&seq_stop=" + nucEnd + "&rettype=fasta&retmode=text"); URLConnection con = url.openConnection(); InputStream is = con.getInputStream(); BufferedReader br2 = new BufferedReader(new InputStreamReader(is)); /** * String container for nucleotide lines. */ String line2 = null; // read each line and write to file while ((line2 = br2.readLine()) != null) { if(line2.contains(">")) { if (!line2.contains("[")) { line2 = line2.replaceFirst("\\s", " ["); } if (line2.contains(", ")) { line2 = line2.replaceAll(",(.*)", ""); } else if (line2.contains("complete genome") || line2.contains("partial sequence") || line2.contains("chromosome") ) { line2 = line2.replaceAll("complete genome", ""); line2 = line2.replaceAll("partial sequence", ""); line2 = line2.replaceAll("chromosome", "") ; } if (!line2.contains("]")) { line2 += "]"; } } else if (comboBox1.getValue() == "RNA") { line2 = line2.replaceAll("T", "U"); line2 = line2.replaceAll("t", "u"); } writeFile(line2 + System.lineSeparator(), file3); } seqCount++; grabSeq = false; } } } catch (FileNotFoundException ex) { Logger.getLogger(nucSeqfetch.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(nucSeqfetch.class.getName()).log(Level.SEVERE, null, ex); } /** * Timestamp to determine the end of the program run and calculate processing time */ final long timeEnd = System.currentTimeMillis(); repF += "Source Filename: " + file1.getName() + System.lineSeparator(); repF += seqCount + " sequences retrieved" + System.lineSeparator(); repF += Long.toString(timeEnd - timeStart) + " milliseconds of runtime"; // Write the result to the file, report success writeFile(repF, file4); lbl2.setText("Files Saved!"); openFile(dir); } else { lbl2.setText("No files selected, please try again"); } } } ); stage.setScene(new Scene(rootGroup)); stage.show(); } public static void main(String[] args) { Application.launch(args); } private static void configureFileChooser( final FileChooser fileChooser) { fileChooser.setTitle("View Files"); fileChooser.setInitialDirectory( new File(System.getProperty("user.home")) ); fileChooser.getExtensionFilters().addAll( new FileChooser.ExtensionFilter("Plain Text", "*.txt"), new FileChooser.ExtensionFilter("FASTA", "*.fasta"), new FileChooser.ExtensionFilter("Rich Text Format", "*.rtf"), new FileChooser.ExtensionFilter("All Files", "*.*") ); } }