import java.awt.Desktop;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Scanner;
import java.util.logging.Level;
import java.util.logging.Logger;

/*
 * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license
 * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Main.java to edit this template
 */

/**
 * Interactive command-line tool that searches the NCBI SNP database through the
 * E-utilities web service for single-nucleotide mutations of a user-chosen kind
 * (for example A to G, or A to a deletion), downloads the sequence flanking each
 * matching mutation, and appends the results to a timestamped text file.
 *
 * <p>The program runs three kinds of requests: one esearch request to collect
 * entry IDs, one esummary request per batch of IDs to read the SPDI notation of
 * each entry, and one efetch request per matching mutation to read the flanking
 * nucleotide sequence.
 *
 * @author devincamenares
 */
public class GavinProject1 {

    /** Common prefix of every E-utilities endpoint used by this program. */
    private static final String EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/";

    /*
     * NOTE(review): the tag literals below were blank in the file as received
     * (it contained contains("") / replace("", "")), which looks like markup
     * stripped in transit. They are restored from the E-utilities XML format;
     * confirm against a live esearch / esummary response.
     */
    /** Opening tag of an ID line in the esearch XML response. */
    private static final String ID_OPEN_TAG = "<Id>";
    /** Opening tag of the SPDI line in the esummary XML response. */
    private static final String SPDI_OPEN_TAG = "<SPDI>";
    /** Closing tag of the SPDI line in the esummary XML response. */
    private static final String SPDI_CLOSE_TAG = "</SPDI>";

    private static final Logger LOGGER = Logger.getLogger(GavinProject1.class.getName());

    /**
     * Single reader for standard input. Creating one Scanner per prompt loses
     * input whenever stdin is piped, because each Scanner buffers ahead.
     */
    private static final Scanner INPUT = new Scanner(System.in);

    /**
     * Runs the interactive session: collects the user's choices, queries NCBI,
     * writes the report file and finally tries to open it.
     *
     * @param args ignored
     * @throws IOException if a request to NCBI fails
     * @throws InterruptedException if the thread is interrupted while pausing between requests
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        /** A timestamp, used for generating unique file IDs. */
        final long timeUnique = System.currentTimeMillis();
        /** A name prefix, created based upon the timestamp. */
        String dirName = Long.toString(timeUnique);

        // THIS BLOCK IS FOR GETTING USER INPUT
        System.out.println("Let's Begin - Choose your first nucleotide - A, C, G, or T");
        String userFirstChoice = promptOriginalNucleotide();

        System.out.println("Excellent. Now, what will it mutate to? Pick A, T, C, or G, but not the same as before! \nIt could also be - to indicate a deletion");
        String userSecondChoice = promptMutatedNucleotide(userFirstChoice);

        String repSuggest = suggestRepresentation(userFirstChoice, userSecondChoice);
        System.out.println("Now, when retrieving the sequence, what should I use to represent the mutated residue? Based on your mutation choice, I recommend a " + repSuggest + ". You can leave this blank to use the recommendation");
        String representMut = userDefault(repSuggest);

        System.out.println("Thanks, processing...");
        System.out.println("Your original nucleotide was " + userFirstChoice + " and this mutated to a " + userSecondChoice + ". This will be represented in the final result as a " + representMut);
        System.out.println("******************************************");

        System.out.println("What search terms do you want to use to filter results? For the entire database, type all[sb] or leave blank by just hitting enter.");
        String searchTerm = userDefault("all[sb]");

        System.out.println("What entry # from the search do you want to begin at? \nPlease enter a number (Default is 1)");
        int retStart = userDefault(1, 1);

        System.out.println("Now, how many entries do you want to scan? Please enter a number (Default is 1000)");
        int retMax = userDefault(1000, 1);

        System.out.println("OK, what range of flanking sequences do you want - please enter a number (Default is 100)");
        int flank = userDefault(100, 0);

        System.out.println("NCBI doesn't like frequent requests to their server. By how many miliseconds should I delay each nucleotide request? Please enter a number - 350 is recommended, skip to keep this default value");
        int timeDelay = userDefault(350, 0);

        System.out.println("NCBI also doesn't like retrieving too many entries from a single URL. What size do you want each batch to be? Please enter a number, at or less than 200 is recommended, skip to keep this default value.");
        int batchSize = userDefault(200, 1);

        System.out.println("This program could take awhile to run. Approximately how often, in seconds, would you like an update on the progress? The default value is 300 - or about five minutes");
        int progressReport = userDefault(300, 1);

        String inputSummary = "Great! I will search using " + searchTerm + " to look for " + retMax
                + " entries starting at result " + retStart + " that match a mutation of " + userFirstChoice
                + " to " + userSecondChoice + ". If I find any, I'll wait " + timeDelay
                + " miliseconds and then report back " + flank
                + "bp flanking either side of the mutation, which will be indicated as " + representMut + "."
                + System.lineSeparator();
        System.out.println(inputSummary + "*** Here we go! ***" + System.lineSeparator());

        // BLOCK TO SETUP WRITING TO FILE
        /*
         * The original author tried writing into a directory created by the program:
         *     File dir = new File(dirName); dir.mkdir();
         * and noted that this doesn't work on mac for some reason, so the
         * timestamp is used as a file-name prefix instead.
         */
        /** String used to hold pathnames. */
        String pathName = dirName + "_" + userFirstChoice + "_" + userSecondChoice + ".txt";
        /** File to which sequences and runtime information are written. */
        File file1 = new File(pathName);
        writeFile(inputSummary + System.lineSeparator(), file1);

        // BLOCK TO PROCESS REQUEST
        int mutationsQueried = 0;
        int sequencesCollected = 0;
        /** Element i counts the entries that listed i+1 mutations. */
        List<Integer> multipleMut = new ArrayList<Integer>();

        // The term is user text, so it must be encoded before going into a URL.
        // esearch's retstart is 0-based while the prompt is 1-based, hence the -1.
        URL url1 = new URL(EUTILS_BASE + "esearch.fcgi?db=snp&term=" + URLEncoder.encode(searchTerm, "UTF-8")
                + "&retmax=" + retMax + "&retstart=" + (retStart - 1));
        List<String> allIds = fetchSearchIds(url1);
        int entriesSearched = allIds.size();
        List<String> idsBatches = batchIds(allIds, batchSize);

        // BLOCK FOR 2ND WEBSITE
        long processingStart = System.currentTimeMillis();
        long timeUpdate = processingStart;
        for (int j = 0; j < idsBatches.size(); j++) {
            // Provides the user with regular updates, checked once per batch.
            long now = System.currentTimeMillis();
            if ((now - timeUpdate) / 1000 > progressReport) {
                // Report measured time; integer maths on the interval rounds short intervals to 0.
                long minutesElapsed = Math.round((now - processingStart) / 60000.0);
                System.out.println("It's been about " + minutesElapsed + " minutes, and so far I have collected "
                        + sequencesCollected + " sequences by checking " + mutationsQueried
                        + " mutations across " + entriesSearched + " database entries in " + j + " batches.");
                timeUpdate = now;
                System.out.println("Now, back to work....");
            }

            // Summary requests count towards NCBI's rate limit as well.
            Thread.sleep(timeDelay);

            URL url2 = new URL(EUTILS_BASE + "esummary.fcgi?db=snp&id=" + idsBatches.get(j));
            URLConnection con2 = url2.openConnection();
            try (InputStream is2 = con2.getInputStream();
                    BufferedReader br2 = new BufferedReader(new InputStreamReader(is2, StandardCharsets.UTF_8))) {
                String line2;
                /** ID of the entry whose lines are currently being read. */
                String spdiIndex = null;
                while ((line2 = br2.readLine()) != null) {
                    if (line2.contains(SPDI_OPEN_TAG)) {
                        String spdiList = line2.replace(SPDI_OPEN_TAG, "").replace(SPDI_CLOSE_TAG, "").trim();
                        if (spdiList.isEmpty()) {
                            continue;
                        }
                        String[] spdiX = spdiList.split(",");
                        // Keeps track of how many entries have multiple mutations.
                        tallyMutationCount(multipleMut, spdiX.length);

                        for (int i = 0; i < spdiX.length; i++) {
                            mutationsQueried++;
                            String[] spdiInfo = parseSpdi(spdiX[i]);
                            if (spdiInfo == null) {
                                LOGGER.log(Level.WARNING, "Skipping malformed SPDI value: {0}", spdiX[i]);
                                continue;
                            }
                            if (!spdiInfo[2].equals(userFirstChoice) || !spdiInfo[3].equals(userSecondChoice)) {
                                continue;
                            }

                            // BLOCK FOR 3RD WEBSITE
                            // Pause execution - cannot be more than 3 requests every second.
                            Thread.sleep(timeDelay);

                            // SPDI positions are 0-based; efetch coordinates are 1-based.
                            int center = Integer.parseInt(spdiInfo[1]) + 1;
                            // Clamp so a mutation near the start of the sequence still yields a valid range.
                            int startPos = Math.max(1, center - flank);
                            int endPos = center + flank;

                            URL url3 = new URL(EUTILS_BASE + "efetch.fcgi?db=nuccore&id=" + spdiInfo[0]
                                    + "&seq_start=" + startPos + "&seq_stop=" + endPos + "&rettype=fasta");
                            String[] fasta = fetchFasta(url3);

                            String header = ">" + spdiIndex + "_" + spdiInfo[0] + "_" + spdiInfo[1] + "_"
                                    + spdiInfo[2] + "_" + spdiInfo[3] + " " + fasta[0];
                            String seqResult = markMutation(fasta[1], center - startPos, representMut);
                            if (seqResult == null) {
                                LOGGER.log(Level.WARNING,
                                        "Retrieved sequence for {0} is too short to mark the mutation; skipped",
                                        spdiX[i]);
                                continue;
                            }
                            writeFile(header + seqResult + System.lineSeparator() + System.lineSeparator(), file1);
                            sequencesCollected++;
                        }
                    } else if (line2.contains("uid")) {
                        spdiIndex = line2.replaceAll("[^\\d.]", "");
                    }
                }
            }
        }
        // END BLOCK FOR 2ND STREAM

        // BLOCK TO FINALIZE AND OPEN FILE
        long endTime = (System.currentTimeMillis() - timeUnique) / 1000;
        writeFile("Phew! It took me about " + endTime + " seconds, but I collected " + sequencesCollected
                + " sequences by checking " + mutationsQueried + " mutations across " + entriesSearched
                + " database entries, retrieving them in " + idsBatches.size() + " batches of up to " + batchSize
                + " sequences each." + System.lineSeparator(), file1);
        writeFile("The entries scanned had the following properties: ", file1);
        for (int i = 0; i < multipleMut.size(); i++) {
            writeFile(multipleMut.get(i) + " entries had " + (i + 1) + " mutation(s). ", file1);
        }
        writeFile(System.lineSeparator() + "Good luck with your results. Live long and prosper!", file1);
        System.out.println("I'm done! Look ma, results!");
        openFile(file1);
    }

    /**
     * Suggests the single-letter code used to represent a mutation between two
     * nucleotides: the two-base ambiguity code for a substitution, "-" for a
     * deletion, and "N" when the pair is not recognised.
     *
     * @param first the original nucleotide (A, C, G or T)
     * @param second the mutated nucleotide (A, C, G, T or "-")
     * @return the suggested representation
     */
    static String suggestRepresentation(String first, String second) {
        if (second.equals("-")) {
            return "-";
        }
        String[] pair = {first, second};
        Arrays.sort(pair); // makes the lookup independent of mutation direction
        switch (pair[0] + pair[1]) {
            case "AT":
                return "W";
            case "AG":
                return "R";
            case "AC":
                return "M";
            case "CT":
                return "Y";
            case "CG":
                return "S";
            case "GT":
                return "K";
            default:
                return "N";
        }
    }

    /**
     * Groups IDs into comma-separated strings of at most {@code batchSize} IDs each.
     *
     * @param ids the IDs to group, in order
     * @param batchSize maximum number of IDs per batch; values below 1 are treated as 1
     * @return the batches, in order; empty when {@code ids} is empty
     */
    static List<String> batchIds(List<String> ids, int batchSize) {
        int size = Math.max(1, batchSize);
        List<String> batches = new ArrayList<String>();
        StringBuilder current = new StringBuilder();
        int inBatch = 0;
        for (String id : ids) {
            if (inBatch > 0) {
                current.append(',');
            }
            current.append(id);
            inBatch++;
            if (inBatch == size) {
                batches.add(current.toString());
                current.setLength(0);
                inBatch = 0;
            }
        }
        if (inBatch > 0) {
            batches.add(current.toString());
        }
        return batches;
    }

    /**
     * Records that one more entry listed {@code mutationCount} mutations.
     * Element {@code i} of {@code counts} holds the number of entries with
     * {@code i + 1} mutations; the list is padded with zeros as needed so a
     * large count seen first still lands in the right slot.
     *
     * @param counts the running tally, modified in place
     * @param mutationCount number of mutations on the entry; ignored when below 1
     */
    static void tallyMutationCount(List<Integer> counts, int mutationCount) {
        if (mutationCount < 1) {
            return;
        }
        while (counts.size() < mutationCount) {
            counts.add(0);
        }
        counts.set(mutationCount - 1, counts.get(mutationCount - 1) + 1);
    }

    /**
     * Splits one SPDI value ({@code sequence:position:deleted:inserted}) into its
     * four parts. A missing inserted part, which is how a deletion appears, is
     * returned as "-".
     *
     * @param spdi a single SPDI value
     * @return the four parts, or {@code null} when the value has fewer than three
     *     parts or its position is not an integer
     */
    static String[] parseSpdi(String spdi) {
        String[] parts = spdi.trim().split(":");
        if (parts.length < 3) {
            return null;
        }
        try {
            Integer.parseInt(parts[1]);
        } catch (NumberFormatException ex) {
            return null;
        }
        String inserted = parts.length > 3 ? parts[3] : "-";
        return new String[] {parts[0], parts[1], parts[2], inserted};
    }

    /**
     * Replaces the character at {@code index} of {@code sequence} with {@code marker}.
     *
     * @param sequence the retrieved nucleotide sequence
     * @param index 0-based index of the mutated base within {@code sequence}
     * @param marker text that stands in for the mutated base
     * @return the marked sequence, or {@code null} when {@code index} lies outside the sequence
     */
    static String markMutation(String sequence, int index, String marker) {
        if (index < 0 || index >= sequence.length()) {
            return null;
        }
        return sequence.substring(0, index) + marker + sequence.substring(index + 1);
    }

    /** Reads the esearch response at {@code url} and returns the IDs it lists, in order. */
    private static List<String> fetchSearchIds(URL url) throws IOException {
        List<String> ids = new ArrayList<String>();
        URLConnection con = url.openConnection();
        try (InputStream is = con.getInputStream();
                BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.contains(ID_OPEN_TAG)) {
                    String id = line.replaceAll("[^\\d.]", "");
                    if (!id.isEmpty()) {
                        ids.add(id);
                    }
                }
            }
        }
        return ids;
    }

    /**
     * Reads the FASTA response at {@code url}. Element 0 of the result is the text of
     * every header line (without ">", each followed by a line separator); element 1
     * is the sequence lines joined together.
     */
    private static String[] fetchFasta(URL url) throws IOException {
        StringBuilder header = new StringBuilder();
        StringBuilder sequence = new StringBuilder();
        URLConnection con = url.openConnection();
        try (InputStream is = con.getInputStream();
                BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.contains(">")) {
                    header.append(line.replace(">", "")).append(System.lineSeparator());
                } else {
                    sequence.append(line);
                }
            }
        }
        return new String[] {header.toString(), sequence.toString()};
    }

    /** Returns true when {@code value} is exactly one of A, C, G or T. */
    private static boolean isNucleotide(String value) {
        return value.equals("A") || value.equals("C") || value.equals("G") || value.equals("T");
    }

    /** Keeps asking until the user enters A, C, G or T; returns it in upper case. */
    private static String promptOriginalNucleotide() {
        while (true) {
            String choice = INPUT.nextLine().toUpperCase();
            if (isNucleotide(choice)) {
                return choice;
            }
            System.out.println("Hmm... you picked " + choice + ". That doesn't seem right - it needs to be an A, T, C, or G. Try again!");
        }
    }

    /**
     * Keeps asking until the user enters A, C, G, T or "-" that differs from
     * {@code original}; returns it in upper case.
     */
    private static String promptMutatedNucleotide(String original) {
        while (true) {
            String choice = INPUT.nextLine().toUpperCase();
            if (choice.equals(original)) {
                System.out.println("Whoops! You picked the same nucleotide as before (" + original + ">" + choice + "). Try again!");
            } else if (isNucleotide(choice) || choice.equals("-")) {
                return choice;
            } else {
                System.out.println("Hmm... you picked " + choice + ". That doesn't seem right - it needs to be an A, T, C, G, or -. Try again!");
            }
        }
    }

    /** Returns the next line of standard input, or {@code null} when input has ended. */
    private static String readLineOrNull() {
        return INPUT.hasNextLine() ? INPUT.nextLine() : null;
    }

    /**
     * Appends {@code content} to {@code file}, creating the file if necessary.
     * A failure is logged rather than thrown.
     */
    private static void writeFile(String content, File file) {
        try (FileWriter fw = new FileWriter(file, true)) {
            fw.write(content);
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Opens {@code file} with the desktop's default application. When no desktop
     * is available (for example on a headless machine) the file location is
     * printed instead; looking the desktop up here rather than in a static
     * initialiser keeps the rest of the program usable in that case.
     */
    private static void openFile(File file) {
        if (!Desktop.isDesktopSupported() || !Desktop.getDesktop().isSupported(Desktop.Action.OPEN)) {
            System.out.println("Results were written to " + file.getAbsolutePath());
            return;
        }
        try {
            Desktop.getDesktop().open(file);
        } catch (IOException | IllegalArgumentException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Reads an integer from the user, returning {@code defaultVal} for a blank answer.
     *
     * @param defaultVal value used when the user enters nothing
     * @return the number entered, or the default
     */
    private static int userDefault(int defaultVal) {
        return userDefault(defaultVal, Integer.MIN_VALUE);
    }

    /**
     * Reads an integer of at least {@code minimum} from the user, asking again
     * after an invalid answer. A blank answer, or the end of input, selects
     * {@code defaultVal}.
     *
     * @param defaultVal value used when the user enters nothing
     * @param minimum smallest value accepted
     * @return the number entered, or the default
     */
    private static int userDefault(int defaultVal, int minimum) {
        while (true) {
            String inputS = readLineOrNull();
            if (inputS == null || inputS.trim().isEmpty()) {
                System.out.println("Great, using the default value of " + defaultVal);
                return defaultVal;
            }
            try {
                int value = Integer.parseInt(inputS.trim());
                if (value >= minimum) {
                    return value;
                }
                System.out.println("Whoops! That number needs to be at least " + minimum + ". Please try again!");
            } catch (NumberFormatException ex) {
                System.out.println("Whoops! looks like you didn't provide a valid number. Please try again!");
            }
        }
    }

    /**
     * Reads a line of text from the user. A blank answer, or the end of input,
     * selects {@code defaultVal}.
     *
     * @param defaultVal value used when the user enters nothing
     * @return the trimmed text entered, or the default
     */
    private static String userDefault(String defaultVal) {
        String inputS = readLineOrNull();
        if (inputS == null || inputS.trim().isEmpty()) {
            System.out.println("Great, using the default value of " + defaultVal);
            return defaultVal;
        }
        return inputS.trim();
    }
}