Java源码示例:htsjdk.samtools.util.StringUtil
示例1
/**
 * Micro-benchmark: splits TEXT on tabs ITERATIONS times with StringUtil.split() and then
 * ITERATIONS times with this class's own split() helper, printing the elapsed time of each
 * pass to standard output.
 */
public void run() {
    final int ITERATIONS = 1000000;
    final String[] fields = new String[10000];
    final StopWatch watch = new StopWatch();

    // First pass: the htsjdk splitter with a char delimiter.
    watch.start();
    for (int remaining = ITERATIONS; remaining > 0; --remaining) {
        if (StringUtil.split(TEXT, fields, '\t') > 100) {
            System.out.println("Mama Mia that's a lot of tokens!!");
        }
    }
    watch.stop();
    System.out.println("StringUtil.split() took " + watch.getElapsedTime());

    // Second pass: the local split() helper with a String delimiter
    // (presumably StringTokenizer-based, judging by the label printed below).
    watch.reset();
    watch.start();
    for (int remaining = ITERATIONS; remaining > 0; --remaining) {
        if (split(TEXT, fields, "\t") > 100) {
            System.out.println("Mama Mia that's a lot of tokens!!");
        }
    }
    watch.stop();
    System.out.println("StringTokenizer took " + watch.getElapsedTime());
}
示例2
/**
 * Writes one tab-separated line per metric of the mapping result to the given stream.
 * Each line holds: context, barcode, merged barcode count + 1, edit distance discovered,
 * edit distance used, original observations and total observations. When requested, a final
 * column carries the comma-separated edit distance list, or "NA" when that list is empty.
 *
 * @param writeEditDistanceDistribution whether to append the edit distance list column
 * @param context value written in the first column of every line
 * @param r the result whose metrics are written
 * @param out destination stream; when null nothing is written
 */
private void writeMetrics (final boolean writeEditDistanceDistribution, final String context, final AdaptiveMappingResult r, final PrintStream out) {
    if (out == null) {
        return;
    }
    for (final EditDistanceMappingMetric edmm : r.getMetricResult()) {
        // NOTE(review): the return value is discarded; presumably a leftover call - confirm it can go.
        edmm.getOriginalObservations();

        final List<String> line = new ArrayList<>();
        line.add(context);
        line.add(edmm.getBarcode());
        // Steve reports the number of barcodes including the one that everything is merged into.
        line.add(Integer.toString(edmm.getNumMergedBarcodes()+1));
        line.add(Integer.toString(edmm.getEditDistanceDiscovered()));
        line.add(Integer.toString(edmm.getEditDistanceUsed()));
        line.add(Integer.toString(edmm.getOriginalObservations()));
        line.add(Integer.toString(edmm.getTotalObservations()));

        if (writeEditDistanceDistribution) {
            final int[] edList = edmm.getEdList();
            if (edList.length == 0) {
                line.add("NA");
            } else {
                final Integer[] boxed = Arrays.stream(edList).boxed().toArray(Integer[]::new);
                line.add(StringUtil.join(",", boxed));
            }
        }
        out.println(StringUtil.join("\t", line));
    }
}
示例3
/**
 * Test to see if this read matches this barcode. Barcode positions for which
 * isIgnoreBase(...) returns true are skipped; every other barcode base must equal
 * (per SequenceUtil.basesEqual) the read base at the same offset.
 *
 * @param testString
 *            The read to look for this barcode in. The barcode should be at
 *            the start of the read for this method. The entire barcode is expected for a match.
 * @return true if this barcode is found in the read; false otherwise, including when the read
 *         ends before a barcode position that must be compared.
 */
public boolean hasForwardMatch(final String testString) {
    final byte[] testBases = StringUtil.stringToBytes(testString);
    for (int i = 0; i < bases.length; i++) {
        if (isIgnoreBase(bases[i])) {
            continue;
        }
        // The read is too short to contain this barcode base: report "no match"
        // instead of indexing past the end of testBases.
        if (i >= testBases.length) {
            return false;
        }
        // One mismatch at a compared position is enough to rule the barcode out.
        if (!SequenceUtil.basesEqual(testBases[i], bases[i])) {
            return false;
        }
    }
    return true;
}
示例4
/**
 * Add an uneven number of bases and quals to trip the exception throw:
 * two bases with three qualities must make setBasesAndQualities throw IllegalArgumentException.
 */
@Test(enabled=true)
public void testAddBaseQualsError () {
    final int snpPos = 76227022;
    final Interval snpInterval = new Interval("HUMAN_1", snpPos, snpPos, true, "test");
    final SNPUMIBasePileup pileup = new SNPUMIBasePileup(snpInterval, "ACADM", "fake_cell", "AAAAA");

    final char[] bases = {'A', 'A'};
    final byte[] quals = {27, 17, 55};
    final byte[] basesAsBytes = new byte[bases.length];
    StringUtil.charsToBytes(bases, 0, bases.length, basesAsBytes, 0);

    boolean threw;
    try {
        pileup.setBasesAndQualities(basesAsBytes, quals);
        threw = false;
    } catch (IllegalArgumentException e) {
        Assert.assertNotNull(e);
        threw = true;
    }
    Assert.assertTrue(threw);
}
示例5
/**
 * Supplies the same number of bases and qualities (two of each) and checks that
 * setBasesAndQualities accepts them without throwing IllegalArgumentException.
 */
@Test(enabled=true)
public void testAddBaseQuals () {
    final int snpPos = 76227022;
    final Interval snpInterval = new Interval("HUMAN_1", snpPos, snpPos, true, "test");
    final SNPUMIBasePileup pileup = new SNPUMIBasePileup(snpInterval, "ACADM", "fake_cell", "AAAAA");

    final char[] bases = {'A', 'A'};
    final byte[] quals = {27, 55};
    final byte[] basesAsBytes = new byte[bases.length];
    StringUtil.charsToBytes(bases, 0, bases.length, basesAsBytes, 0);

    boolean accepted;
    try {
        pileup.setBasesAndQualities(basesAsBytes, quals);
        accepted = true;
    } catch (IllegalArgumentException e) {
        accepted = false;
    }
    Assert.assertTrue(accepted);
}
示例6
/**
 * Checks getLogLikelihoodMixedModel for two 'A' reads of quality 10 against a
 * HOM_REF / HET / HOM_VAR mixture weighted 2:1:1 with ref allele 'A' and alt allele 'T'.
 * The expected result is log10(0.36), compared with a tolerance of 0.001.
 */
@Test(enabled=true)
public void testMixedLikelihoodMultiRead () {
    final List<GenotypeType> genotypes = Arrays.asList(GenotypeType.HOM_REF, GenotypeType.HET, GenotypeType.HOM_VAR);
    // Boxing via literals/casts replaces the deprecated new Double(..) / new Byte(..) constructors.
    final List<Double> mixture = Arrays.asList(2.0, 1.0, 1.0);
    final char refAllele = 'A';
    final char altAllele = 'T';
    final byte baseA = StringUtil.charToByte('A');
    final List<Byte> bases = Arrays.asList(baseA, baseA);
    final List<Byte> qualities = Arrays.asList((byte) 10, (byte) 10);

    final double result = LikelihoodUtils.getInstance().getLogLikelihoodMixedModel(refAllele, altAllele, genotypes, mixture, bases, qualities, null, null, null);
    Assert.assertEquals(result, Math.log10(0.36), 0.001);
}
示例7
/**
 * Returns a copy of baseString in which numChanges distinct, randomly chosen positions
 * have been passed through alterBase().
 *
 * @param baseString the bases to alter
 * @param numChanges how many distinct positions to alter
 * @return the altered base string
 * @throws IllegalArgumentException if numChanges exceeds the length of baseString
 */
private final String alterBaseString(final String baseString, final int numChanges) {
    final byte[] bases = StringUtil.stringToBytes(baseString);
    if (baseString.length() < numChanges) {
        throw new IllegalArgumentException("Too many changes requested");
    }
    final Set<Integer> mutatedPositions = new HashSet<>();
    for (int changesSoFar = 0; changesSoFar < numChanges; ++changesSoFar) {
        // Redraw until a position that has not been altered yet comes up;
        // Set.add() returns false for a position that is already recorded.
        int positionToChange;
        do {
            positionToChange = random.nextInt(bases.length);
        } while (!mutatedPositions.add(positionToChange));
        bases[positionToChange] = alterBase(bases[positionToChange]);
    }
    return StringUtil.bytesToString(bases);
}
示例8
/**
 * Asserts that BASECALLS_DIR is a readable directory and checks that every entry of LANES
 * is at least 1.
 *
 * @return null when all lanes are valid; otherwise a one-element array holding a message
 *         that lists all LANES values
 */
@Override
protected String[] customCommandLineValidation() {
    IOUtil.assertDirectoryIsReadable(BASECALLS_DIR);

    // Only the first offending lane is reported; the message lists every lane anyway.
    for (final Integer lane : LANES) {
        if (lane < 1) {
            return new String[] {
                    "LANES must be greater than or equal to 1. LANES passed in " + StringUtil.join(", ", LANES)
            };
        }
    }
    return null;
}
示例9
/**
 * Create one SAMSequenceRecord from a single fasta sequence. The record carries the
 * sequence name and length, the MD5 of the upper-cased bases, the URI, and - when set -
 * the genome assembly and species.
 *
 * Note that the array returned by refSeq.getBases() is upper-cased in place.
 */
private SAMSequenceRecord makeSequenceRecord(final ReferenceSequence refSeq) {
    final SAMSequenceRecord record = new SAMSequenceRecord(refSeq.getName(), refSeq.length());

    // Compute MD5 of upcased bases
    final byte[] bases = refSeq.getBases();
    int idx = 0;
    while (idx < bases.length) {
        bases[idx] = StringUtil.toUpperCase(bases[idx]);
        idx++;
    }
    record.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases));

    if (null != GENOME_ASSEMBLY) {
        record.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, GENOME_ASSEMBLY);
    }
    record.setAttribute(SAMSequenceRecord.URI_TAG, URI);
    if (null != SPECIES) {
        record.setAttribute(SAMSequenceRecord.SPECIES_TAG, SPECIES);
    }
    return record;
}
示例10
/**
 * Creates a temporary refFlat file (deleted on JVM exit) holding one tab-separated line that
 * describes a single gene on the given sequence.
 *
 * @param sequence value written to the CHROMOSOME column
 * @return the temporary refFlat file
 * @throws Exception if the temporary file cannot be created or opened for writing
 */
public File getRefFlatFile(String sequence) throws Exception {
    // Create a refFlat file with a single gene containing two exons, one of which is overlapped by the
    // ribosomal interval.
    final String[] refFlatFields = new String[RefFlatColumns.values().length];
    refFlatFields[RefFlatColumns.GENE_NAME.ordinal()] = "myGene";
    refFlatFields[RefFlatColumns.TRANSCRIPT_NAME.ordinal()] = "myTranscript";
    refFlatFields[RefFlatColumns.CHROMOSOME.ordinal()] = sequence;
    refFlatFields[RefFlatColumns.STRAND.ordinal()] = "+";
    refFlatFields[RefFlatColumns.TX_START.ordinal()] = "49";
    refFlatFields[RefFlatColumns.TX_END.ordinal()] = "500";
    refFlatFields[RefFlatColumns.CDS_START.ordinal()] = "74";
    refFlatFields[RefFlatColumns.CDS_END.ordinal()] = "400";
    refFlatFields[RefFlatColumns.EXON_COUNT.ordinal()] = "2";
    refFlatFields[RefFlatColumns.EXON_STARTS.ordinal()] = "49,249";
    refFlatFields[RefFlatColumns.EXON_ENDS.ordinal()] = "200,500";

    final File refFlatFile = File.createTempFile("tmp.", ".refFlat");
    refFlatFile.deleteOnExit();
    // try-with-resources closes the stream even if writing throws.
    try (PrintStream refFlatStream = new PrintStream(refFlatFile)) {
        refFlatStream.println(StringUtil.join("\t", refFlatFields));
    }
    return refFlatFile;
}
示例11
/**
 * Call this method to create a ClusterData iterator over the specified tiles.
 *
 * @param requestedTiles the tiles to read; null means all available tiles
 * @return An iterator for reading the Illumina basecall output for the lane specified in the constructor.
 * @throws PicardException if requestedTiles is non-null but empty
 */
public BaseIlluminaDataProvider makeDataProvider(List<Integer> requestedTiles) {
    if (requestedTiles == null) {
        requestedTiles = availableTiles;
    } else if (requestedTiles.isEmpty()) {
        throw new PicardException("Zero length tile list supplied to makeDataProvider, you must specify at least 1 tile OR pass NULL to use all available tiles");
    }

    // One parser per supported format, each mapped to the data types it provides.
    final Map<IlluminaParser, Set<IlluminaDataType>> parsersToDataType = new HashMap<>();
    for (final Map.Entry<SupportedIlluminaFormat, Set<IlluminaDataType>> fmToDt : formatToDataTypes.entrySet()) {
        parsersToDataType.put(makeParser(fmToDt.getKey(), requestedTiles), fmToDt.getValue());
    }

    // Separator and collection are two separate arguments; concatenating them into one
    // string made join() see no elements at all, so the parsers were never listed.
    log.debug("The following parsers will be used by IlluminaDataProvider: " + StringUtil.join(",", parsersToDataType.keySet()));

    return new IlluminaDataProvider(outputMapping, parsersToDataType, basecallDirectory, lane);
}
示例12
/**
 * Verifies that every expected column is present and returns the columns that are left over.
 * Neither input set is modified.
 *
 * @param actualCols   The columns present in the LIBRARY_PARAMS file
 * @param expectedCols The columns that are REQUIRED
 * @return actualCols - expectedCols
 * @throws PicardException if any expected column is absent from actualCols
 */
private Set<String> findAndFilterExpectedColumns(final Set<String> actualCols, final Set<String> expectedCols) {
    final Set<String> absent = new HashSet<>(expectedCols);
    absent.removeAll(actualCols);

    if (absent.isEmpty()) {
        final Set<String> extras = new HashSet<>(actualCols);
        extras.removeAll(expectedCols);
        return extras;
    }

    final String message = String.format(
            "LIBRARY_PARAMS file %s is missing the following columns: %s.",
            LIBRARY_PARAMS.getAbsolutePath(), StringUtil.join(", ", absent));
    throw new PicardException(message);
}
示例13
/**
 * Validates a set of column labels that are meant to be read group (RG) header tags.
 * A label is rejected when it is one of the keys of buildSamHeaderParameters(null), or
 * when it is longer than two characters.
 *
 * @param rgTagColumns A set of columns that should conform to the rg header attribute format
 * @throws PicardException if any column is a forbidden header or is longer than two characters
 */
private void checkRgTagColumns(final Set<String> rgTagColumns) {
    // Narrow the default header keys down to those that also appear among the supplied columns.
    final Set<String> clashes = buildSamHeaderParameters(null).keySet();
    clashes.retainAll(rgTagColumns);
    if (!clashes.isEmpty()) {
        throw new PicardException("Illegal ReadGroup tags in library params(barcode params) file(" + LIBRARY_PARAMS.getAbsolutePath() + ") Offending headers = " + StringUtil.join(", ", clashes));
    }

    for (final String column : rgTagColumns) {
        if (column.length() <= 2) {
            continue;
        }
        throw new PicardException("Column label (" + column + ") unrecognized. Library params(barcode params) can only contain the columns " +
                "(OUTPUT, LIBRARY_NAME, SAMPLE_ALIAS, BARCODE, BARCODE_<X> where X is a positive integer) OR two letter RG tags!");
    }
}
示例14
/**
 * Builds an unmapped SAMRecord from a fastq record: read name, bases, read group and
 * base qualities (converted via convertQuality and range-checked against MIN_Q..MAX_Q).
 *
 * @param header   header the new record is created with
 * @param baseName read name to assign
 * @param frec     fastq record supplying bases and qualities
 * @param paired   when true the record is also flagged as paired with an unmapped mate
 * @return the populated record
 * @throws PicardException if any converted quality lies outside MIN_Q..MAX_Q
 */
private SAMRecord createSamRecord(final SAMFileHeader header, final String baseName, final FastqRecord frec, final boolean paired) {
    final SAMRecord record = new SAMRecord(header);
    record.setReadName(baseName);
    record.setReadString(frec.getReadString());
    record.setReadUnmappedFlag(true);
    record.setAttribute(ReservedTagConstants.READ_GROUP_ID, READ_GROUP_NAME);

    final byte[] quals = StringUtil.stringToBytes(frec.getBaseQualityString());
    convertQuality(quals, QUALITY_FORMAT);
    for (int i = 0; i < quals.length; i++) {
        // Treat the byte as unsigned before range-checking it.
        final int uQual = quals[i] & 0xff;
        final boolean inRange = MIN_Q <= uQual && uQual <= MAX_Q;
        if (!inRange) {
            throw new PicardException("Base quality " + uQual + " is not in the range " + MIN_Q + ".." +
                    MAX_Q + " for read " + frec.getReadHeader());
        }
    }
    record.setBaseQualities(quals);

    if (paired) {
        record.setReadPairedFlag(true);
        record.setMateUnmappedFlag(true);
    }
    return record;
}
示例15
/** Returns read baseName and asserts correct pair read name format:
 * <ul>
 * <li> Paired reads must either have the exact same read names or they must contain at least one "/"
 * <li> and the First pair read name must end with "/1" and second pair read name ends with "/2"
 * <li> The baseName (read name part before the /) must be the same for both read names
 * <li> If the read names are exactly the same but end in "/2" or "/1" then an exception will be thrown
 * </ul>
 *
 * @param readName1 name of the first read of the pair
 * @param readName2 name of the second read of the pair
 * @param freader1  reader the first read came from; passed to error() for the message
 * @param freader2  reader the second read came from; passed to error() for the message
 * @return the base name shared by both reads
 * @throws PicardException if the base names differ, if only one read carries a pair number,
 *         or if the pair numbers are not "1" and "2" respectively
 */
String getBaseName(final String readName1, final String readName2, final FastqReader freader1, final FastqReader freader2) {
    String [] toks = getReadNameTokens(readName1, 1, freader1);
    final String baseName1 = toks[0];
    final String num1 = toks[1];

    toks = getReadNameTokens(readName2, 2, freader2);
    final String baseName2 = toks[0];
    final String num2 = toks[1];

    if (!baseName1.equals(baseName2)) {
        throw new PicardException(String.format("In paired mode, read name 1 (%s) does not match read name 2 (%s)", baseName1,baseName2));
    }

    final boolean num1Blank = StringUtil.isBlank(num1);
    final boolean num2Blank = StringUtil.isBlank(num2);

    if (num1Blank != num2Blank) {
        // Exactly one pair number is present: blame the read whose number is actually absent.
        // (Previously the two messages were swapped and named the read that did carry its number.)
        if (num1Blank) {
            throw new PicardException(error(freader1, "Pair 1 number is missing (" +readName1+ "). Both pair numbers must be present or neither."));
        }
        throw new PicardException(error(freader2, "Pair 2 number is missing (" +readName2+ "). Both pair numbers must be present or neither."));
    }

    if (!num1Blank) {
        // Both pair numbers are present, so they must be exactly "1" and "2".
        if (!num1.equals("1")) throw new PicardException(error(freader1,"Pair 1 number must be 1 ("+readName1+")"));
        if (!num2.equals("2")) throw new PicardException(error(freader2,"Pair 2 number must be 2 ("+readName2+")"));
    }

    return baseName1;
}
示例16
/**
 * Saves the metrics to {@code out} and, when histograms exist and a chart output is configured,
 * runs the mean-quality-by-cycle R script to plot them.
 *
 * @param metrics       metrics to save; a warning is logged when it holds no histograms
 * @param readsHeader   header whose read groups determine the plot subtitle
 * @param inputFileName input file name passed through to the R script
 */
private void saveResults(final MetricsFile<?, Integer> metrics, final SAMFileHeader readsHeader, final String inputFileName){
    MetricsUtils.saveMetrics(metrics, out);

    if (metrics.getAllHistograms().isEmpty()) {
        logger.warn("No valid bases found in input file.");
        return;
    }
    if (chartOutput == null) {
        return;
    }

    // Now run R to generate a chart.
    // With exactly one read group, that read group's library name becomes the plot subtitle
    // (empty when the library is null); otherwise the subtitle stays empty.
    final List<SAMReadGroupRecord> readGroups = readsHeader.getReadGroups();
    final String plotSubtitle = readGroups.size() == 1
            ? StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary())
            : "";

    final RScriptExecutor executor = new RScriptExecutor();
    executor.addScript(getMeanQualityByCycleRScriptResource());
    executor.addArgs(out, chartOutput.getAbsolutePath(), inputFileName, plotSubtitle);
    executor.exec();
}
示例17
/**
 * Fills a halfContextAccumulator by summing over the appropriate counts from a fullContextAccumulator.
 * Every full-context entry is merged into two entries of this accumulator: one keyed by its
 * leading bases + central base + 'N' padding, and one keyed by 'N' padding + central base +
 * its trailing bases.
 *
 * @param fullContextAccumulator source of the full-context counts
 * @param contextSize number of bases on each side of the central base
 */
public void fillHalfRecords(final ContextAccumulator fullContextAccumulator, final int contextSize) {
    final String padding = StringUtil.repeatCharNTimes('N', contextSize);

    for (final Map.Entry<String, AlignmentAccumulator[]> entry : fullContextAccumulator.artifactMap.entrySet()) {
        final String key = entry.getKey();
        final char centralBase = key.charAt(contextSize);

        final String leadingKey = key.substring(0, contextSize) + centralBase + padding;
        final String trailingKey = padding + centralBase + key.substring(contextSize + 1);

        final AlignmentAccumulator[] trailing = this.artifactMap.get(trailingKey);
        final AlignmentAccumulator[] leading = this.artifactMap.get(leadingKey);

        // Merge slot by slot; the half-context arrays are indexed like the full-context one.
        int slot = 0;
        for (final AlignmentAccumulator full : entry.getValue()) {
            trailing[slot].merge(full);
            leading[slot].merge(full);
            slot++;
        }
    }
}
示例18
/**
 * Fills a zeroContextAccumulator by summing over the appropriate counts from a fullContextAccumulator.
 * Every full-context entry is merged into the entry of this accumulator keyed by
 * 'N' padding + central base + 'N' padding.
 *
 * @param fullContextAccumulator source of the full-context counts
 * @param contextSize number of bases on each side of the central base
 */
public void fillZeroRecords(final ContextAccumulator fullContextAccumulator, final int contextSize) {
    final String padding = StringUtil.repeatCharNTimes('N', contextSize);

    for (final Map.Entry<String, AlignmentAccumulator[]> entry : fullContextAccumulator.artifactMap.entrySet()) {
        final char centralBase = entry.getKey().charAt(contextSize);
        final AlignmentAccumulator[] zero = this.artifactMap.get(padding + centralBase + padding);

        // Merge slot by slot; the zero-context array is indexed like the full-context one.
        int slot = 0;
        for (final AlignmentAccumulator full : entry.getValue()) {
            zero[slot].merge(full);
            slot++;
        }
    }
}
示例19
/**
 * Copies a library params file from the test data directory to {@code libraryParams}, adding an
 * OUTPUT_PREFIX column. For every data line the prefix is a file in {@code outputDir} named
 * by concatenating the line's first {@code concatNColumnFields} tab-separated fields; each
 * prefix is also appended to {@code outputPrefixes}.
 *
 * @param libraryParamsFile   name of the source file inside testDataDir
 * @param concatNColumnFields number of leading fields concatenated to form the prefix name
 * @param testDataDir         directory holding the source file
 * @param outputDir           directory the output prefixes are placed in
 * @param libraryParams       destination file
 * @param outputPrefixes      receives one prefix per data line
 * @throws FileNotFoundException if the source cannot be opened or the destination cannot be created
 */
private void convertParamsFile(String libraryParamsFile, int concatNColumnFields, File testDataDir, File outputDir, File libraryParams, List<File> outputPrefixes) throws FileNotFoundException {
    // Both resources are managed here so the writer is closed even if the loop throws.
    try (LineReader reader = new BufferedLineReader(new FileInputStream(new File(testDataDir, libraryParamsFile)));
         PrintWriter writer = new PrintWriter(libraryParams)) {
        final String header = reader.readLine();
        writer.println(header + "\tOUTPUT_PREFIX");

        String line;
        while ((line = reader.readLine()) != null) {
            final String[] fields = line.split("\t");
            final File outputPrefix = new File(outputDir, StringUtil.join("", Arrays.copyOfRange(fields, 0, concatNColumnFields)));
            outputPrefixes.add(outputPrefix);
            writer.println(line + "\t" + outputPrefix);
        }
    }
}
示例20
/**
 * Scans every sequence of the reference file and records a warning in messages.baseErrors
 * for the first base of a sequence that IUPAC_TABLE does not flag as valid.
 * Scanning moves on to the next sequence after such a base, so when several sequences
 * contain invalid bases the stored message describes the last of them.
 *
 * @param referenceFile the reference sequence file to check
 */
private void validateReferenceBases(File referenceFile) {
    final ReferenceSequenceFile refSeqFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceFile, true);

    for (ReferenceSequence sequence = refSeqFile.nextSequence(); sequence != null; sequence = refSeqFile.nextSequence()) {
        for (final byte base : sequence.getBases()) {
            // NOTE(review): base is used directly as an array index - presumably bases are never negative; confirm.
            if (IUPAC_TABLE[base]) {
                continue;
            }
            messages.baseErrors = String.format("WARNING: AT least one invalid base '%c' (decimal %d) in reference sequence named %s",
                    StringUtil.byteToChar(base), base, sequence.getName());
            break;
        }
    }
}
示例21
/**
 * Counts how many entries of {@code bases} equal the given base.
 *
 * @param base the base to look for
 * @return the number of matching entries
 */
public int getCountBase (final char base) {
    final Byte target = StringUtil.charToByte(base);
    int matches = 0;
    for (final Byte candidate : bases) {
        if (candidate.equals(target)) {
            matches++;
        }
    }
    return matches;
}
示例22
/**
 * Builds the test fixtures for the contig named "forInterChromosomeStrandSwitch55": one
 * TestDataBreakEndVariants built from the contig sequence as given (the "_plus" fixture) and one
 * built from getReverseComplimentCopy(contigSequence) with the two alignments re-expressed
 * (the "_minus" fixture). Both fixtures share the same expected novel adjacency, SV types
 * and variants.
 *
 * @return a tuple of (plus fixture, minus fixture)
 */
private static Tuple2<TestDataBreakEndVariants, TestDataBreakEndVariants> forInterChromosomeStrandSwitch55() {
String contigName = "forInterChromosomeStrandSwitch55";
byte[] contigSequence = "ATTCTGAGAAACTTCATTTTGATGTGTGCATTCATCTTCCAGAGTTGAAACTTTCTTTTGATTGTGTAGTTTTGAAACACTCTTTTTGTAGAATCTGCAAGGGGGTATTTGTAGGGATTTGAAGCCTATTGTTGAAAAGGTAATATCTTCACATAAAAACTACATAGGATCATTCGGAGAAACTTCTTTGTGATGTGTGCATTCAACTCACAGAGTTGAACCTATCTTTTTTTTTTTAATGTCTGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGGAGCAATTTGAGGCCTAAGGTGGAAAAGGAAATATTTTCACATAAAAACTAGACAGAAGAATTCTGTGAAACTTGTTCAGGACCTGTGCATTCATCTTACAGATTTGAATCTTTCTTTTGATTGAGCAGTTTGGAAACACTGTTTTTGTAGAATCTTCAGGTGGACATTCAGAGCACTTTGTGTCCTATGGTAGAAAAGGAAATATCTTCATA".getBytes();
// No homology is expected; the expected inserted sequence is derived from contig bytes
// 230 (inclusive) to 292 (exclusive), passed through getReverseComplimentCopy.
String homology = "";
String insSeq = StringUtil.bytesToString(getReverseComplimentCopy(Arrays.copyOfRange(contigSequence, 230, 292)));
// Alignments and chimera for the "_plus" fixture.
AlignmentInterval firstAlignment = new AlignmentInterval(new SimpleInterval("chr21:10784600-10784829"), 1, 230, TextCigarCodec.decode("230M279S"), true, 60, 12, 170, ContigAlignmentsModifier.AlnModType.NONE);
AlignmentInterval secondAlignment = new AlignmentInterval(new SimpleInterval("chr20:28817762-28817977"), 293, 509, TextCigarCodec.decode("292H93M1I123M"), false, 60, 11, 149, ContigAlignmentsModifier.AlnModType.NONE);
SimpleChimera simpleChimera = new SimpleChimera(contigName, firstAlignment, secondAlignment, StrandSwitch.FORWARD_TO_REVERSE, false, Collections.emptyList(), NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME);
// Expected results, shared by both fixtures.
SimpleInterval expectedLeftBreakpoint = new SimpleInterval("chr20:28817977-28817977");
SimpleInterval expectedRightBreakpoint = new SimpleInterval("chr21:10784829-10784829");
final BreakpointComplications expectedBreakpointComplications = new BreakpointComplications.InterChromosomeBreakpointComplications(homology, insSeq);
NovelAdjacencyAndAltHaplotype expectedNovelAdjacencyAndAltSeq = new NovelAdjacencyAndAltHaplotype(expectedLeftBreakpoint, expectedRightBreakpoint, StrandSwitch.FORWARD_TO_REVERSE, expectedBreakpointComplications, TypeInferredFromSimpleChimera.INTER_CHR_STRAND_SWITCH_55, EMPTY_BYTE_ARRAY);
// Two BND records are expected, one per breakpoint; each variant's last argument names the other record's ID.
final List<SvType> expectedSVTypes = Arrays.asList(
makeBNDType("chr20", 28817977, "BND_chr20_28817977_chr21_10784829_1", Allele.create("A", true), Allele.create("A"+insSeq+"]chr21:10784829]"), Collections.emptyMap(), true, BreakEndVariantType.SupportedType.INTER_CHR_STRAND_SWITCH_55),
makeBNDType("chr21", 10784829, "BND_chr20_28817977_chr21_10784829_2", Allele.create("T", true), Allele.create("T"+ SequenceUtil.reverseComplement(insSeq) +"]chr20:28817977]"), Collections.emptyMap(), false, BreakEndVariantType.SupportedType.INTER_CHR_STRAND_SWITCH_55)
);
final List<VariantContext> expectedVariants = Arrays.asList(
addStandardAttributes(makeBND(expectedLeftBreakpoint, expectedRightBreakpoint, Allele.create("A", true), insSeq, "", true, true, true), contigName, 60, 216, homology, insSeq, "BND_chr20_28817977_chr21_10784829_2").make(),
addStandardAttributes(makeBND(expectedLeftBreakpoint, expectedRightBreakpoint, Allele.create("T", true), SequenceUtil.reverseComplement(insSeq), "", false, true, true), contigName, 60, 216, homology, insSeq, "BND_chr20_28817977_chr21_10784829_1").make()
);
final TestDataBreakEndVariants forInterChromosomeStrandSwitch55_plus =
new TestDataBreakEndVariants(firstAlignment, secondAlignment, contigName, contigSequence, true, simpleChimera, expectedNovelAdjacencyAndAltSeq, expectedSVTypes, expectedVariants, BreakpointsInference.InterChromosomeBreakpointsInference.class);
// "_minus" fixture: the alignment variables are reassigned (chr20 alignment now first) and
// the contig is supplied via getReverseComplimentCopy.
firstAlignment = new AlignmentInterval(new SimpleInterval("chr20:28817762-28817977"), 1, 217, TextCigarCodec.decode("123M1I93M292H"), true, 60, 11, 149, ContigAlignmentsModifier.AlnModType.NONE);
secondAlignment = new AlignmentInterval(new SimpleInterval("chr21:10784600-10784829"), 280, 509, TextCigarCodec.decode("279S230M"), false, 60, 12, 170, ContigAlignmentsModifier.AlnModType.NONE);
simpleChimera = new SimpleChimera(contigName, firstAlignment, secondAlignment, StrandSwitch.FORWARD_TO_REVERSE, true, Collections.emptyList(), NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME);
final TestDataBreakEndVariants forInterChromosomeStrandSwitch55_minus =
new TestDataBreakEndVariants(firstAlignment, secondAlignment, contigName, getReverseComplimentCopy(contigSequence), false, simpleChimera, expectedNovelAdjacencyAndAltSeq, expectedSVTypes, expectedVariants, BreakpointsInference.InterChromosomeBreakpointsInference.class);
return new Tuple2<>(forInterChromosomeStrandSwitch55_plus, forInterChromosomeStrandSwitch55_minus);
}
示例23
/**
 * Overwrites the bases covered by the interval with 'N'. The supplied array is modified
 * in place and is also the array that is returned.
 *
 * @param fastaRefBases the reference bases to mask
 * @param interval the region to mask
 * @return fastaRefBases, with the interval's bases set to 'N'
 */
public static byte [] setSequenceToN (final byte [] fastaRefBases, final Interval interval) {
    // the byte [] is base 0, the coordinates are base 1: shift the start down by one,
    // while the inclusive end doubles as the exclusive upper bound for Arrays.fill.
    final int fromIndex = interval.getStart() - 1;
    final int toIndex = interval.getEnd();
    Arrays.fill(fastaRefBases, fromIndex, toIndex, StringUtil.charToByte('N'));
    return fastaRefBases;
}
示例24
/**
 * Creates the output writer. The reader's header gets a PG record for this program and a
 * comment line describing the collapse settings before being handed to the writer.
 *
 * @param reader the input whose header is reused for the output
 * @return a SAM or BAM writer on OUTPUT, created with presorted = false
 */
private SAMFileWriter getWriter (final SamReader reader) {
    final SAMFileHeader header = reader.getFileHeader();
    SamHeaderUtil.addPgRecord(header, this);

    // The comment text is assembled in three pieces; the resulting string is unchanged.
    final String tagSummary = "Edit distance collapsed tag " + this.COLLAPSE_TAG + " to new tag " + this.OUT_TAG;
    final String settings = " with edit distance "+ this.EDIT_DISTANCE + "using indels=" + this.FIND_INDELS;
    final String contextSummary = " in the context of tags [" + StringUtil.join(" ", this.CONTEXT_TAGS) + "]";
    header.addComment(tagSummary + settings + contextSummary);

    return new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, this.OUTPUT);
}
示例25
/**
 * Writes the report to OUTPUT_REPORT: a block of "#"-prefixed comment lines with settings and
 * summary counts, a tab-separated header line, and one tab-separated line for every
 * unambiguous small barcode of {@code result}.
 *
 * @param result      collapse result supplying the small barcodes and their larger related barcode
 * @param resultClean second collapse result; a small barcode is reported as repaired when this
 *                    result has a larger related barcode for it
 * @param umiResult   per-cell UMI counts and poly-T statistics
 */
private void writeReport (final BottomUpCollapseResult result, final BottomUpCollapseResult resultClean, final UMIsPerCellResult umiResult) {
    // try-with-resources closes the report even when one of the lookups below throws.
    try (PrintStream outReport = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.OUTPUT_REPORT))) {
        // write comments section, each line starts with a "#"
        outReport.println("# FILTER_AMBIGUOUS="+FILTER_AMBIGUOUS);
        outReport.println("# MIN_UMIS_PER_CELL="+MIN_UMIS_PER_CELL);
        // NOTE(review): the next two lines print MIN_UMIS_PER_CELL under other labels - this looks
        // like a copy/paste slip; confirm which fields were intended before changing the output.
        outReport.println("# UMI_BIAS_THRESHOLD="+MIN_UMIS_PER_CELL);
        outReport.println("# EDIT_DISTANCE="+MIN_UMIS_PER_CELL);
        outReport.println("#");
        outReport.println("# TOTAL_BARCODES_TESTED="+umiResult.getNumCellBarocodesTested());
        outReport.println("# BARCODES_COLLAPSED="+result.getUnambiguousSmallBarcodes().size());
        outReport.println("# ESTIMATED_UMIS_COLLAPSED="+getTotalAmbiguousUMIs(result.getUnambiguousSmallBarcodes(), umiResult.getUmisPerCell()));
        outReport.println("# AMBIGUOUS_BARCODES="+result.getAmbiguousBarcodes().size());
        outReport.println("# ESTIMATED_AMBIGUOUS_UMIS="+getTotalAmbiguousUMIs(result.getAmbiguousBarcodes(), umiResult.getUmisPerCell()));
        outReport.println("# POLY_T_BIASED_BARCODES="+umiResult.getPolyTBiasedBarcodes());
        outReport.println("# POLY_T_BIASED_BARRCODE_UMIS="+umiResult.getPolyTBiasedUMIs());
        outReport.println("# POLY_T_POSITION="+umiResult.getPolyTPosition());

        // write header
        final String [] header = {"intended_barcode", "neighbor_barcode", "intended_size", "neighbor_size", "position", "intended_base", "neighbor_base", "repaired"};
        outReport.println(StringUtil.join("\t", header));

        final ObjectCounter<String> umiCounts = umiResult.getUmisPerCell();
        for (final String small : result.getUnambiguousSmallBarcodes()) {
            final String large = result.getLargerRelatedBarcode(small);
            final BarcodeSubstitutionPair p = new BarcodeSubstitutionPair(large, small);
            final boolean repaired = resultClean.getLargerRelatedBarcode(small) != null;
            // The position is written as getPosition() + 1.
            final String [] body = {large, small, Integer.toString(umiCounts.getCountForKey(large)), Integer.toString(umiCounts.getCountForKey(small)),
                    Integer.toString(p.getPosition()+1), p.getIntendedBase(), p.getNeighborBase(), Boolean.toString(repaired).toUpperCase()};
            outReport.println(StringUtil.join("\t", body));
        }
    }
}
示例26
/**
 * Reads this object's tag from the record and splits its String value on ENCODE_DELIMITER
 * into a four-slot array via StringUtil.splitConcatenateExcessTokens.
 *
 * @param rec the record to read the tag from
 * @return a four-element array filled by the split
 * @throws IllegalArgumentException if the tag value is not a String (including when it is absent)
 */
private String [] getFirstSplit(final SAMRecord rec) {
    final Object attribute = rec.getAttribute(tag);
    if (attribute instanceof String) {
        final String[] tokens = new String[4];
        StringUtil.splitConcatenateExcessTokens((String) attribute, tokens, ENCODE_DELIMITER);
        return tokens;
    }
    throw new IllegalArgumentException(SAMTagUtil.getSingleton().makeStringTag(this.tag) + " does not have a String value");
}
示例27
/**
 * Parses a position field of the form "start" or "start-end".
 *
 * @param posString the text to parse
 * @return a two-element array {start, end}; end equals start when no second value is present
 * @throws NumberFormatException if a value is not a parsable integer
 */
private int [] parsePosition (final String posString) {
    final String[] tokens = new String[2];
    StringUtil.split(posString, tokens, '-');

    final int start = Integer.parseInt(tokens[0]);
    // The second slot stays null when the field holds a single position.
    final int end = (tokens[1] == null) ? start : Integer.parseInt(tokens[1]);
    return new int[] {start, end};
}
示例28
/**
 * Writes the names of one dimension as structured comment lines, at most
 * MatrixMarketConstants.NUM_HEADER_ELEMENTS_PER_ROW names per line. Each line consists of the
 * structured-comment prefix, the dimension name, the list separator and the separator-joined names.
 *
 * @param dimensionName label written at the start of every line
 * @param names the names to write, in order
 * @throws RuntimeIOException if writing fails
 */
private void WriteDimensionLabels(final String dimensionName, final List<String> names) {
    final int perRow = MatrixMarketConstants.NUM_HEADER_ELEMENTS_PER_ROW;
    try {
        int rowStart = 0;
        while (rowStart < names.size()) {
            // The last row may hold fewer than perRow names.
            final int rowEnd = rowStart + Math.min(perRow, names.size() - rowStart);

            writer.write(MatrixMarketConstants.MM_STRUCTURED_COMMENT_LINE_START);
            writer.write(dimensionName);
            writer.write(MatrixMarketConstants.MM_HEADER_LIST_SEPARATOR);
            writer.write(StringUtil.join(MatrixMarketConstants.MM_HEADER_LIST_SEPARATOR, names.subList(rowStart, rowEnd)));
            writer.newLine();

            rowStart += perRow;
        }
    } catch (IOException e) {
        throw new RuntimeIOException("Exception writing " + filename, e);
    }
}
示例29
/**
 * Locates the poly A run in a read, taking a possible adapter match into account.
 * The adapter is searched for first. If it is found at position 0 an all-zero PolyARun is
 * returned; if it is not found, the whole read is searched for poly A. When the adapter match
 * lies within dubiousAdapterMatchLength of the read end and no poly A precedes it, the search
 * is repeated as if the adapter had not matched.
 *
 * @param readString the read bases
 * @param adapterSequence the adapter to look for
 * @return the poly A run found, which may be a "no match" run
 */
public PolyARun getPolyAStart(final String readString, final String adapterSequence) {
    final byte[] readBases = StringUtil.stringToBytes(readString);
    final byte[] adapterBases = StringUtil.stringToBytes(adapterSequence);
    int adapterClipPosition = ClippingUtility.findIndexOfClipSequence(
            readBases, adapterBases, minAdapterMatch, maxAdapterErrorRate);

    if (adapterClipPosition == ClippingUtility.NO_MATCH) {
        adapterClipPosition = readString.length();
    } else if (adapterClipPosition == 0) {
        return new PolyARun(0, 0, 0);
    }

    final SimplePolyAFinder.PolyARun ret = getPolyARun(readString, adapterClipPosition);
    if (!ret.isNoMatch()) {
        return ret;
    }

    // If there was a short adapter match, but not enough poly A before it,
    // see if there would be enough poly A if the adapter considered not to match.
    final int readLength = readString.length();
    final boolean shortAdapterMatch = adapterClipPosition < readLength
            && adapterClipPosition + dubiousAdapterMatchLength >= readLength;
    if (shortAdapterMatch) {
        // If did not find enough polyA looking before adapter, try again looking from end of read.
        final SimplePolyAFinder.PolyARun tryWithoutAdapter = getPolyARun(readString, readLength);
        if (!tryWithoutAdapter.isNoMatch()) {
            return tryWithoutAdapter;
        }
    }
    return ret;
}
示例30
/**
 * Creates a template from a sequence, caching the sequence and its reverse complement both
 * as Strings and as byte arrays, plus the byte form of the ignored bases.
 *
 * @param sequence the template sequence
 * @param ignoredBases the bases to ignore, converted to bytes and stored
 */
public TrimSequenceTemplate(final String sequence, final String ignoredBases) {
    this.sequence = sequence;
    this.bases = StringUtil.stringToBytes(sequence);

    this.reverseComplement = SequenceUtil.reverseComplement(sequence);
    this.rcBases = StringUtil.stringToBytes(this.reverseComplement);

    this.ignoredBases = StringUtil.stringToBytes(ignoredBases);
}