Java source code examples: htsjdk.samtools.util.StringUtil

Example 1
public void run() {
    final int ITERATIONS = 1000000;
    final String[] fields = new String[10000];
    final StopWatch watch = new StopWatch();

    watch.start();
    for (int i=0; i<ITERATIONS; ++i) {
        if (StringUtil.split(TEXT, fields, '\t') > 100) {
            System.out.println("Mama Mia that's a lot of tokens!!");
        }
    }
    watch.stop();
    System.out.println("StringUtil.split() took " + watch.getElapsedTime());
    watch.reset();

    watch.start();
    for (int i=0; i<ITERATIONS; ++i) {
        if (split(TEXT, fields, "\t") > 100) {
            System.out.println("Mama Mia that's a lot of tokens!!");
        }
    }
    watch.stop();
    System.out.println("StringTokenizer took " + watch.getElapsedTime());
}
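
The benchmark above depends on a TEXT constant and a local split(String, String[], String) helper that are not shown. As a minimal, self-contained sketch of just the htsjdk call (class name and input string are invented for illustration), note that the token buffer is pre-allocated and split() returns how many tokens it filled:

import htsjdk.samtools.util.StringUtil;

public class SplitSketch {
    public static void main(String[] args) {
        // split() fills the pre-allocated array and returns the number of tokens found;
        // tokens beyond the array's capacity are dropped.
        final String[] fields = new String[10];
        final int n = StringUtil.split("gene\tcell\tumi", fields, '\t');
        for (int i = 0; i < n; i++) {
            System.out.println(fields[i]);
        }
    }
}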
 
Example 2
private void writeMetrics (final boolean writeEditDistanceDistribution, final String context, final AdaptiveMappingResult r, final PrintStream out) {
	if (out==null) return;
	List<EditDistanceMappingMetric> metricList= r.getMetricResult();

	for (EditDistanceMappingMetric edmm: metricList) {
		edmm.getOriginalObservations();
		// Steve reports the number of barcodes including the one that everything is merged into.
		List<String> line = new ArrayList<>(Arrays.asList(context, edmm.getBarcode(), Integer.toString(edmm.getNumMergedBarcodes()+1), Integer.toString(edmm.getEditDistanceDiscovered()), Integer.toString(edmm.getEditDistanceUsed()),
				Integer.toString(edmm.getOriginalObservations()), Integer.toString(edmm.getTotalObservations())));

		if (writeEditDistanceDistribution) {
			int [] edList = edmm.getEdList();
			if (edList.length>0) {
				Integer[] x = Arrays.stream( edList ).boxed().toArray( Integer[]::new );
				String edFormatted = StringUtil.join(",", x);
				line.add(edFormatted);
			} else
				line.add("NA");

		}
		out.println(StringUtil.join("\t", line));

	}
}
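
Both joins in this example pass the separator as the first argument; StringUtil.join() accepts either a Collection or varargs. A minimal sketch under that assumption (class name and values are illustrative):

import htsjdk.samtools.util.StringUtil;
import java.util.Arrays;
import java.util.List;

public class JoinSketch {
    public static void main(String[] args) {
        final List<String> line = Arrays.asList("contextA", "ACGTACGT", "3");
        System.out.println(StringUtil.join("\t", line));   // Collection overload
        System.out.println(StringUtil.join(",", 1, 2, 3)); // varargs overload
    }
}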
 
Example 3
/**
 * Test to see if this read matches this barcode. If any base of the barcode
 * is N or n, that position is ignored in the comparison.
 *
 * @param testString
 *            The read to look for this barcode in. The barcode should be at
 *            the start of the read for this method.  The entire barcode is expected for a match.
 * @return true if this barcode is found in the read.
 */
public boolean hasForwardMatch(final String testString) {
	byte[] testBases = StringUtil.stringToBytes(testString);
	int numBasesCanMatch = 0;
	int numBasesMatch = 0;
	for (int i = 0; i < bases.length; i++) {
		if (isIgnoreBase(this.bases[i]))
			continue;
		numBasesCanMatch++;
		if (SequenceUtil.basesEqual(testBases[i], bases[i]))
			numBasesMatch++;
	}
	if (numBasesCanMatch == numBasesMatch)
		return (true);
	return false;
}
 
Example 4
@Test(enabled=true)
/**
 * Add an uneven number of bases and quals to trip the exception throw.
 */
public void testAddBaseQualsError () {
	int snpPos=76227022;
	Interval snpInterval = new Interval("HUMAN_1", snpPos, snpPos, true, "test");
	SNPUMIBasePileup p = new SNPUMIBasePileup(snpInterval, "ACADM", "fake_cell", "AAAAA");
	char [] bases = {'A', 'A'};
	byte [] quals = {27,17,55};
	byte [] bases2 = new byte [bases.length];
	StringUtil.charsToBytes(bases, 0, bases.length, bases2, 0);
	boolean passes=false;
	try {
		p.setBasesAndQualities(bases2, quals);
	} catch (IllegalArgumentException e) {
		Assert.assertNotNull(e);
		passes=true;
	}
	Assert.assertTrue(passes);
}
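
The test relies on StringUtil.charsToBytes() to copy char bases into a byte[] buffer before calling setBasesAndQualities(). The conversion on its own (illustrative class name and data):

import htsjdk.samtools.util.StringUtil;

public class CharsToBytesSketch {
    public static void main(String[] args) {
        final char[] bases = {'A', 'C', 'G', 'T'};
        final byte[] buffer = new byte[bases.length];
        // Copy bases.length chars starting at offset 0 into buffer starting at offset 0.
        StringUtil.charsToBytes(bases, 0, bases.length, buffer, 0);
        System.out.println(StringUtil.bytesToString(buffer)); // ACGT
    }
}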
 
Example 5
@Test(enabled=true)
public void testAddBaseQuals () {
	int snpPos=76227022;
	Interval snpInterval = new Interval("HUMAN_1", snpPos, snpPos, true, "test");
	SNPUMIBasePileup p = new SNPUMIBasePileup(snpInterval, "ACADM", "fake_cell", "AAAAA");
	char [] bases = {'A', 'A'};
	byte [] quals = {27,55};
	byte [] bases2 = new byte [bases.length];
	StringUtil.charsToBytes(bases, 0, bases.length, bases2, 0);
	boolean passes=true;
	try {
		p.setBasesAndQualities(bases2, quals);
	} catch (IllegalArgumentException e) {
		passes=false;
	}
	Assert.assertTrue(passes);
}
 
Example 6
@Test(enabled=true)
public void testMixedLikelihoodMultiRead () {
	GenotypeType [] g = {GenotypeType.HOM_REF, GenotypeType.HET, GenotypeType.HOM_VAR};
	List<GenotypeType> genotypes  = Arrays.asList(g);

	Double [] m = {new Double(2), new Double(1), new Double(1)};
	List<Double> mixture  = Arrays.asList(m);

	char refAllele ='A';
	char altAllele ='T';

	Byte [] b = {StringUtil.charToByte('A'), StringUtil.charToByte('A')};
	List<Byte> bases = Arrays.asList(b);
	Byte [] q = {new Byte ((byte)10), new Byte ((byte)10)};
	List<Byte> qualities =Arrays.asList(q);

	double result = LikelihoodUtils.getInstance().getLogLikelihoodMixedModel(refAllele, altAllele, genotypes, mixture, bases, qualities, null, null, null);
	Assert.assertEquals(result, Math.log10(0.36), 0.001);

}
 
Example 7
private final String alterBaseString(final String baseString, final int numChanges) {
    final byte[] bases = StringUtil.stringToBytes(baseString);
    if (numChanges > baseString.length())
        throw new IllegalArgumentException("Too many changes requested");
    final Set<Integer> mutatedPositions = new HashSet<>();
    int changesSoFar = 0;
    while (changesSoFar < numChanges) {
        int positionToChange = random.nextInt(bases.length);
        while (mutatedPositions.contains(positionToChange))
            positionToChange = random.nextInt(bases.length);
        mutatedPositions.add(positionToChange);
        bases[positionToChange] = alterBase(bases[positionToChange]);
        ++changesSoFar;
    }
    return StringUtil.bytesToString(bases);
}
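
alterBaseString() converts a String to bytes, mutates selected positions, and converts back. The conversion pair in isolation (illustrative class name; the random mutation logic is omitted):

import htsjdk.samtools.util.StringUtil;

public class RoundTripSketch {
    public static void main(String[] args) {
        final byte[] bases = StringUtil.stringToBytes("ACGTACGT");
        bases[0] = 'T';                                       // mutate one position in place
        System.out.println(StringUtil.bytesToString(bases));  // TCGTACGT
    }
}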
 
Example 8
@Override
protected String[] customCommandLineValidation() {
    IOUtil.assertDirectoryIsReadable(BASECALLS_DIR);
    final List<String> errors = new ArrayList<>();

    for (final Integer lane : LANES) {
        if (lane < 1) {
            errors.add(
                    "LANES must be greater than or equal to 1.  LANES passed in " + StringUtil.join(", ", LANES));
            break;
        }
    }

    if (errors.isEmpty()) {
        return null;
    } else {
        return errors.toArray(new String[errors.size()]);
    }
}
 
Example 9
/**
 * Create one SAMSequenceRecord from a single fasta sequence
 */
private SAMSequenceRecord makeSequenceRecord(final ReferenceSequence refSeq) {
  final SAMSequenceRecord ret = new SAMSequenceRecord(refSeq.getName(), refSeq.length());

  // Compute MD5 of upcased bases
  final byte[] bases = refSeq.getBases();
  for (int i = 0; i < bases.length; ++i) {
    bases[i] = StringUtil.toUpperCase(bases[i]);
  }

  ret.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases));
  if (GENOME_ASSEMBLY != null) {
    ret.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, GENOME_ASSEMBLY);
  }
  ret.setAttribute(SAMSequenceRecord.URI_TAG, URI);
  if (SPECIES != null) {
    ret.setAttribute(SAMSequenceRecord.SPECIES_TAG, SPECIES);
  }
  return ret;
}
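
makeSequenceRecord() upper-cases every reference base before hashing so that the MD5 is case-insensitive. The single-byte overload used above, shown in isolation (illustrative class name):

import htsjdk.samtools.util.StringUtil;

public class UpperCaseSketch {
    public static void main(String[] args) {
        final byte[] bases = StringUtil.stringToBytes("acgtN");
        for (int i = 0; i < bases.length; i++) {
            bases[i] = StringUtil.toUpperCase(bases[i]);
        }
        System.out.println(StringUtil.bytesToString(bases)); // ACGTN
    }
}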
 
Example 10
public File getRefFlatFile(String sequence) throws Exception {
    // Create a refFlat file with a single gene containing two exons, one of which is overlapped by the
    // ribosomal interval.
    final String[] refFlatFields = new String[RefFlatColumns.values().length];
    refFlatFields[RefFlatColumns.GENE_NAME.ordinal()] = "myGene";
    refFlatFields[RefFlatColumns.TRANSCRIPT_NAME.ordinal()] = "myTranscript";
    refFlatFields[RefFlatColumns.CHROMOSOME.ordinal()] = sequence;
    refFlatFields[RefFlatColumns.STRAND.ordinal()] = "+";
    refFlatFields[RefFlatColumns.TX_START.ordinal()] = "49";
    refFlatFields[RefFlatColumns.TX_END.ordinal()] = "500";
    refFlatFields[RefFlatColumns.CDS_START.ordinal()] = "74";
    refFlatFields[RefFlatColumns.CDS_END.ordinal()] = "400";
    refFlatFields[RefFlatColumns.EXON_COUNT.ordinal()] = "2";
    refFlatFields[RefFlatColumns.EXON_STARTS.ordinal()] = "49,249";
    refFlatFields[RefFlatColumns.EXON_ENDS.ordinal()] = "200,500";

    final File refFlatFile = File.createTempFile("tmp.", ".refFlat");
    refFlatFile.deleteOnExit();
    final PrintStream refFlatStream = new PrintStream(refFlatFile);
    refFlatStream.println(StringUtil.join("\t", refFlatFields));
    refFlatStream.close();

    return refFlatFile;
}
 
Example 11
/**
 * Call this method to create a ClusterData iterator over the specified tiles.
 *
 * @return An iterator for reading the Illumina basecall output for the lane specified in the constructor.
 */
public BaseIlluminaDataProvider makeDataProvider(List<Integer> requestedTiles) {
    if (requestedTiles == null) {
        requestedTiles = availableTiles;
    } else {
        if (requestedTiles.isEmpty()) {
            throw new PicardException("Zero length tile list supplied to makeDataProvider, you must specify at least 1 tile OR pass NULL to use all available tiles");
        }
    }

    final Map<IlluminaParser, Set<IlluminaDataType>> parsersToDataType = new HashMap<>();
    for (final Map.Entry<SupportedIlluminaFormat, Set<IlluminaDataType>> fmToDt : formatToDataTypes.entrySet()) {
        parsersToDataType.put(makeParser(fmToDt.getKey(), requestedTiles), fmToDt.getValue());
    }

    log.debug("The following parsers will be used by IlluminaDataProvider: " + StringUtil.join(",", parsersToDataType.keySet()));

    return new IlluminaDataProvider(outputMapping, parsersToDataType, basecallDirectory, lane);
}
 
Example 12
/**
 * Assert that expectedCols are present and return actualCols - expectedCols
 *
 * @param actualCols   The columns present in the LIBRARY_PARAMS file
 * @param expectedCols The columns that are REQUIRED
 * @return actualCols - expectedCols
 */
private Set<String> findAndFilterExpectedColumns(final Set<String> actualCols, final Set<String> expectedCols) {
    final Set<String> missingColumns = new HashSet<>(expectedCols);
    missingColumns.removeAll(actualCols);

    if (!missingColumns.isEmpty()) {
        throw new PicardException(String.format(
                "LIBRARY_PARAMS file %s is missing the following columns: %s.",
                LIBRARY_PARAMS.getAbsolutePath(), StringUtil.join(", ", missingColumns
                )));
    }

    final Set<String> remainingColumns = new HashSet<>(actualCols);
    remainingColumns.removeAll(expectedCols);
    return remainingColumns;
}
 
Example 13
/**
 * Given a set of columns, assert that all columns conform to the format of an RG header attribute (i.e. 2 letters)
 * and that the attribute is NOT a member of the rgHeaderTags that are built by default in buildSamHeaderParameters.
 *
 * @param rgTagColumns A set of columns that should conform to the rg header attribute format
 */
private void checkRgTagColumns(final Set<String> rgTagColumns) {
    final Set<String> forbiddenHeaders = buildSamHeaderParameters(null).keySet();
    forbiddenHeaders.retainAll(rgTagColumns);

    if (!forbiddenHeaders.isEmpty()) {
        throw new PicardException("Illegal ReadGroup tags in library params(barcode params) file(" + LIBRARY_PARAMS.getAbsolutePath() + ") Offending headers = " + StringUtil.join(", ", forbiddenHeaders));
    }

    for (final String column : rgTagColumns) {
        if (column.length() > 2) {
            throw new PicardException("Column label (" + column + ") unrecognized.  Library params(barcode params) can only contain the columns " +
                    "(OUTPUT, LIBRARY_NAME, SAMPLE_ALIAS, BARCODE, BARCODE_<X> where X is a positive integer) OR two letter RG tags!");
        }
    }
}
 
Example 14
private SAMRecord createSamRecord(final SAMFileHeader header, final String baseName, final FastqRecord frec, final boolean paired) {
    final SAMRecord srec = new SAMRecord(header);
    srec.setReadName(baseName);
    srec.setReadString(frec.getReadString());
    srec.setReadUnmappedFlag(true);
    srec.setAttribute(ReservedTagConstants.READ_GROUP_ID, READ_GROUP_NAME);
    final byte[] quals = StringUtil.stringToBytes(frec.getBaseQualityString());
    convertQuality(quals, QUALITY_FORMAT);
    for (final byte qual : quals) {
        final int uQual = qual & 0xff;
        if (uQual < MIN_Q || uQual > MAX_Q) {
            throw new PicardException("Base quality " + uQual + " is not in the range " + MIN_Q + ".." +
            MAX_Q + " for read " + frec.getReadHeader());
        }
    }
    srec.setBaseQualities(quals);

    if (paired) {
        srec.setReadPairedFlag(true);
        srec.setMateUnmappedFlag(true);
    }
    return srec ;
}
 
Example 15
/** Returns read baseName and asserts correct pair read name format:
 * <ul>
 * <li> Paired reads must either have the exact same read names or they must contain at least one "/"
 * <li> and the First pair read name must end with "/1" and second pair read name ends with "/2"
 * <li> The baseName (read name part before the /) must be the same for both read names
 * <li> If the read names are exactly the same but end in "/2" or "/1" then an exception will be thrown
 * </ul>
 */
String getBaseName(final String readName1, final String readName2, final FastqReader freader1, final FastqReader freader2) {
    String [] toks = getReadNameTokens(readName1, 1, freader1);
    final String baseName1 = toks[0] ;
    final String num1 = toks[1] ;

    toks = getReadNameTokens(readName2, 2, freader2);
    final String baseName2 = toks[0] ;
    final String num2 = toks[1];

    if (!baseName1.equals(baseName2)) {
        throw new PicardException(String.format("In paired mode, read name 1 (%s) does not match read name 2 (%s)", baseName1,baseName2));
    }

    final boolean num1Blank = StringUtil.isBlank(num1);
    final boolean num2Blank = StringUtil.isBlank(num2);
    if (num1Blank || num2Blank) {
        if(!num1Blank) throw new PicardException(error(freader1,"Pair 1 number is missing (" +readName1+ "). Both pair numbers must be present or neither."));       //num1 != blank and num2   == blank
        else if(!num2Blank) throw new PicardException(error(freader2, "Pair 2 number is missing (" +readName2+ "). Both pair numbers must be present or neither.")); //num1 == blank and num2 != blank
    } else {
        if (!num1.equals("1")) throw new PicardException(error(freader1,"Pair 1 number must be 1 ("+readName1+")"));
        if (!num2.equals("2")) throw new PicardException(error(freader2,"Pair 2 number must be 2 ("+readName2+")"));
    }

    return baseName1 ;
}
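
The pairing logic only treats a read-number suffix as present when StringUtil.isBlank() returns false. A quick sketch, assuming isBlank() treats empty and whitespace-only strings as blank (illustrative class name):

import htsjdk.samtools.util.StringUtil;

public class IsBlankSketch {
    public static void main(String[] args) {
        System.out.println(StringUtil.isBlank(""));    // true  - empty
        System.out.println(StringUtil.isBlank("   ")); // true  - whitespace only (assumed)
        System.out.println(StringUtil.isBlank("1"));   // false - a real pair number
    }
}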
 
Example 16
private void saveResults(final MetricsFile<?, Integer> metrics, final SAMFileHeader readsHeader, final String inputFileName){
    MetricsUtils.saveMetrics(metrics, out);

    if (metrics.getAllHistograms().isEmpty()) {
        logger.warn("No valid bases found in input file.");
    } else if (chartOutput != null){
        // Now run R to generate a chart

        // If we're working with a single library, assign that library's name
        // as a suffix to the plot title
        final List<SAMReadGroupRecord> readGroups = readsHeader.getReadGroups();

        /*
         * A subtitle for the plot, usually corresponding to a library.
         */
        String plotSubtitle = "";
        if (readGroups.size() == 1) {
            plotSubtitle = StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary());
        }
        final RScriptExecutor executor = new RScriptExecutor();
        executor.addScript(getMeanQualityByCycleRScriptResource());
        executor.addArgs(out, chartOutput.getAbsolutePath(), inputFileName, plotSubtitle);
        executor.exec();
    }
}
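
The plot subtitle falls back to an empty string when the read group has no library, via StringUtil.asEmptyIfNull(). In isolation (illustrative class name; assumes the method returns "" for null and the string form of its argument otherwise):

import htsjdk.samtools.util.StringUtil;

public class AsEmptyIfNullSketch {
    public static void main(String[] args) {
        final String library = null;                      // e.g. a read group without an LB field
        final String subtitle = StringUtil.asEmptyIfNull(library);
        System.out.println("[" + subtitle + "]");         // [] rather than [null]
    }
}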
 
Example 17
/**
 * Fills a halfContextAccumulator by summing over the appropriate counts from a fullContextAccumulator.
 */
public void fillHalfRecords(final ContextAccumulator fullContextAccumulator, final int contextSize) {
    final String padding = StringUtil.repeatCharNTimes('N', contextSize);

    for (Map.Entry<String,AlignmentAccumulator[]> fullContext : fullContextAccumulator.artifactMap.entrySet()) {
        final String fullContextKey = fullContext.getKey();
        final char centralBase = fullContextKey.charAt(contextSize);
        final String leadingContextKey = fullContextKey.substring(0, contextSize) + centralBase + padding;
        final String trailingContextKey = padding + centralBase + fullContextKey.substring(contextSize + 1, fullContextKey.length());

        final AlignmentAccumulator[] trailingAlignmentAccumulators = this.artifactMap.get(trailingContextKey);
        final AlignmentAccumulator[] leadingAlignmentAccumulators = this.artifactMap.get(leadingContextKey);
        final AlignmentAccumulator[] fullAlignmentAccumulators = fullContext.getValue();

        for (int i=0; i < fullAlignmentAccumulators.length; i++) {
            trailingAlignmentAccumulators[i].merge(fullAlignmentAccumulators[i]);
            leadingAlignmentAccumulators[i].merge(fullAlignmentAccumulators[i]);
        }
    }
}
 
Example 18
/**
 * Fills a zeroContextAccumulator by summing over the appropriate counts from a fullContextAccumulator.
 */
public void fillZeroRecords(final ContextAccumulator fullContextAccumulator, final int contextSize) {
    final String padding = StringUtil.repeatCharNTimes('N', contextSize);

    for (Map.Entry<String,AlignmentAccumulator[]> fullContext : fullContextAccumulator.artifactMap.entrySet()) {
        final String fullContextKey = fullContext.getKey();
        final char centralBase = fullContextKey.charAt(contextSize);
        final String zeroContextKey = padding + centralBase + padding;

        final AlignmentAccumulator[] zeroAlignmentAccumulators = this.artifactMap.get(zeroContextKey);
        final AlignmentAccumulator[] fullAlignmentAccumulators = fullContext.getValue();

        for (int i=0; i < fullAlignmentAccumulators.length; i++) {
            zeroAlignmentAccumulators[i].merge(fullAlignmentAccumulators[i]);
        }
    }
}
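
Both fill methods build their N-padding with StringUtil.repeatCharNTimes(). A compact sketch of how the padded context keys are assembled (illustrative class name and context; the accumulator types are not needed to see the key construction):

import htsjdk.samtools.util.StringUtil;

public class ContextKeySketch {
    public static void main(String[] args) {
        final int contextSize = 2;
        final String fullContextKey = "ACGTA";             // central base sits at index contextSize
        final char centralBase = fullContextKey.charAt(contextSize);
        final String padding = StringUtil.repeatCharNTimes('N', contextSize);

        final String leadingKey  = fullContextKey.substring(0, contextSize) + centralBase + padding;
        final String trailingKey = padding + centralBase + fullContextKey.substring(contextSize + 1);
        final String zeroKey     = padding + centralBase + padding;

        System.out.println(leadingKey);   // ACGNN
        System.out.println(trailingKey);  // NNGTA
        System.out.println(zeroKey);      // NNGNN
    }
}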
 
Example 19
private void convertParamsFile(String libraryParamsFile, int concatNColumnFields, File testDataDir, File outputDir, File libraryParams, List<File> outputPrefixes) throws FileNotFoundException {
    try (LineReader reader = new BufferedLineReader(new FileInputStream(new File(testDataDir, libraryParamsFile)))) {
        final PrintWriter writer = new PrintWriter(libraryParams);
        final String header = reader.readLine();
        writer.println(header + "\tOUTPUT_PREFIX");
        while (true) {
            final String line = reader.readLine();
            if (line == null) {
                break;
            }
            final String[] fields = line.split("\t");
            final File outputPrefix = new File(outputDir, StringUtil.join("", Arrays.copyOfRange(fields, 0, concatNColumnFields)));
            outputPrefixes.add(outputPrefix);
            writer.println(line + "\t" + outputPrefix);
        }
        writer.close();
    }
}
 
Example 20
private void validateReferenceBases(File referenceFile) {
    final ReferenceSequenceFile refSeqFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceFile, true);
    ReferenceSequence sequence;
    while ((sequence = refSeqFile.nextSequence()) != null) {
        for (final byte base: sequence.getBases()) {
            if (!IUPAC_TABLE[base]) {
                messages.baseErrors = String.format("WARNING: At least one invalid base '%c' (decimal %d) in reference sequence named %s",
                        StringUtil.byteToChar(base), base, sequence.getName());
                break;
            }
        }
    }
}
 
Example 21
public int getCountBase (final char base) {
	Byte baseB = StringUtil.charToByte(base);
	int count=0;
	for (Byte b: bases)
		if (b.equals(baseB)) count++;
	return count;
}
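
getCountBase() compares boxed bytes against the byte form of a char produced by StringUtil.charToByte(). The same counting idea over a plain byte[] (illustrative class name and data):

import htsjdk.samtools.util.StringUtil;

public class CountBaseSketch {
    public static void main(String[] args) {
        final byte[] bases = StringUtil.stringToBytes("AACGTA");
        final byte target = StringUtil.charToByte('A');
        int count = 0;
        for (final byte b : bases) {
            if (b == target) count++;
        }
        System.out.println(count); // 3
    }
}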
 
Example 22
private static Tuple2<TestDataBreakEndVariants, TestDataBreakEndVariants> forInterChromosomeStrandSwitch55() {

        String contigName = "forInterChromosomeStrandSwitch55";
        byte[] contigSequence = "ATTCTGAGAAACTTCATTTTGATGTGTGCATTCATCTTCCAGAGTTGAAACTTTCTTTTGATTGTGTAGTTTTGAAACACTCTTTTTGTAGAATCTGCAAGGGGGTATTTGTAGGGATTTGAAGCCTATTGTTGAAAAGGTAATATCTTCACATAAAAACTACATAGGATCATTCGGAGAAACTTCTTTGTGATGTGTGCATTCAACTCACAGAGTTGAACCTATCTTTTTTTTTTTAATGTCTGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGGAGCAATTTGAGGCCTAAGGTGGAAAAGGAAATATTTTCACATAAAAACTAGACAGAAGAATTCTGTGAAACTTGTTCAGGACCTGTGCATTCATCTTACAGATTTGAATCTTTCTTTTGATTGAGCAGTTTGGAAACACTGTTTTTGTAGAATCTTCAGGTGGACATTCAGAGCACTTTGTGTCCTATGGTAGAAAAGGAAATATCTTCATA".getBytes();
        String homology = "";
        String insSeq = StringUtil.bytesToString(getReverseComplimentCopy(Arrays.copyOfRange(contigSequence, 230, 292)));
        AlignmentInterval firstAlignment = new AlignmentInterval(new SimpleInterval("chr21:10784600-10784829"), 1, 230, TextCigarCodec.decode("230M279S"), true, 60, 12, 170, ContigAlignmentsModifier.AlnModType.NONE);
        AlignmentInterval secondAlignment = new AlignmentInterval(new SimpleInterval("chr20:28817762-28817977"), 293, 509, TextCigarCodec.decode("292H93M1I123M"), false, 60, 11, 149, ContigAlignmentsModifier.AlnModType.NONE);
        SimpleChimera simpleChimera = new SimpleChimera(contigName, firstAlignment, secondAlignment, StrandSwitch.FORWARD_TO_REVERSE, false, Collections.emptyList(), NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME);
        SimpleInterval expectedLeftBreakpoint = new SimpleInterval("chr20:28817977-28817977");
        SimpleInterval expectedRightBreakpoint = new SimpleInterval("chr21:10784829-10784829");
        final BreakpointComplications expectedBreakpointComplications = new BreakpointComplications.InterChromosomeBreakpointComplications(homology, insSeq);
        NovelAdjacencyAndAltHaplotype expectedNovelAdjacencyAndAltSeq = new NovelAdjacencyAndAltHaplotype(expectedLeftBreakpoint, expectedRightBreakpoint, StrandSwitch.FORWARD_TO_REVERSE, expectedBreakpointComplications, TypeInferredFromSimpleChimera.INTER_CHR_STRAND_SWITCH_55, EMPTY_BYTE_ARRAY);
        final List<SvType> expectedSVTypes = Arrays.asList(
                makeBNDType("chr20", 28817977, "BND_chr20_28817977_chr21_10784829_1", Allele.create("A", true), Allele.create("A"+insSeq+"]chr21:10784829]"), Collections.emptyMap(), true, BreakEndVariantType.SupportedType.INTER_CHR_STRAND_SWITCH_55),
                makeBNDType("chr21", 10784829, "BND_chr20_28817977_chr21_10784829_2", Allele.create("T", true), Allele.create("T"+ SequenceUtil.reverseComplement(insSeq) +"]chr20:28817977]"), Collections.emptyMap(), false, BreakEndVariantType.SupportedType.INTER_CHR_STRAND_SWITCH_55)
        );
        final List<VariantContext> expectedVariants = Arrays.asList(
                addStandardAttributes(makeBND(expectedLeftBreakpoint, expectedRightBreakpoint, Allele.create("A", true), insSeq, "", true, true, true), contigName, 60, 216, homology, insSeq, "BND_chr20_28817977_chr21_10784829_2").make(),
                addStandardAttributes(makeBND(expectedLeftBreakpoint, expectedRightBreakpoint, Allele.create("T", true), SequenceUtil.reverseComplement(insSeq), "", false, true, true), contigName, 60, 216, homology, insSeq, "BND_chr20_28817977_chr21_10784829_1").make()
        );

        final TestDataBreakEndVariants forInterChromosomeStrandSwitch55_plus =
                new TestDataBreakEndVariants(firstAlignment, secondAlignment, contigName, contigSequence, true, simpleChimera, expectedNovelAdjacencyAndAltSeq, expectedSVTypes, expectedVariants, BreakpointsInference.InterChromosomeBreakpointsInference.class);


        firstAlignment = new AlignmentInterval(new SimpleInterval("chr20:28817762-28817977"), 1, 217, TextCigarCodec.decode("123M1I93M292H"), true, 60, 11, 149, ContigAlignmentsModifier.AlnModType.NONE);
        secondAlignment = new AlignmentInterval(new SimpleInterval("chr21:10784600-10784829"), 280, 509, TextCigarCodec.decode("279S230M"), false, 60, 12, 170, ContigAlignmentsModifier.AlnModType.NONE);
        simpleChimera = new SimpleChimera(contigName, firstAlignment, secondAlignment, StrandSwitch.FORWARD_TO_REVERSE, true, Collections.emptyList(), NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME);

        final TestDataBreakEndVariants forInterChromosomeStrandSwitch55_minus =
                new TestDataBreakEndVariants(firstAlignment, secondAlignment, contigName, getReverseComplimentCopy(contigSequence), false, simpleChimera, expectedNovelAdjacencyAndAltSeq, expectedSVTypes, expectedVariants, BreakpointsInference.InterChromosomeBreakpointsInference.class);

        return new Tuple2<>(forInterChromosomeStrandSwitch55_plus, forInterChromosomeStrandSwitch55_minus);
    }
 
Example 23
public static byte [] setSequenceToN (final byte [] fastaRefBases, final Interval interval) {
	byte [] result = fastaRefBases;
	int startBase=interval.getStart();
	int endBase=interval.getEnd();
	// the byte [] is base 0, the coordinates are base 1.
	Arrays.fill(result, startBase-1, endBase, StringUtil.charToByte('N'));
	return (result);
}
 
Example 24
private SAMFileWriter getWriter (final SamReader reader) {
	SAMFileHeader header = reader.getFileHeader();
	SamHeaderUtil.addPgRecord(header, this);
	String context = StringUtil.join(" ", this.CONTEXT_TAGS);
	header.addComment("Edit distance collapsed tag " + this.COLLAPSE_TAG + " to new tag " + this.OUT_TAG + " with edit distance " + this.EDIT_DISTANCE + " using indels=" + this.FIND_INDELS + " in the context of tags [" + context + "]");
	SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, this.OUTPUT);
	return writer;
}
 
Example 25
private void writeReport (final BottomUpCollapseResult result, final BottomUpCollapseResult resultClean, final UMIsPerCellResult umiResult) {
	PrintStream outReport = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.OUTPUT_REPORT));

	// write comments section, each line starts with a "#"
	outReport.println("# FILTER_AMBIGUOUS="+FILTER_AMBIGUOUS);
	outReport.println("# MIN_UMIS_PER_CELL="+MIN_UMIS_PER_CELL);
	outReport.println("# UMI_BIAS_THRESHOLD="+MIN_UMIS_PER_CELL);
	outReport.println("# EDIT_DISTANCE="+MIN_UMIS_PER_CELL);
	outReport.println("#");
	outReport.println("# TOTAL_BARCODES_TESTED="+umiResult.getNumCellBarocodesTested());
	outReport.println("# BARCODES_COLLAPSED="+result.getUnambiguousSmallBarcodes().size());
	outReport.println("# ESTIMATED_UMIS_COLLAPSED="+getTotalAmbiguousUMIs(result.getUnambiguousSmallBarcodes(), umiResult.getUmisPerCell()));
	outReport.println("# AMBIGUOUS_BARCODES="+result.getAmbiguousBarcodes().size());
	outReport.println("# ESTIMATED_AMBIGUOUS_UMIS="+getTotalAmbiguousUMIs(result.getAmbiguousBarcodes(), umiResult.getUmisPerCell()));
	outReport.println("# POLY_T_BIASED_BARCODES="+umiResult.getPolyTBiasedBarcodes());
	outReport.println("# POLY_T_BIASED_BARCODE_UMIS="+umiResult.getPolyTBiasedUMIs());
	outReport.println("# POLY_T_POSITION="+umiResult.getPolyTPosition());

	/// write header
	String [] header= {"intended_barcode", "neighbor_barcode", "intended_size", "neighbor_size", "position", "intended_base", "neighbor_base", "repaired"};
	outReport.println(StringUtil.join("\t", header));

	ObjectCounter<String> umiCounts=umiResult.getUmisPerCell();
	Iterator<String> smalls = result.getUnambiguousSmallBarcodes().iterator();
	while (smalls.hasNext()) {
		String small=smalls.next();
		String large = result.getLargerRelatedBarcode(small);
		BarcodeSubstitutionPair p = new BarcodeSubstitutionPair(large, small);
		String cleanLarger = resultClean.getLargerRelatedBarcode(small);
		boolean repaired = cleanLarger!=null;

		String [] body = {large, small, Integer.toString(umiCounts.getCountForKey(large)), Integer.toString(umiCounts.getCountForKey(small)),
				Integer.toString(p.getPosition()+1), p.getIntendedBase(), p.getNeighborBase(), Boolean.toString(repaired).toUpperCase()};
		outReport.println(StringUtil.join("\t", body));
	}

	CloserUtil.close(outReport);
}
 
Example 26
private String [] getFirstSplit(final SAMRecord rec) {
	Object strIntervalRec = rec.getAttribute(tag);
	if (!(strIntervalRec instanceof String))
		throw new IllegalArgumentException(SAMTagUtil.getSingleton().makeStringTag(this.tag) + " does not have a String value");
	String intervalString = (String) strIntervalRec;
	String [] result = new String [4];
	StringUtil.splitConcatenateExcessTokens(intervalString, result, ENCODE_DELIMITER);
	return (result);
}
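
getFirstSplit() caps the result at four tokens; StringUtil.splitConcatenateExcessTokens() keeps any overflow by folding it, delimiters included, into the last array slot, whereas plain split() would drop it. A sketch under that assumption (class name, delimiter and input are invented):

import htsjdk.samtools.util.StringUtil;

public class ExcessTokensSketch {
    public static void main(String[] args) {
        final String encoded = "chr1:100-200:+:geneA:extra:fields";
        final String[] result = new String[4];
        // Overflow beyond 4 tokens is concatenated into result[3] rather than discarded.
        StringUtil.splitConcatenateExcessTokens(encoded, result, ':');
        for (final String token : result) {
            System.out.println(token);
        }
    }
}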
 
Example 27
private int [] parsePosition (final String posString) {
	String [] posArray = new String [2];
	StringUtil.split(posString, posArray, '-');
	int start = Integer.parseInt(posArray[0]);
	// set the end = the start; this changes if there's a second position in the field.
	int end = Integer.parseInt(posArray[0]);
	if (posArray[1] != null)
		end = Integer.parseInt(posArray[1]);

	int [] result = {start, end};
	return result;
}
 
Example 28
private void WriteDimensionLabels(final String dimensionName, final List<String> names) {
    try {
        for (int i = 0; i < names.size(); i += MatrixMarketConstants.NUM_HEADER_ELEMENTS_PER_ROW) {
            writer.write(MatrixMarketConstants.MM_STRUCTURED_COMMENT_LINE_START);
            writer.write(dimensionName);
            writer.write(MatrixMarketConstants.MM_HEADER_LIST_SEPARATOR);
            final int elementsToWrite = Math.min(MatrixMarketConstants.NUM_HEADER_ELEMENTS_PER_ROW, names.size() - i);
            writer.write(StringUtil.join(MatrixMarketConstants.MM_HEADER_LIST_SEPARATOR, names.subList(i, i + elementsToWrite)));
            writer.newLine();
        }
    } catch (IOException e) {
        throw new RuntimeIOException("Exception writing " + filename, e);
    }
}
 
Example 29
public PolyARun getPolyAStart(final String readString, final String adapterSequence) {
    final byte[] readBases = StringUtil.stringToBytes(readString);
    int adapterClipPosition = ClippingUtility.findIndexOfClipSequence(
            readBases,
            StringUtil.stringToBytes(adapterSequence),
            minAdapterMatch,
            maxAdapterErrorRate);
    if (adapterClipPosition == ClippingUtility.NO_MATCH) {
        adapterClipPosition = readString.length();
    } else if (adapterClipPosition == 0) {
        return new PolyARun(0, 0, 0);
    }
    final SimplePolyAFinder.PolyARun ret = getPolyARun(readString, adapterClipPosition);

    // If there was a short adapter match, but not enough poly A before it,
    // see if there would be enough poly A if the adapter considered not to match.
    if (ret.isNoMatch() && adapterClipPosition < readString.length() &&
            adapterClipPosition + dubiousAdapterMatchLength >= readString.length()) {
        // If did not find enough polyA looking before adapter, try again looking from end of read.
        final SimplePolyAFinder.PolyARun tryWithoutAdapter = getPolyARun(readString, readString.length());
        if (!tryWithoutAdapter.isNoMatch()) {
            return tryWithoutAdapter;
        }
    }
    return ret;

}
 
Example 30
public TrimSequenceTemplate(final String sequence, final String ignoredBases) {
	this.sequence = sequence;
	this.reverseComplement = SequenceUtil.reverseComplement(this.sequence);
	bases = StringUtil.stringToBytes(this.sequence);
	rcBases = StringUtil.stringToBytes(this.reverseComplement);
	this.ignoredBases = StringUtil.stringToBytes(ignoredBases);
}