Java源码示例:htsjdk.variant.vcf.VCFHeaderLine

示例1
/**
 * Builds the header used for the output VCF.
 *
 * <p>The new header is constructed from the input header's metadata lines (in input order) and a
 * single genotype sample, after which the GT/AD format lines, a compatibility marker line and the
 * mappability / PON-count info lines are registered.
 *
 * @param header     header whose metadata lines seed the new header
 * @param sampleName the only sample name given to the new header
 * @return the newly constructed header
 */
@NotNull
public static VCFHeader generateOutputHeader(@NotNull final VCFHeader header, @NotNull final String sampleName) {
    final VCFHeader result = new VCFHeader(header.getMetaDataInInputOrder(), Sets.newHashSet(sampleName));

    // Standard FORMAT lines.
    result.addMetaDataLine(VCFStandardHeaderLines.getFormatLine("GT"));
    result.addMetaDataLine(VCFStandardHeaderLines.getFormatLine("AD"));

    // Free-form marker line.
    result.addMetaDataLine(
            new VCFHeaderLine("StrelkaGATKCompatibility", "Added GT fields to strelka calls for gatk compatibility."));

    // INFO lines.
    result.addMetaDataLine(
            new VCFInfoHeaderLine("MAPPABILITY", 1, VCFHeaderLineType.Float, "Mappability (percentage)"));
    result.addMetaDataLine(
            new VCFInfoHeaderLine("SOMATIC_PON_COUNT", 1, VCFHeaderLineType.Integer,
                    "Number of times the variant appears in the somatic PON"));
    result.addMetaDataLine(
            new VCFInfoHeaderLine("GERMLINE_PON_COUNT", 1, VCFHeaderLineType.Integer,
                    "Number of times the variant appears in the germline PON"));

    return result;
}
 
示例2
/**
 * Writes the output VCF header and loads the mixing fractions.
 *
 * <p>The output header consists of the input header's metadata lines, the expected-allele-fraction
 * INFO line and the tool's default header lines. The mixing fractions read from
 * {@code inputMixingFractions} are stored as an array that follows the order of
 * {@code inputHeader.getSampleNamesInOrder()}.
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    // The default tool lines must be added BEFORE the header is constructed: lines added to the
    // set afterwards are not picked up by the already-built VCFHeader and would be missing from
    // the written output.
    headerLines.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);

    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    final Map<String, Double> mixingFractionsBySample = mixingFractionsList.stream()
            .collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    // NOTE(review): a sample present in the VCF header but absent from the mixing-fractions input
    // yields a null lookup here, which fails on unboxing — confirm whether inputs guarantee coverage.
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream()
            .mapToDouble(mixingFractionsBySample::get).toArray();
}
 
示例3
/**
 * Assembles the header for the filtered output VCF and writes it.
 *
 * <p>The header holds the input header's metadata lines, one filter line per entry of
 * {@code Mutect2FilteringEngine.M_2_FILTER_NAMES}, the explicitly described filter lines below and
 * the tool's default header lines.
 */
@Override
public void onTraversalStart() {
    final VCFHeader sourceHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> lines = new HashSet<>(sourceHeader.getMetaDataInSortedOrder());

    // Filters whose header lines are looked up by name.
    Mutect2FilteringEngine.M_2_FILTER_NAMES.stream()
            .forEach(filterName -> lines.add(GATKVCFHeaderLines.getFilterLine(filterName)));

    // Filters described inline.
    lines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.ARTIFACT_IN_NORMAL_FILTER_NAME, "artifact_in_normal"));
    lines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.MEDIAN_BASE_QUALITY_DIFFERENCE_FILTER_NAME, "ref - alt median base quality"));
    lines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.MEDIAN_MAPPING_QUALITY_DIFFERENCE_FILTER_NAME, "ref - alt median mapping quality"));
    lines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.MEDIAN_CLIPPING_DIFFERENCE_FILTER_NAME, "ref - alt median clipping"));
    lines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME, "abs(ref - alt) median fragment length"));
    lines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.READ_POSITION_FILTER_NAME, "median distance of alt variants from end of reads"));
    lines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.CONTAMINATION_FILTER_NAME, "contamination"));

    lines.addAll(getDefaultToolVCFHeaderLines());

    final VCFHeader outputHeader = new VCFHeader(lines, sourceHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(new File(outputVcf));
    vcfWriter.writeHeader(outputHeader);
}
 
示例4
/**
 * Creates a VCF writer for {@code outputFile} and writes a single-sample header to it.
 *
 * <p>The header carries "reference", "source" and "fileDate" lines, the PL / AD / DP format lines
 * and the sequence dictionary of {@code ref}.
 *
 * @param outputFile                file the writer is built for
 * @param referenceSequenceFileName reference file whose absolute path goes into the "reference" line
 * @param sample                    the only sample of the header
 * @param source                    value of the "source" line
 * @param ref                       reference supplying the sequence dictionary
 * @return the writer, with its header already written
 */
private static VariantContextWriter getVariantContextWriter(final File outputFile,
                                                            final File referenceSequenceFileName,
                                                            final String sample,
                                                            final String source,
                                                            final ReferenceSequenceFile ref) {
    final VariantContextWriter writer = new VariantContextWriterBuilder()
            .setReferenceDictionary(ref.getSequenceDictionary())
            .setOutputFile(outputFile)
            .build();

    // LinkedHashSet keeps the lines in the order they are added here.
    final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>();

    // Provenance lines.
    headerLines.add(new VCFHeaderLine("reference", referenceSequenceFileName.getAbsolutePath()));
    headerLines.add(new VCFHeaderLine("source", source));
    headerLines.add(new VCFHeaderLine("fileDate", new Date().toString()));

    // Standard FORMAT lines.
    headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_PL_KEY));
    headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS));
    headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));

    final VCFHeader vcfHeader = new VCFHeader(headerLines, Collections.singletonList(sample));
    vcfHeader.setSequenceDictionary(ref.getSequenceDictionary());

    writer.writeHeader(vcfHeader);
    return writer;
}
 
示例5
/**
 * Writes the output VCF header and loads the mixing fractions.
 *
 * <p>The output header consists of the input header's metadata lines, the expected-allele-fraction
 * INFO line and the tool's default header lines. The mixing fractions read from
 * {@code inputMixingFractions} are stored as an array that follows the order of
 * {@code inputHeader.getSampleNamesInOrder()}.
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    // The default tool lines must be added BEFORE the header is constructed: lines added to the
    // set afterwards are not picked up by the already-built VCFHeader and would be missing from
    // the written output.
    headerLines.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);

    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    final Map<String, Double> mixingFractionsBySample = mixingFractionsList.stream()
            .collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    // NOTE(review): a sample present in the VCF header but absent from the mixing-fractions input
    // yields a null lookup here, which fails on unboxing — confirm whether inputs guarantee coverage.
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream()
            .mapToDouble(mixingFractionsBySample::get).toArray();
}
 
示例6
/**
 * Builds the output header from the input header and writes it to {@code vcfWriter}.
 *
 * <p>Requires that the input header declares an INFO line whose ID equals {@code infoKey}.
 * Existing FILTER lines are dropped when {@code removeOldFilters} is set; tranche header fields
 * for SNPs and indels and the tool's default lines are then added.
 *
 * @param vcfWriter writer that receives the header
 * @throws UserException if the input header has no INFO line for {@code infoKey}
 */
private void writeVCFHeader(VariantContextWriter vcfWriter) {
    // setup the header fields
    final VCFHeader sourceHeader = getHeaderForVariants();
    Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>(sourceHeader.getMetaDataInSortedOrder());

    final boolean infoKeyDeclared = lines.stream()
            .filter(VCFInfoHeaderLine.class::isInstance)
            .map(VCFInfoHeaderLine.class::cast)
            .anyMatch(infoLine -> infoLine.getID().equals(infoKey));
    if (!infoKeyDeclared) {
        throw new UserException(String.format("Input VCF does not contain a header line for specified info key:%s", infoKey));
    }

    if (removeOldFilters) {
        lines.removeIf(VCFFilterHeaderLine.class::isInstance);
    }

    addTrancheHeaderFields(SNPString, snpTranches, lines);
    addTrancheHeaderFields(INDELString, indelTranches, lines);

    // TreeSet: the sample names are handed to the header in sorted order.
    final TreeSet<String> sortedSamples = new TreeSet<>(sourceHeader.getGenotypeSamples());
    lines.addAll(getDefaultToolVCFHeaderLines());

    vcfWriter.writeHeader(new VCFHeader(lines, sortedSamples));
}
 
示例7
/**
 * Initializes the realignment engine, writes the output VCF header and sets up the components
 * used later by this tool (read header, sample list, reference reader, assembler, likelihood
 * engine and the optional haplotype BAM writer).
 */
@Override
public void onTraversalStart() {
    realignmentEngine = new RealignmentEngine(realignmentArgumentCollection);
    vcfWriter = createVCFWriter(new File(outputVcf));

    // Output header = input metadata + alignment-artifact filter/info lines + default tool lines.
    final VCFHeader sourceHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> lines = new HashSet<>(sourceHeader.getMetaDataInSortedOrder());
    lines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.ALIGNMENT_ARTIFACT_FILTER_NAME));
    lines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.UNITIG_SIZES_KEY));
    lines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ALIGNMENT_SCORE_DIFFERENCE_KEY));
    lines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.JOINT_ALIGNMENT_COUNT_KEY));
    lines.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader outputHeader = new VCFHeader(lines, sourceHeader.getGenotypeSamples());
    vcfWriter.writeHeader(outputHeader);

    bamHeader = getHeaderForReads();
    samplesList = new IndexedSampleList(new ArrayList<>(ReadUtils.getSamplesFromHeader(bamHeader)));
    referenceReader = AssemblyBasedCallerUtils.createReferenceReader(Utils.nonNull(referenceArguments.getReferenceSpecifier()));
    assemblyEngine = MTAC.createReadThreadingAssembler();
    likelihoodCalculationEngine = AssemblyBasedCallerUtils.createLikelihoodCalculationEngine(MTAC.likelihoodArgs);

    // A haplotype BAM writer is only created when an output path was supplied.
    if (bamOutputPath == null) {
        haplotypeBAMWriter = Optional.empty();
    } else {
        haplotypeBAMWriter = Optional.of(new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, IOUtils.getPath(bamOutputPath), true, false, getHeaderForSAMWriter()));
    }
}
 
示例8
/**
 * Interprets complex variants from the given contigs, writes them to a VCF, then extracts
 * re-interpreted simple variants from them and writes those to two further VCFs
 * (one-segment and multi-segment calls).
 *
 * @param ctx                        Spark context used to parallelize the complex variants
 * @param contigsWithCpxAln          contigs handed to the complex-variant interpreter
 * @param svDiscoveryInputMetaData   supplies arguments, reference data, default header lines and logger
 * @param assemblyRawAlignments      raw alignments passed to the simple-variant extractor
 * @param outputPrefixWithSampleName prefix of all three output VCF file names
 * @return the complex variants together with the merged re-interpreted calls
 */
private static CpxAndReInterpretedSimpleVariants extractCpxVariants(final JavaSparkContext ctx,
                                                                    final JavaRDD<AssemblyContigWithFineTunedAlignments> contigsWithCpxAln,
                                                                    final SvDiscoveryInputMetaData svDiscoveryInputMetaData,
                                                                    final JavaRDD<GATKRead> assemblyRawAlignments,
                                                                    final String outputPrefixWithSampleName) {
    // The tool logger is only forwarded to the VCF writer when running in debug mode.
    final Logger toolLogger = svDiscoveryInputMetaData.getDiscoverStageArgs().runInDebugMode ? svDiscoveryInputMetaData.getToolLogger() : null;
    final Set<VCFHeaderLine> toolHeaderLines = svDiscoveryInputMetaData.getDefaultToolVCFHeaderLines();

    final List<VariantContext> cpxCalls =
            CpxVariantInterpreter.makeInterpretation(contigsWithCpxAln, svDiscoveryInputMetaData);

    // Read the broadcast dictionary once and reuse it for all three VCFs.
    final SAMSequenceDictionary refSeqDict =
            svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast().getValue();
    SVVCFWriter.writeVCF(cpxCalls, outputPrefixWithSampleName + COMPLEX_CHIMERA_VCF_FILE_NAME,
            refSeqDict, toolHeaderLines, toolLogger);

    final SegmentedCpxVariantSimpleVariantExtractor.ExtractedSimpleVariants simpleCalls =
            SegmentedCpxVariantSimpleVariantExtractor.extract(
                    ctx.parallelize(cpxCalls), svDiscoveryInputMetaData, assemblyRawAlignments);

    final String oneSegmentVcf = outputPrefixWithSampleName + REINTERPRETED_1_SEG_CALL_VCF_FILE_NAME;
    final String multiSegmentVcf = outputPrefixWithSampleName + REINTERPRETED_MULTI_SEG_CALL_VCF_FILE_NAME;
    SVVCFWriter.writeVCF(simpleCalls.getReInterpretZeroOrOneSegmentCalls(), oneSegmentVcf, refSeqDict, toolHeaderLines, toolLogger);
    SVVCFWriter.writeVCF(simpleCalls.getReInterpretMultiSegmentsCalls(), multiSegmentVcf, refSeqDict, toolHeaderLines, toolLogger);

    return new CpxAndReInterpretedSimpleVariants(cpxCalls, simpleCalls.getMergedReinterpretedCalls());
}
 
示例9
@Override
protected void runTool(final JavaSparkContext ctx) {

    // TODO: 5/9/18 getback sample name in output files
    final SAMFileHeader headerForReads = getHeaderForReads();
    final Set<VCFHeaderLine> defaultToolVCFHeaderLines = getDefaultToolVCFHeaderLines();
    final SvDiscoveryInputMetaData svDiscoveryInputMetaData =
            new SvDiscoveryInputMetaData(ctx, discoverStageArgs, nonCanonicalChromosomeNamesFile,
                    derivedSimpleVCFPrefix,
                    null, null, null, null,
                    headerForReads, getReference(), defaultToolVCFHeaderLines, localLogger);

    final JavaRDD<VariantContext> complexVariants = new VariantsSparkSource(ctx)
            .getParallelVariantContexts(complexVCF, getIntervals());
    final JavaRDD<GATKRead> assemblyRawAlignments = getReads();

    final SegmentedCpxVariantSimpleVariantExtractor.ExtractedSimpleVariants extract =
            SegmentedCpxVariantSimpleVariantExtractor.extract(complexVariants, svDiscoveryInputMetaData, assemblyRawAlignments);

    final String derivedOneSegmentSimpleVCF = derivedSimpleVCFPrefix + "_1_seg.vcf";
    final String derivedMultiSegmentSimpleVCF = derivedSimpleVCFPrefix + "_multi_seg.vcf";
    final VCFHeader vcfHeader = VariantsSparkSource.getHeader(complexVCF);
    SVVCFWriter.writeVCF(extract.getReInterpretZeroOrOneSegmentCalls(), derivedOneSegmentSimpleVCF, vcfHeader.getSequenceDictionary(), defaultToolVCFHeaderLines, logger);
    SVVCFWriter.writeVCF(extract.getReInterpretMultiSegmentsCalls(), derivedMultiSegmentSimpleVCF, vcfHeader.getSequenceDictionary(), defaultToolVCFHeaderLines, logger);
}
 
示例10
/**
 * Bundles the inputs of the SV discovery stage.
 *
 * <p>The sequence dictionary and sample id are derived from {@code headerForReads}; the canonical
 * chromosome set, the reference, the sequence dictionary and the reads header are broadcast
 * through {@code ctx}. All remaining arguments are stored as given.
 *
 * @param ctx                             Spark context used to create the broadcasts
 * @param discoverStageArgs               discovery-stage arguments, stored as given
 * @param nonCanonicalChromosomeNamesFile input for computing the canonical chromosome set
 * @param outputPath                      output path, stored as given
 * @param readMetadata                    passed through to the sample-specific data
 * @param assembledIntervals              passed through to the sample-specific data
 * @param evidenceTargetLinks             passed through to the sample-specific data
 * @param cnvCallsBroadcast               passed through to the sample-specific data
 * @param headerForReads                  reads header supplying dictionary and sample id
 * @param reference                       reference to broadcast
 * @param defaultToolVCFHeaderLines       header lines, stored as given
 * @param toolLogger                      logger, stored as given
 */
public SvDiscoveryInputMetaData(final JavaSparkContext ctx,
                                final DiscoverVariantsFromContigAlignmentsSparkArgumentCollection discoverStageArgs,
                                final String nonCanonicalChromosomeNamesFile,
                                final String outputPath,
                                final ReadMetadata readMetadata,
                                final List<SVInterval> assembledIntervals,
                                final PairedStrandedIntervalTree<EvidenceTargetLink> evidenceTargetLinks,
                                final Broadcast<SVIntervalTree<VariantContext>> cnvCallsBroadcast,
                                final SAMFileHeader headerForReads,
                                final ReferenceMultiSparkSource reference,
                                final Set<VCFHeaderLine> defaultToolVCFHeaderLines,
                                final Logger toolLogger) {

    // Values derived from the reads header.
    final SAMSequenceDictionary refDict = headerForReads.getSequenceDictionary();
    final Broadcast<Set<String>> canonicalChromosomes =
            ctx.broadcast(SVUtils.getCanonicalChromosomes(nonCanonicalChromosomeNamesFile, refDict));
    final String sample = SVUtils.getSampleId(headerForReads);

    this.referenceData = new ReferenceData(
            canonicalChromosomes,
            ctx.broadcast(reference),
            ctx.broadcast(refDict));
    this.sampleSpecificData = new SampleSpecificData(
            sample,
            cnvCallsBroadcast,
            assembledIntervals,
            evidenceTargetLinks,
            readMetadata,
            ctx.broadcast(headerForReads));

    // Plain pass-through fields.
    this.discoverStageArgs = discoverStageArgs;
    this.outputPath = outputPath;
    this.defaultToolVCFHeaderLines = defaultToolVCFHeaderLines;
    this.toolLogger = toolLogger;
}
 
示例11
/**
 * Writes the given variants, preceded by a header, to a VCF at {@code fileName}.
 *
 * <p>The header is obtained from {@code getVcfHeader(referenceSequenceDictionary)} and extended
 * with every line of {@code defaultToolVCFHeaderLines}.
 *
 * @param fileName                    destination handed to {@code BucketUtils.createFile}
 * @param variantsArrayList           variants to write, in iteration order
 * @param referenceSequenceDictionary dictionary used for both the writer and the header
 * @param defaultToolVCFHeaderLines   additional header lines
 * @throws GATKException if an {@link IOException} occurs while creating or closing the output
 */
private static void writeVariants(final String fileName, final List<VariantContext> variantsArrayList,
                                  final SAMSequenceDictionary referenceSequenceDictionary,
                                  final Set<VCFHeaderLine> defaultToolVCFHeaderLines) {
    // Both resources are managed by try-with-resources so the writer is closed even when writing
    // the header or a variant fails. Resources close in reverse order: the writer first (so it can
    // finish its output), then the underlying stream.
    try (final OutputStream outputStream
                 = new BufferedOutputStream(BucketUtils.createFile(fileName));
         final VariantContextWriter vcfWriter
                 = getVariantContextWriter(outputStream, referenceSequenceDictionary)) {

        final VCFHeader vcfHeader = getVcfHeader(referenceSequenceDictionary);
        defaultToolVCFHeaderLines.forEach(vcfHeader::addMetaDataLine);
        vcfWriter.writeHeader(vcfHeader);
        variantsArrayList.forEach(vcfWriter::add);

    } catch (final IOException e) {
        throw new GATKException("Could not create output file", e);
    }
}
 
示例12
/**
 * Runs the test tool with the add-command-line option enabled and checks that its default VCF
 * header lines contain a "source" line and a "GATKCommandLine" line naming the tool, and that
 * the command-line entry carries CommandLine, Version and Date fields.
 */
@Test
public void testGetDefaultToolVCFHeaderLines() {
    final TestGATKSparkToolWithVariants tool = new TestGATKSparkToolWithVariants();
    tool.instanceMain(new String[] {"--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "true"});

    final VCFHeader header = new VCFHeader(tool.getDefaultToolVCFHeaderLines());

    final VCFHeaderLine sourceEntry = header.getOtherHeaderLine("source");
    Assert.assertEquals(sourceEntry.getValue(), tool.getClass().getSimpleName());

    final VCFIDHeaderLine commandLineEntry = (VCFIDHeaderLine) header.getOtherHeaderLine("GATKCommandLine");
    Assert.assertEquals(commandLineEntry.getID(), tool.getClass().getSimpleName());

    final String rendered = commandLineEntry.toString();
    for (final String expectedField : new String[] {"CommandLine=", "Version=", "Date="}) {
        assertContains(rendered, expectedField);
    }
}
 
示例13
/**
 * Runs the feature-based test tool with a mask VCF and the add-command-line option enabled, then
 * checks that its default VCF header lines contain a "source" line and a "GATKCommandLine" line
 * naming the tool, and that the command-line entry carries CommandLine, Version and Date fields.
 *
 * @throws IOException if the canonical path of the test VCF cannot be resolved
 */
@Test
public void testGetDefaultToolVCFHeaderLines() throws IOException {
    final TestGATKToolWithFeatures tool = new TestGATKToolWithFeatures();
    final File maskVcf = new File(publicTestDir + "org/broadinstitute/hellbender/engine/feature_data_source_test_with_bigHeader.vcf");
    tool.instanceMain(new String[] {
            "--mask", maskVcf.getCanonicalPath(),
            "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "true"});

    final VCFHeader header = new VCFHeader(tool.getDefaultToolVCFHeaderLines());

    final VCFHeaderLine sourceEntry = header.getOtherHeaderLine("source");
    Assert.assertEquals(sourceEntry.getValue(), tool.getClass().getSimpleName());

    final VCFIDHeaderLine commandLineEntry = (VCFIDHeaderLine) header.getOtherHeaderLine("GATKCommandLine");
    Assert.assertEquals(commandLineEntry.getID(), tool.getClass().getSimpleName());

    final String rendered = commandLineEntry.toString();
    for (final String expectedField : new String[] {"CommandLine=", "Version=", "Date="}) {
        assertContains(rendered, expectedField);
    }
}
 
示例14
/**
 * Creates a small single-sample header (sample {@code SAMPLE_NAME}) containing a fixed selection
 * of standard FORMAT/INFO lines plus several GATK-specific INFO lines and the
 * strand-bias-by-sample FORMAT line.
 *
 * @return the newly built header
 */
private static VCFHeader getMinimalVCFHeader() {
    // LinkedHashSet keeps the lines in insertion order.
    final Set<VCFHeaderLine> lines = new LinkedHashSet<>();

    VCFStandardHeaderLines.addStandardFormatLines(lines, true,
            VCFConstants.GENOTYPE_KEY,
            VCFConstants.DEPTH_KEY,
            VCFConstants.GENOTYPE_QUALITY_KEY,
            VCFConstants.GENOTYPE_PL_KEY,
            VCFConstants.GENOTYPE_ALLELE_DEPTHS);

    VCFStandardHeaderLines.addStandardInfoLines(lines, true,
            VCFConstants.DEPTH_KEY,
            VCFConstants.RMS_MAPPING_QUALITY_KEY,
            VCFConstants.MAPPING_QUALITY_ZERO_KEY);

    // GATK-specific INFO annotations.
    for (final String infoKey : Arrays.asList(
            GATKVCFConstants.BASE_QUAL_RANK_SUM_KEY,
            GATKVCFConstants.CLIPPING_RANK_SUM_KEY,
            GATKVCFConstants.MLE_ALLELE_COUNT_KEY,
            GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY,
            GATKVCFConstants.MAP_QUAL_RANK_SUM_KEY,
            GATKVCFConstants.READ_POS_RANK_SUM_KEY)) {
        lines.add(GATKVCFHeaderLines.getInfoLine(infoKey));
    }

    lines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY));

    return new VCFHeader(lines, Collections.singleton(SAMPLE_NAME));
}
 
示例15
/**
 * Imports the local GVCFs into a temporary GenomicsDB workspace and checks that the header
 * returned by the GenomicsDB feature reader has at least one metadata line whose value contains
 * the simple class name of {@code GenomicsDBImport}.
 *
 * @throws IOException declared for the workspace setup and the reader handling
 */
@Test
public void testCommandIncludedInOutputHeader() throws IOException {
    final List<String> inputGvcfs = LOCAL_GVCFS;
    final String workspace = createTempDir("genomicsdb-tests").getAbsolutePath() + "/workspace";

    writeToGenomicsDB(inputGvcfs, INTERVAL, workspace, 0, false, 0, 1);

    try (final FeatureReader<VariantContext> reader = getGenomicsDBFeatureReader(workspace, b38_reference_20_21)) {
        final VCFHeader header = (VCFHeader) reader.getHeader();

        final boolean mentionsImportTool = header.getMetaDataInSortedOrder().stream()
                .anyMatch(line -> line.getValue().contains(GenomicsDBImport.class.getSimpleName()));

        Assert.assertTrue(mentionsImportTool, "no headerline was present containing information about the GenomicsDBImport command");
    }
}
 
示例16
/**
 * Adds the PURPLE version line and the PURPLE INFO lines directly to {@code template}.
 *
 * <p>Note that the passed header itself is modified and returned; no copy is made here.
 *
 * @param purpleVersion value of the "purpleVersion" header line
 * @param template      header to extend in place
 * @return the same {@code template} instance
 */
@NotNull
public static VCFHeader generateHeader(@NotNull final String purpleVersion, @NotNull final VCFHeader template) {
    template.addMetaDataLine(new VCFHeaderLine("purpleVersion", purpleVersion));

    addSingleValueInfoLine(template, PURPLE_AF_INFO, VCFHeaderLineType.Float, PURPLE_AF_DESC);
    addSingleValueInfoLine(template, PURPLE_CN_INFO, VCFHeaderLineType.Float, PURPLE_CN_DESC);
    addSingleValueInfoLine(template, PURPLE_VARIANT_CN_INFO, VCFHeaderLineType.Float, PURPLE_PLOIDY_DESC);
    addSingleValueInfoLine(template, PURPLE_MINOR_ALLELE_CN_INFO, VCFHeaderLineType.Float, PURPLE_MINOR_ALLELE_PLOIDY_DESC);
    addSingleValueInfoLine(template, PURPLE_GERMLINE_INFO, VCFHeaderLineType.String, PURPLE_GERMLINE_DESC);

    // The biallelic marker is a flag, hence a count of 0.
    template.addMetaDataLine(new VCFInfoHeaderLine(PURPLE_BIALLELIC_FLAG, 0, VCFHeaderLineType.Flag, PURPLE_BIALLELIC_DESC));

    return template;
}

/** Registers an INFO line with a count of 1 on {@code header}. */
private static void addSingleValueInfoLine(@NotNull final VCFHeader header, @NotNull final String id,
        @NotNull final VCFHeaderLineType type, @NotNull final String description) {
    header.addMetaDataLine(new VCFInfoHeaderLine(id, 1, type, description));
}
 
示例17
/**
 * Creates the output header for PURPLE structural variants.
 *
 * <p>A new header is constructed from the template's metadata lines (input order) and genotype
 * samples; the version line, the GT format line and the structural-variant / PURPLE INFO and
 * FILTER lines are then added to that new header.
 *
 * @param purpleVersion value of the "purpleVersion" header line
 * @param template      header supplying the initial metadata lines and samples
 * @return the newly constructed header
 */
@NotNull
public static VCFHeader generateHeader(@NotNull final String purpleVersion, @NotNull final VCFHeader template) {
    final VCFHeader header = new VCFHeader(template.getMetaDataInInputOrder(), template.getGenotypeSamples());

    header.addMetaDataLine(new VCFHeaderLine("purpleVersion", purpleVersion));
    header.addMetaDataLine(VCFStandardHeaderLines.getFormatLine("GT"));

    // Recovered / inferred / imprecise markers.
    header.addMetaDataLine(
            new VCFInfoHeaderLine(StructuralVariantFactory.RECOVERED, 0, VCFHeaderLineType.Flag, RECOVERED_DESC));
    header.addMetaDataLine(
            new VCFInfoHeaderLine(StructuralVariantFactory.INFERRED, 0, VCFHeaderLineType.Flag, INFERRED_DESC));
    header.addMetaDataLine(new VCFFilterHeaderLine(INFERRED, INFERRED_DESC));
    header.addMetaDataLine(
            new VCFInfoHeaderLine(StructuralVariantFactory.IMPRECISE, 0, VCFHeaderLineType.Flag, IMPRECISE_DESC));

    // Positional and type annotations.
    header.addMetaDataLine(new VCFInfoHeaderLine(CIPOS, 2, VCFHeaderLineType.Integer, CIPOS_DESC));
    header.addMetaDataLine(new VCFInfoHeaderLine(SVTYPE, 1, VCFHeaderLineType.String, SVTYPE_DESC));

    // PURPLE and recovery annotations.
    header.addMetaDataLine(
            new VCFInfoHeaderLine(PURPLE_AF_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_AF_DESC));
    header.addMetaDataLine(
            new VCFInfoHeaderLine(PURPLE_CN_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_CN_DESC));
    header.addMetaDataLine(
            new VCFInfoHeaderLine(RECOVERY_METHOD, 1, VCFHeaderLineType.String, RECOVERY_METHOD_DESC));
    header.addMetaDataLine(
            new VCFInfoHeaderLine(RECOVERY_FILTER, UNBOUNDED, VCFHeaderLineType.String, RECOVERY_FILTER_DESC));
    header.addMetaDataLine(
            new VCFInfoHeaderLine(PURPLE_JUNCTION_COPY_NUMBER_INFO, 1, VCFHeaderLineType.Float, PURPLE_JUNCTION_COPY_NUMBER_DESC));
    header.addMetaDataLine(
            new VCFInfoHeaderLine(PURPLE_CN_CHANGE_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_CN_CHANGE_DESC));

    return header;
}
 
示例18
/**
 * Opens a VCF writer for {@code output} and immediately writes the PON header.
 *
 * <p>The writer is built with on-the-fly indexing and async I/O switched off and with genotype
 * writing suppressed. The header declares the PON count / total / max INFO lines and records the
 * number of input samples.
 *
 * @param output     destination handed to the writer builder
 * @param sampleSize value stored in the "PonInputSampleCount" header line
 */
PonVCF(final String output, int sampleSize) {
    writer = new VariantContextWriterBuilder()
            .setOutputFile(output)
            .modifyOption(Options.INDEX_ON_THE_FLY, false)
            .modifyOption(Options.USE_ASYNC_IO, false)
            .modifyOption(Options.DO_NOT_WRITE_GENOTYPES, true)
            .build();

    final VCFHeader ponHeader = new VCFHeader();
    ponHeader.addMetaDataLine(
            new VCFInfoHeaderLine(PON_COUNT, 1, VCFHeaderLineType.Integer, "how many samples had the variant"));
    ponHeader.addMetaDataLine(
            new VCFInfoHeaderLine(PON_TOTAL, 1, VCFHeaderLineType.Integer, "total depth"));
    ponHeader.addMetaDataLine(
            new VCFInfoHeaderLine(PON_MAX, 1, VCFHeaderLineType.Integer, "max depth"));
    ponHeader.addMetaDataLine(
            new VCFHeaderLine("PonInputSampleCount", String.valueOf(sampleSize)));

    writer.writeHeader(ponHeader);
}
 
示例19
/**
 * Writes an appropriate VCF header, given our arguments, to the provided writer.
 *
 * <p>The header combines the default tool lines, the genotyping engine's INFO headers, the
 * annotation engine's descriptions, a fixed set of standard INFO/FORMAT/FILTER lines, the
 * physical-phasing FORMAT lines (unless phasing is disabled) and, when emitting reference
 * confidence, the reference-confidence model's lines.
 *
 * @param vcfWriter              writer to which the header should be written; must not be null
 * @param sequenceDictionary     dictionary set on the header before it is written
 * @param defaultToolHeaderLines tool-level header lines to include
 */
public void writeHeader( final VariantContextWriter vcfWriter, final SAMSequenceDictionary sequenceDictionary,
                         final Set<VCFHeaderLine>  defaultToolHeaderLines) {
    Utils.nonNull(vcfWriter);

    final Set<VCFHeaderLine> lines = new HashSet<>(defaultToolHeaderLines);

    lines.addAll(genotypingEngine.getAppropriateVCFInfoHeaders());
    // all annotation fields from VariantAnnotatorEngine
    lines.addAll(annotationEngine.getVCFAnnotationDescriptions());

    // all callers need to add these standard annotation header lines
    lines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.DOWNSAMPLED_KEY));
    lines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY));
    lines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));

    // all callers need to add these standard FORMAT field header lines
    VCFStandardHeaderLines.addStandardFormatLines(lines, true,
            VCFConstants.GENOTYPE_KEY,
            VCFConstants.GENOTYPE_QUALITY_KEY,
            VCFConstants.DEPTH_KEY,
            VCFConstants.GENOTYPE_PL_KEY);

    final boolean physicalPhasingEnabled = ! hcArgs.doNotRunPhysicalPhasing;
    if ( physicalPhasingEnabled ) {
        lines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY));
        lines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY));
    }

    // FILTER fields are added unconditionally as it's not always 100% certain the circumstances
    // where the filters are used.  For example, in emitting all sites the lowQual field is used
    lines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_QUAL_FILTER_NAME));

    if ( emitReferenceConfidence() ) {
        lines.addAll(referenceConfidenceModel.getVCFHeaderLines());
    }

    final VCFHeader header = new VCFHeader(lines, sampleSet);
    header.setSequenceDictionary(sequenceDictionary);
    vcfWriter.writeHeader(header);
}
 
示例20
/**
 * Reads the sex recorded in the "gender" metadata line of a fingerprint VCF.
 *
 * @param file VCF to inspect; may be null
 * @return the {@code Sex} constant named by the "gender" line, or {@code Sex.Unknown} when
 *         {@code file} is null or the header has no such line
 */
static Sex getFingerprintSex(final File file) {
    if (file == null) {
        return Sex.Unknown;
    }

    try (VCFFileReader vcf = new VCFFileReader(file, false)) {
        final VCFHeaderLine genderLine = vcf.getFileHeader().getMetaDataLine("gender");
        return genderLine == null ? Sex.Unknown : Sex.valueOf(genderLine.getValue());
    }
}
 
示例21
/**
 * Returns the value of the "other" header line stored under {@code keyName}.
 *
 * @param vcfHeader header to search
 * @param keyName   key of the header line
 * @return the value of the matching line
 * @throws IllegalArgumentException if the header has no line for {@code keyName}
 */
public static String getValueFromVcfOtherHeaderLine(final VCFHeader vcfHeader, final String keyName) {
    final VCFHeaderLine line = vcfHeader.getOtherHeaderLine(keyName);
    if (line == null) {
        throw new IllegalArgumentException("Input VCF file is missing header line of type '" + keyName + "'");
    }
    return line.getValue();
}
 
示例22
/**
 * Returns the value of the "other" header line stored under {@code keyName}, parsed as an integer.
 *
 * @param vcfHeader header to search
 * @param keyName   key of the header line
 * @return the line's value converted with {@link Integer#valueOf(String)}
 * @throws IllegalArgumentException if the header has no line for {@code keyName}
 *         ({@link NumberFormatException}, a subclass, if the value is not an integer)
 */
public static Integer getIntegerFromVcfOtherHeaderLine(final VCFHeader vcfHeader, final String keyName) {
    final VCFHeaderLine line = vcfHeader.getOtherHeaderLine(keyName);
    if (line == null) {
        throw new IllegalArgumentException("Input VCF file is missing header line of type '" + keyName + "'");
    }
    return Integer.valueOf(line.getValue());
}
 
示例23
/**
 * Returns the value of the "other" header line stored under {@code keyName}, if there is one.
 *
 * @param vcfHeader header to search
 * @param keyName   key of the header line
 * @return the value of the matching line, or {@code null} when the header has no such line
 */
public static String getOptionalValueFromVcfOtherHeaderLine(final VCFHeader vcfHeader, final String keyName) {
    final VCFHeaderLine line = vcfHeader.getOtherHeaderLine(keyName);
    return line == null ? null : line.getValue();
}
 
示例24
/**
 * Adds the zCall-related metadata lines to {@code header}: the zCall version, the name of the
 * thresholds file, the X/Y threshold INFO lines and the two genotype FORMAT lines.
 *
 * @param header header to extend in place
 */
private void addAdditionalHeaderFields(VCFHeader header) {
    // Provenance of the zCall run.
    header.addMetaDataLine(
            new VCFHeaderLine(InfiniumVcfFields.ZCALL_VERSION, ZCALL_VERSION));
    header.addMetaDataLine(
            new VCFHeaderLine(InfiniumVcfFields.ZCALL_THRESHOLDS, ZCALL_THRESHOLDS_FILE.getName()));

    // INFO: thresholds.
    header.addMetaDataLine(
            new VCFInfoHeaderLine(InfiniumVcfFields.ZTHRESH_X, 1, VCFHeaderLineType.Float, "zCall X threshold"));
    header.addMetaDataLine(
            new VCFInfoHeaderLine(InfiniumVcfFields.ZTHRESH_Y, 1, VCFHeaderLineType.Float, "zCall Y threshold"));

    // FORMAT: genotypes.
    header.addMetaDataLine(
            new VCFFormatHeaderLine(InfiniumVcfFields.GTA, 1, VCFHeaderLineType.String, "Illumina Autocall Genotype"));
    header.addMetaDataLine(
            new VCFFormatHeaderLine(InfiniumVcfFields.GTZ, 1, VCFHeaderLineType.String, "zCall Genotype"));
}
 
示例25
/**
 * Returns the value of the "other" header line stored under {@code keyName}.
 *
 * @param vcfHeader header to search
 * @param keyName   key of the header line
 * @return the value of the matching line
 * @throws IllegalArgumentException if the header has no line for {@code keyName}
 */
private String getValueFromVcfOtherHeaderLine(final VCFHeader vcfHeader, final String keyName) {
    final VCFHeaderLine line = vcfHeader.getOtherHeaderLine(keyName);
    if (line == null) {
        throw new IllegalArgumentException("Input VCF file is missing header line of type '" + keyName + "'");
    }
    return line.getValue();
}
 
示例26
/**
 * Writes one VCF of mendelian violations per trio of the pedigree file into {@code VCF_DIR}.
 *
 * <p>Does nothing when {@code VCF_DIR} is null. Each output header consists of the input header's
 * metadata lines plus the violation-specific INFO lines, with the trio's maternal, paternal and
 * individual ids as samples. Violations are written in the order defined by a
 * {@code VariantContextComparator} built from the input header's contig lines.
 *
 * @param result detector result supplying the violations per family id
 */
private void writeAllViolations(final MendelianViolationDetector.Result result) {
    if (VCF_DIR == null) {
        return;
    }

    LOG.info(String.format("Writing family violation VCFs to %s/", VCF_DIR.getAbsolutePath()));

    final VariantContextComparator vcComparator = new VariantContextComparator(inputHeader.get().getContigLines());
    final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputHeader.get().getMetaDataInInputOrder());

    headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.MENDELIAN_VIOLATION_KEY, 1, VCFHeaderLineType.String, "Type of mendelian violation."));
    headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AC, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Original AC"));
    headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AF, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Original AF"));
    headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AN, 1, VCFHeaderLineType.Integer, "Original AN"));

    for (final PedFile.PedTrio trio : pedFile.get().values()) {
        final File outputFile = new File(VCF_DIR, IOUtil.makeFileNameSafe(trio.getFamilyId() + IOUtil.VCF_FILE_EXTENSION));
        LOG.info(String.format("Writing %s violation VCF to %s", trio.getFamilyId(), outputFile.getAbsolutePath()));

        // try-with-resources: the writer is closed even if sorting or writing the violations
        // throws, so a failure on one trio does not leak its open output file.
        try (final VariantContextWriter out = new VariantContextWriterBuilder()
                .setOutputFile(outputFile)
                .unsetOption(INDEX_ON_THE_FLY)
                .build()) {

            final VCFHeader newHeader = new VCFHeader(headerLines, CollectionUtil.makeList(trio.getMaternalId(), trio.getPaternalId(), trio.getIndividualId()));
            final TreeSet<VariantContext> orderedViolations = new TreeSet<>(vcComparator);

            orderedViolations.addAll(result.violations().get(trio.getFamilyId()));
            out.writeHeader(newHeader);
            orderedViolations.forEach(out::add);
        }
    }
}
 
示例27
/**
 * Adds a "badDate" header line holding the value "04/18/2019" to the test VCF's header and
 * expects {@code InfiniumVcfFields.getDateFromVcfOtherHeaderLine} to reject it with an
 * {@link IllegalArgumentException} when parsing with {@code autocallDateFormat}.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void testGetWronglyFormattedDateFromVcfOtherHeaderLine() {
    final String key = "badDate";
    try (final VCFFileReader reader = new VCFFileReader(TEST_VCF_FILE, false)) {
        final VCFHeader header = reader.getFileHeader();
        header.addMetaDataLine(new VCFHeaderLine(key, "04/18/2019"));
        InfiniumVcfFields.getDateFromVcfOtherHeaderLine(header, key, autocallDateFormat);
    }
}
 
示例28
/**
 * Hands the header to the codec together with the VCF version found in it.
 *
 * The version is taken from the first metadata line (input order) whose key is recognized by
 * {@code VCFHeaderVersion.isFormatString}; if no such line exists, {@code null} is passed.
 */
@Override public void setHeader(VCFHeader header) {
	// Normally AbstractVCFCodec parses the header and thereby sets the
	// version field. It gets used later on so we need to set it.
	VCFHeaderVersion detected = null;
	for (final VCFHeaderLine metaLine : header.getMetaDataInInputOrder()) {
		if (!VCFHeaderVersion.isFormatString(metaLine.getKey())) {
			continue;
		}
		// Only the first format line is considered.
		detected = VCFHeaderVersion.toHeaderVersion(metaLine.getValue());
		break;
	}

	codec.setHeaderAndVersion(header, detected);
}
 
示例29
/**
 * Supplies the header lines this tool adds to its VCF outputs by default.
 *
 * @return If addOutputVCFCommandLine is true, a set of VCF header lines containing the tool name, version,
 * date and command line, otherwise an empty set.
 */
protected Set<VCFHeaderLine> getDefaultToolVCFHeaderLines() {
    if (!addOutputVCFCommandLine) {
        return new HashSet<>();
    }
    return GATKVariantContextUtils.getDefaultVCFHeaderLines(
            getToolkitShortName(),
            this.getClass().getSimpleName(),
            getVersion(),
            Utils.getDateTimeForDisplay((ZonedDateTime.now())),
            getCommandLine());
}
 
示例30
/**
 * Supplies the header lines this tool adds to its VCF outputs by default.
 *
 * @return If addOutputVCFCommandLine is true, a set of VCF header lines containing the tool name, version,
 * date and command line, otherwise an empty set.
 */
protected Set<VCFHeaderLine> getDefaultToolVCFHeaderLines() {
    if (!addOutputVCFCommandLine) {
        return new HashSet<>();
    }
    return GATKVariantContextUtils.getDefaultVCFHeaderLines(
            getToolkitShortName(),
            this.getClass().getSimpleName(),
            getVersion(),
            Utils.getDateTimeForDisplay((ZonedDateTime.now())),
            getCommandLine());
}