Java源码示例:htsjdk.variant.vcf.VCFHeaderLine
示例1
/**
 * Creates the header for the GATK-compatible output VCF.
 *
 * <p>The new header is built from the metadata lines of {@code header} (in input order) with
 * {@code sampleName} as its only sample. The GT and AD FORMAT lines, a compatibility marker
 * line and the MAPPABILITY / SOMATIC_PON_COUNT / GERMLINE_PON_COUNT INFO lines are then added.
 *
 * @param header     header of the input VCF
 * @param sampleName name of the single sample of the output VCF
 * @return the newly constructed output header
 */
@NotNull
public static VCFHeader generateOutputHeader(@NotNull final VCFHeader header, @NotNull final String sampleName) {
    final VCFHeader result = new VCFHeader(header.getMetaDataInInputOrder(), Sets.newHashSet(sampleName));

    // Standard FORMAT definitions for the genotype fields.
    for (final String formatKey : new String[] {"GT", "AD"}) {
        result.addMetaDataLine(VCFStandardHeaderLines.getFormatLine(formatKey));
    }

    final VCFHeaderLine compatibilityNote =
            new VCFHeaderLine("StrelkaGATKCompatibility", "Added GT fields to strelka calls for gatk compatibility.");
    result.addMetaDataLine(compatibilityNote);

    final VCFInfoHeaderLine mappability =
            new VCFInfoHeaderLine("MAPPABILITY", 1, VCFHeaderLineType.Float, "Mappability (percentage)");
    result.addMetaDataLine(mappability);

    final VCFInfoHeaderLine somaticPonCount = new VCFInfoHeaderLine("SOMATIC_PON_COUNT", 1, VCFHeaderLineType.Integer,
            "Number of times the variant appears in the somatic PON");
    result.addMetaDataLine(somaticPonCount);

    final VCFInfoHeaderLine germlinePonCount = new VCFInfoHeaderLine("GERMLINE_PON_COUNT", 1, VCFHeaderLineType.Integer,
            "Number of times the variant appears in the germline PON");
    result.addMetaDataLine(germlinePonCount);

    return result;
}
示例2
/**
 * Writes the output VCF header and loads the per-sample mixing fractions.
 *
 * <p>The output header contains the input header's metadata lines, the INFO line for
 * {@code EXPECTED_ALLELE_FRACTION_NAME} and the default tool header lines. The mixing fractions
 * read from {@code inputMixingFractions} are stored in the order of the input header's samples.
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    // Must happen before the header is constructed: VCFHeader copies the lines it is given,
    // so anything added to the set afterwards would never be written.
    headerLines.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);

    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    final Map<String, Double> mixingfractionsMap = mixingFractionsList.stream()
            .collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    // Note: a header sample without an entry in the map yields null here, and unboxing it
    // throws a NullPointerException.
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream()
            .mapToDouble(mixingfractionsMap::get).toArray();
}
示例3
/**
 * Writes the output VCF header before traversal.
 *
 * <p>The header is the input header's metadata plus one FILTER line for every name in
 * {@code Mutect2FilteringEngine.M_2_FILTER_NAMES}, the explicitly described FILTER lines
 * below, and the default tool header lines.
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> outputLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());

    // FILTER lines whose definitions are looked up by name.
    Mutect2FilteringEngine.M_2_FILTER_NAMES.stream()
            .forEach(filterName -> outputLines.add(GATKVCFHeaderLines.getFilterLine(filterName)));

    // FILTER lines described inline.
    outputLines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.ARTIFACT_IN_NORMAL_FILTER_NAME, "artifact_in_normal"));
    outputLines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.MEDIAN_BASE_QUALITY_DIFFERENCE_FILTER_NAME, "ref - alt median base quality"));
    outputLines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.MEDIAN_MAPPING_QUALITY_DIFFERENCE_FILTER_NAME, "ref - alt median mapping quality"));
    outputLines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.MEDIAN_CLIPPING_DIFFERENCE_FILTER_NAME, "ref - alt median clipping"));
    outputLines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME, "abs(ref - alt) median fragment length"));
    outputLines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.READ_POSITION_FILTER_NAME, "median distance of alt variants from end of reads"));
    outputLines.add(new VCFFilterHeaderLine(
            Mutect2FilteringEngine.CONTAMINATION_FILTER_NAME, "contamination"));

    outputLines.addAll(getDefaultToolVCFHeaderLines());

    final VCFHeader outputHeader = new VCFHeader(outputLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(new File(outputVcf));
    vcfWriter.writeHeader(outputHeader);
}
示例4
/**
 * Opens a VCF writer on {@code outputFile} and writes a single-sample header to it.
 *
 * <p>The header carries "reference", "source" and "fileDate" lines, the standard FORMAT lines
 * for the PL, allele-depth and depth keys, and the sequence dictionary of {@code ref}.
 *
 * @param outputFile                file the writer is built for
 * @param referenceSequenceFileName reference file; its absolute path goes into the "reference" line
 * @param sample                    the only sample name of the header
 * @param source                    value of the "source" line
 * @param ref                       reference supplying the sequence dictionary
 * @return the writer, with the header already written
 */
private static VariantContextWriter getVariantContextWriter(final File outputFile,
                                                            final File referenceSequenceFileName,
                                                            final String sample,
                                                            final String source,
                                                            final ReferenceSequenceFile ref) {
    final VariantContextWriter writer = new VariantContextWriterBuilder()
            .setReferenceDictionary(ref.getSequenceDictionary())
            .setOutputFile(outputFile)
            .build();

    // Insertion order is kept by the LinkedHashSet, so the lines are added in output order.
    final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>();
    final String referencePath = referenceSequenceFileName.getAbsolutePath();
    headerLines.add(new VCFHeaderLine("reference", referencePath));
    headerLines.add(new VCFHeaderLine("source", source));
    final String creationDate = new Date().toString();
    headerLines.add(new VCFHeaderLine("fileDate", creationDate));
    headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_PL_KEY));
    headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS));
    headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));

    final VCFHeader vcfHeader = new VCFHeader(headerLines, Collections.singletonList(sample));
    vcfHeader.setSequenceDictionary(ref.getSequenceDictionary());

    writer.writeHeader(vcfHeader);
    return writer;
}
示例5
/**
 * Writes the output VCF header and loads the per-sample mixing fractions.
 *
 * <p>The output header contains the input header's metadata lines, the INFO line for
 * {@code EXPECTED_ALLELE_FRACTION_NAME} and the default tool header lines. The mixing fractions
 * read from {@code inputMixingFractions} are stored in the order of the input header's samples.
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    // Must happen before the header is constructed: VCFHeader copies the lines it is given,
    // so anything added to the set afterwards would never be written.
    headerLines.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);

    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    final Map<String, Double> mixingfractionsMap = mixingFractionsList.stream()
            .collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    // Note: a header sample without an entry in the map yields null here, and unboxing it
    // throws a NullPointerException.
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream()
            .mapToDouble(mixingfractionsMap::get).toArray();
}
示例6
/**
 * Builds the output header from the input header and writes it to {@code vcfWriter}.
 *
 * <p>The input header must declare an INFO line whose ID equals {@code infoKey}. Existing
 * FILTER lines are dropped when {@code removeOldFilters} is set. Tranche header fields for SNPs
 * and indels and the default tool header lines are added before writing.
 *
 * @param vcfWriter writer that receives the header
 * @throws UserException if the input header has no INFO line for {@code infoKey}
 */
private void writeVCFHeader(VariantContextWriter vcfWriter) {
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputHeader.getMetaDataInSortedOrder());

    // Look for the INFO definition of the requested key.
    boolean infoKeyDeclared = false;
    for (final VCFHeaderLine line : headerLines) {
        if (line instanceof VCFInfoHeaderLine && ((VCFInfoHeaderLine) line).getID().equals(infoKey)) {
            infoKeyDeclared = true;
            break;
        }
    }
    if (!infoKeyDeclared) {
        throw new UserException(String.format("Input VCF does not contain a header line for specified info key:%s", infoKey));
    }

    if (removeOldFilters) {
        headerLines.removeIf(line -> line instanceof VCFFilterHeaderLine);
    }

    addTrancheHeaderFields(SNPString, snpTranches, headerLines);
    addTrancheHeaderFields(INDELString, indelTranches, headerLines);

    // Samples are written in sorted order.
    final TreeSet<String> sortedSamples = new TreeSet<>(inputHeader.getGenotypeSamples());

    headerLines.addAll(getDefaultToolVCFHeaderLines());
    vcfWriter.writeHeader(new VCFHeader(headerLines, sortedSamples));
}
示例7
/**
 * Initializes the realignment engine, output VCF writer and assembly components.
 *
 * <p>The output header is the input header's metadata plus the alignment-artifact FILTER line,
 * three realignment INFO lines and the default tool header lines. A haplotype BAM writer is
 * only created when {@code bamOutputPath} is set.
 */
@Override
public void onTraversalStart() {
    realignmentEngine = new RealignmentEngine(realignmentArgumentCollection);
    vcfWriter = createVCFWriter(new File(outputVcf));

    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> outputLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    outputLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.ALIGNMENT_ARTIFACT_FILTER_NAME));
    final String[] realignmentInfoKeys = {
            GATKVCFConstants.UNITIG_SIZES_KEY,
            GATKVCFConstants.ALIGNMENT_SCORE_DIFFERENCE_KEY,
            GATKVCFConstants.JOINT_ALIGNMENT_COUNT_KEY
    };
    for (final String key : realignmentInfoKeys) {
        outputLines.add(GATKVCFHeaderLines.getInfoLine(key));
    }
    outputLines.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader outputHeader = new VCFHeader(outputLines, inputHeader.getGenotypeSamples());
    vcfWriter.writeHeader(outputHeader);

    bamHeader = getHeaderForReads();
    samplesList = new IndexedSampleList(new ArrayList<>(ReadUtils.getSamplesFromHeader(bamHeader)));
    referenceReader = AssemblyBasedCallerUtils.createReferenceReader(Utils.nonNull(referenceArguments.getReferenceSpecifier()));
    assemblyEngine = MTAC.createReadThreadingAssembler();
    likelihoodCalculationEngine = AssemblyBasedCallerUtils.createLikelihoodCalculationEngine(MTAC.likelihoodArgs);

    if (bamOutputPath == null) {
        haplotypeBAMWriter = Optional.empty();
    } else {
        haplotypeBAMWriter = Optional.of(new HaplotypeBAMWriter(
                HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES,
                IOUtils.getPath(bamOutputPath),
                true,
                false,
                getHeaderForSAMWriter()));
    }
}
示例8
/**
 * Interprets complex variants from the given contigs, re-interprets them as simple variants,
 * and writes three VCFs: the complex calls, the zero-or-one-segment calls and the
 * multi-segment calls.
 *
 * <p>The tool logger is passed to the VCF writer only when the discovery stage runs in debug
 * mode; otherwise {@code null} is passed.
 *
 * @param ctx                         Spark context used to parallelize the complex calls
 * @param contigsWithCpxAln           contigs to interpret
 * @param svDiscoveryInputMetaData    source of the reference dictionary, header lines and logger
 * @param assemblyRawAlignments       alignments handed to the simple-variant extractor
 * @param outputPrefixWithSampleName  prefix of all three output file names
 * @return the complex calls together with the merged re-interpreted calls
 */
private static CpxAndReInterpretedSimpleVariants extractCpxVariants(final JavaSparkContext ctx,
                                                                    final JavaRDD<AssemblyContigWithFineTunedAlignments> contigsWithCpxAln,
                                                                    final SvDiscoveryInputMetaData svDiscoveryInputMetaData,
                                                                    final JavaRDD<GATKRead> assemblyRawAlignments,
                                                                    final String outputPrefixWithSampleName) {
    final Logger debugLogger;
    if (svDiscoveryInputMetaData.getDiscoverStageArgs().runInDebugMode) {
        debugLogger = svDiscoveryInputMetaData.getToolLogger();
    } else {
        debugLogger = null;
    }
    final Set<VCFHeaderLine> toolHeaderLines = svDiscoveryInputMetaData.getDefaultToolVCFHeaderLines();

    // Complex calls.
    final List<VariantContext> cpxCalls =
            CpxVariantInterpreter.makeInterpretation(contigsWithCpxAln, svDiscoveryInputMetaData);
    SVVCFWriter.writeVCF(
            cpxCalls,
            outputPrefixWithSampleName + COMPLEX_CHIMERA_VCF_FILE_NAME,
            svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast().getValue(),
            toolHeaderLines,
            debugLogger);

    // Simple calls derived from the complex ones.
    final JavaRDD<VariantContext> cpxCallsRDD = ctx.parallelize(cpxCalls);
    final SegmentedCpxVariantSimpleVariantExtractor.ExtractedSimpleVariants reinterpreted =
            SegmentedCpxVariantSimpleVariantExtractor.extract(cpxCallsRDD, svDiscoveryInputMetaData, assemblyRawAlignments);
    final SAMSequenceDictionary refSeqDict =
            svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast().getValue();
    SVVCFWriter.writeVCF(
            reinterpreted.getReInterpretZeroOrOneSegmentCalls(),
            outputPrefixWithSampleName + REINTERPRETED_1_SEG_CALL_VCF_FILE_NAME,
            refSeqDict, toolHeaderLines, debugLogger);
    SVVCFWriter.writeVCF(
            reinterpreted.getReInterpretMultiSegmentsCalls(),
            outputPrefixWithSampleName + REINTERPRETED_MULTI_SEG_CALL_VCF_FILE_NAME,
            refSeqDict, toolHeaderLines, debugLogger);

    return new CpxAndReInterpretedSimpleVariants(cpxCalls, reinterpreted.getMergedReinterpretedCalls());
}
示例9
/**
 * Extracts simple variants from the complex variants in {@code complexVCF} and writes them to
 * two VCFs named {@code derivedSimpleVCFPrefix + "_1_seg.vcf"} and
 * {@code derivedSimpleVCFPrefix + "_multi_seg.vcf"}.
 *
 * @param ctx Spark context of the running tool
 */
@Override
protected void runTool(final JavaSparkContext ctx) {

    // TODO: 5/9/18 getback sample name in output files
    final SAMFileHeader headerForReads = getHeaderForReads();
    final Set<VCFHeaderLine> toolHeaderLines = getDefaultToolVCFHeaderLines();
    final SvDiscoveryInputMetaData metaData = new SvDiscoveryInputMetaData(
            ctx, discoverStageArgs, nonCanonicalChromosomeNamesFile, derivedSimpleVCFPrefix,
            null, null, null, null,
            headerForReads, getReference(), toolHeaderLines, localLogger);

    final JavaRDD<VariantContext> complexVariants =
            new VariantsSparkSource(ctx).getParallelVariantContexts(complexVCF, getIntervals());
    final JavaRDD<GATKRead> assemblyRawAlignments = getReads();

    final SegmentedCpxVariantSimpleVariantExtractor.ExtractedSimpleVariants extracted =
            SegmentedCpxVariantSimpleVariantExtractor.extract(complexVariants, metaData, assemblyRawAlignments);

    final String oneSegmentVcf = derivedSimpleVCFPrefix + "_1_seg.vcf";
    final String multiSegmentVcf = derivedSimpleVCFPrefix + "_multi_seg.vcf";
    // The sequence dictionary for both outputs comes from the header of the complex VCF.
    final VCFHeader complexVcfHeader = VariantsSparkSource.getHeader(complexVCF);

    SVVCFWriter.writeVCF(
            extracted.getReInterpretZeroOrOneSegmentCalls(), oneSegmentVcf,
            complexVcfHeader.getSequenceDictionary(), toolHeaderLines, logger);
    SVVCFWriter.writeVCF(
            extracted.getReInterpretMultiSegmentsCalls(), multiSegmentVcf,
            complexVcfHeader.getSequenceDictionary(), toolHeaderLines, logger);
}
示例10
/**
 * Bundles the inputs of the SV discovery stage.
 *
 * <p>The sequence dictionary is taken from {@code headerForReads}. The canonical chromosome
 * set, the reference, the sequence dictionary and the reads header are broadcast through
 * {@code ctx} and stored in the reference and sample-specific data holders; the remaining
 * arguments are stored as given.
 *
 * @param ctx                             Spark context used for broadcasting
 * @param discoverStageArgs               arguments of the discovery stage
 * @param nonCanonicalChromosomeNamesFile input for determining the canonical chromosomes
 * @param outputPath                      output path
 * @param readMetadata                    stored in the sample-specific data
 * @param assembledIntervals              stored in the sample-specific data
 * @param evidenceTargetLinks             stored in the sample-specific data
 * @param cnvCallsBroadcast               stored in the sample-specific data
 * @param headerForReads                  reads header; source of the dictionary and sample id
 * @param reference                       reference to broadcast
 * @param defaultToolVCFHeaderLines       header lines for output VCFs
 * @param toolLogger                      logger of the calling tool
 */
public SvDiscoveryInputMetaData(final JavaSparkContext ctx,
                                final DiscoverVariantsFromContigAlignmentsSparkArgumentCollection discoverStageArgs,
                                final String nonCanonicalChromosomeNamesFile,
                                final String outputPath,
                                final ReadMetadata readMetadata,
                                final List<SVInterval> assembledIntervals,
                                final PairedStrandedIntervalTree<EvidenceTargetLink> evidenceTargetLinks,
                                final Broadcast<SVIntervalTree<VariantContext>> cnvCallsBroadcast,
                                final SAMFileHeader headerForReads,
                                final ReferenceMultiSparkSource reference,
                                final Set<VCFHeaderLine> defaultToolVCFHeaderLines,
                                final Logger toolLogger) {

    final SAMSequenceDictionary dictionary = headerForReads.getSequenceDictionary();
    final Set<String> canonicalChromosomes =
            SVUtils.getCanonicalChromosomes(nonCanonicalChromosomeNamesFile, dictionary);
    final Broadcast<Set<String>> canonicalChromosomesBroadcast = ctx.broadcast(canonicalChromosomes);
    final String sampleId = SVUtils.getSampleId(headerForReads);

    // Reference-related data.
    final Broadcast<ReferenceMultiSparkSource> referenceBroadcast = ctx.broadcast(reference);
    final Broadcast<SAMSequenceDictionary> dictionaryBroadcast = ctx.broadcast(dictionary);
    this.referenceData = new ReferenceData(canonicalChromosomesBroadcast, referenceBroadcast, dictionaryBroadcast);

    // Sample-related data.
    final Broadcast<SAMFileHeader> headerBroadcast = ctx.broadcast(headerForReads);
    this.sampleSpecificData = new SampleSpecificData(
            sampleId, cnvCallsBroadcast, assembledIntervals, evidenceTargetLinks, readMetadata, headerBroadcast);

    this.discoverStageArgs = discoverStageArgs;
    this.outputPath = outputPath;
    this.defaultToolVCFHeaderLines = defaultToolVCFHeaderLines;
    this.toolLogger = toolLogger;
}
示例11
/**
 * Writes the given variants, preceded by a header, to {@code fileName}.
 *
 * <p>The header comes from {@code getVcfHeader} with every line of
 * {@code defaultToolVCFHeaderLines} added to it.
 *
 * @param fileName                    destination, created through {@code BucketUtils.createFile}
 * @param variantsArrayList           variants to write, in iteration order
 * @param referenceSequenceDictionary dictionary used for the writer and the header
 * @param defaultToolVCFHeaderLines   extra header lines to add
 * @throws GATKException if an {@link IOException} occurs while creating or closing the output
 */
private static void writeVariants(final String fileName, final List<VariantContext> variantsArrayList,
                                  final SAMSequenceDictionary referenceSequenceDictionary,
                                  final Set<VCFHeaderLine> defaultToolVCFHeaderLines) {
    // Both resources are managed by try-with-resources so the VCF writer is closed (before the
    // underlying stream) even when writing the header or a record throws.
    try (final OutputStream outputStream
                 = new BufferedOutputStream(BucketUtils.createFile(fileName));
         final VariantContextWriter vcfWriter
                 = getVariantContextWriter(outputStream, referenceSequenceDictionary)) {

        final VCFHeader vcfHeader = getVcfHeader(referenceSequenceDictionary);
        defaultToolVCFHeaderLines.forEach(vcfHeader::addMetaDataLine);
        vcfWriter.writeHeader(vcfHeader);
        variantsArrayList.forEach(vcfWriter::add);
    } catch (final IOException e) {
        throw new GATKException("Could not create output file", e);
    }
}
示例12
/**
 * Checks that, with the add-command-line argument set to "true", the default tool header
 * lines contain a "source" line and a "GATKCommandLine" line that both name the tool class,
 * and that the command line entry has CommandLine, Version and Date fields.
 */
@Test
public void testGetDefaultToolVCFHeaderLines() {
    final TestGATKSparkToolWithVariants tool = new TestGATKSparkToolWithVariants();
    final String[] args = {"--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "true"};
    tool.instanceMain(args);

    final VCFHeader header = new VCFHeader(tool.getDefaultToolVCFHeaderLines());
    final String expectedToolName = tool.getClass().getSimpleName();

    final VCFHeaderLine source = header.getOtherHeaderLine("source");
    Assert.assertEquals(source.getValue(), expectedToolName);

    final VCFIDHeaderLine commandLine = (VCFIDHeaderLine) header.getOtherHeaderLine("GATKCommandLine");
    Assert.assertEquals(commandLine.getID(), expectedToolName);

    final String renderedCommandLine = commandLine.toString();
    for (final String expectedField : new String[] {"CommandLine=", "Version=", "Date="}) {
        assertContains(renderedCommandLine, expectedField);
    }
}
示例13
/**
 * Runs the feature-based test tool with a mask VCF and the add-command-line argument set to
 * "true", then checks that the default tool header lines contain a "source" line and a
 * "GATKCommandLine" line naming the tool class, with CommandLine, Version and Date fields.
 */
@Test
public void testGetDefaultToolVCFHeaderLines() throws IOException {
    final TestGATKToolWithFeatures tool = new TestGATKToolWithFeatures();
    final File maskVcf = new File(publicTestDir + "org/broadinstitute/hellbender/engine/feature_data_source_test_with_bigHeader.vcf");
    final String[] args = {
            "--mask", maskVcf.getCanonicalPath(),
            "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "true"
    };
    tool.instanceMain(args);

    final VCFHeader header = new VCFHeader(tool.getDefaultToolVCFHeaderLines());
    final String expectedToolName = tool.getClass().getSimpleName();

    final VCFHeaderLine source = header.getOtherHeaderLine("source");
    Assert.assertEquals(source.getValue(), expectedToolName);

    final VCFIDHeaderLine commandLine = (VCFIDHeaderLine) header.getOtherHeaderLine("GATKCommandLine");
    Assert.assertEquals(commandLine.getID(), expectedToolName);

    final String renderedCommandLine = commandLine.toString();
    for (final String expectedField : new String[] {"CommandLine=", "Version=", "Date="}) {
        assertContains(renderedCommandLine, expectedField);
    }
}
示例14
/**
 * Builds a single-sample header (sample {@code SAMPLE_NAME}) declaring a fixed set of standard
 * FORMAT and INFO lines, several GATK INFO lines and the strand-bias-by-sample FORMAT line.
 *
 * @return the newly constructed header
 */
private static VCFHeader getMinimalVCFHeader() {
    final Set<VCFHeaderLine> lines = new LinkedHashSet<>();

    // Standard FORMAT lines.
    VCFStandardHeaderLines.addStandardFormatLines(lines, true,
            VCFConstants.GENOTYPE_KEY,
            VCFConstants.DEPTH_KEY,
            VCFConstants.GENOTYPE_QUALITY_KEY,
            VCFConstants.GENOTYPE_PL_KEY,
            VCFConstants.GENOTYPE_ALLELE_DEPTHS);

    // Standard INFO lines.
    VCFStandardHeaderLines.addStandardInfoLines(lines, true,
            VCFConstants.DEPTH_KEY,
            VCFConstants.RMS_MAPPING_QUALITY_KEY,
            VCFConstants.MAPPING_QUALITY_ZERO_KEY);

    // GATK-specific INFO lines, added in this order.
    final String[] gatkInfoKeys = {
            GATKVCFConstants.BASE_QUAL_RANK_SUM_KEY,
            GATKVCFConstants.CLIPPING_RANK_SUM_KEY,
            GATKVCFConstants.MLE_ALLELE_COUNT_KEY,
            GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY,
            GATKVCFConstants.MAP_QUAL_RANK_SUM_KEY,
            GATKVCFConstants.READ_POS_RANK_SUM_KEY
    };
    for (final String key : gatkInfoKeys) {
        lines.add(GATKVCFHeaderLines.getInfoLine(key));
    }

    lines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY));

    return new VCFHeader(lines, Collections.singleton(SAMPLE_NAME));
}
示例15
/**
 * Imports the local GVCFs into a temporary GenomicsDB workspace and checks that the header
 * read back from it has at least one metadata line whose value contains the simple class name
 * of {@code GenomicsDBImport}.
 */
@Test
public void testCommandIncludedInOutputHeader() throws IOException {
    final List<String> vcfInputs = LOCAL_GVCFS;
    final String workspace = createTempDir("genomicsdb-tests").getAbsolutePath() + "/workspace";
    writeToGenomicsDB(vcfInputs, INTERVAL, workspace, 0, false, 0, 1);

    try (final FeatureReader<VariantContext> reader = getGenomicsDBFeatureReader(workspace, b38_reference_20_21)) {
        final VCFHeader header = (VCFHeader) reader.getHeader();
        final boolean mentionsImportTool = header.getMetaDataInSortedOrder().stream()
                .anyMatch(line -> line.getValue().contains(GenomicsDBImport.class.getSimpleName()));
        Assert.assertTrue(mentionsImportTool, "no headerline was present containing information about the GenomicsDBImport command");
    }
}
示例16
/**
 * Adds the PURPLE version line and the PURPLE INFO definitions to {@code template}.
 *
 * <p>Note that {@code template} itself is modified and returned; no copy is made.
 *
 * @param purpleVersion value of the "purpleVersion" header line
 * @param template      header to extend in place
 * @return {@code template}, after the lines have been added
 */
@NotNull
public static VCFHeader generateHeader(@NotNull final String purpleVersion, @NotNull final VCFHeader template) {
    template.addMetaDataLine(new VCFHeaderLine("purpleVersion", purpleVersion));

    // Single-valued INFO fields.
    template.addMetaDataLine(singleValueInfo(PURPLE_AF_INFO, VCFHeaderLineType.Float, PURPLE_AF_DESC));
    template.addMetaDataLine(singleValueInfo(PURPLE_CN_INFO, VCFHeaderLineType.Float, PURPLE_CN_DESC));
    template.addMetaDataLine(singleValueInfo(PURPLE_VARIANT_CN_INFO, VCFHeaderLineType.Float, PURPLE_PLOIDY_DESC));
    template.addMetaDataLine(singleValueInfo(PURPLE_MINOR_ALLELE_CN_INFO, VCFHeaderLineType.Float, PURPLE_MINOR_ALLELE_PLOIDY_DESC));
    template.addMetaDataLine(singleValueInfo(PURPLE_GERMLINE_INFO, VCFHeaderLineType.String, PURPLE_GERMLINE_DESC));

    // Flag INFO field (count 0).
    template.addMetaDataLine(new VCFInfoHeaderLine(PURPLE_BIALLELIC_FLAG, 0, VCFHeaderLineType.Flag, PURPLE_BIALLELIC_DESC));

    return template;
}

/** Creates an INFO header line that holds exactly one value of the given type. */
private static VCFInfoHeaderLine singleValueInfo(final String id, final VCFHeaderLineType type, final String description) {
    return new VCFInfoHeaderLine(id, 1, type, description);
}
示例17
/**
 * Creates a new header from the metadata lines (input order) and genotype samples of
 * {@code template}, then adds the PURPLE version line, the GT FORMAT line and the structural
 * variant INFO / FILTER definitions.
 *
 * @param purpleVersion value of the "purpleVersion" header line
 * @param template      header whose metadata and samples seed the new header
 * @return the newly constructed header
 */
@NotNull
public static VCFHeader generateHeader(@NotNull final String purpleVersion, @NotNull final VCFHeader template) {
    final VCFHeader result = new VCFHeader(template.getMetaDataInInputOrder(), template.getGenotypeSamples());

    result.addMetaDataLine(new VCFHeaderLine("purpleVersion", purpleVersion));
    result.addMetaDataLine(VCFStandardHeaderLines.getFormatLine("GT"));

    // Recovery / inference markers.
    result.addMetaDataLine(flagInfo(StructuralVariantFactory.RECOVERED, RECOVERED_DESC));
    result.addMetaDataLine(flagInfo(StructuralVariantFactory.INFERRED, INFERRED_DESC));
    result.addMetaDataLine(new VCFFilterHeaderLine(INFERRED, INFERRED_DESC));
    result.addMetaDataLine(flagInfo(StructuralVariantFactory.IMPRECISE, IMPRECISE_DESC));

    // Structural variant basics.
    result.addMetaDataLine(new VCFInfoHeaderLine(CIPOS, 2, VCFHeaderLineType.Integer, CIPOS_DESC));
    result.addMetaDataLine(new VCFInfoHeaderLine(SVTYPE, 1, VCFHeaderLineType.String, SVTYPE_DESC));

    // PURPLE annotations.
    result.addMetaDataLine(new VCFInfoHeaderLine(PURPLE_AF_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_AF_DESC));
    result.addMetaDataLine(new VCFInfoHeaderLine(PURPLE_CN_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_CN_DESC));
    result.addMetaDataLine(new VCFInfoHeaderLine(RECOVERY_METHOD, 1, VCFHeaderLineType.String, RECOVERY_METHOD_DESC));
    result.addMetaDataLine(new VCFInfoHeaderLine(RECOVERY_FILTER, UNBOUNDED, VCFHeaderLineType.String, RECOVERY_FILTER_DESC));
    result.addMetaDataLine(new VCFInfoHeaderLine(
            PURPLE_JUNCTION_COPY_NUMBER_INFO, 1, VCFHeaderLineType.Float, PURPLE_JUNCTION_COPY_NUMBER_DESC));
    result.addMetaDataLine(new VCFInfoHeaderLine(
            PURPLE_CN_CHANGE_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_CN_CHANGE_DESC));

    return result;
}

/** Creates a Flag-typed INFO header line (count 0). */
private static VCFInfoHeaderLine flagInfo(final String id, final String description) {
    return new VCFInfoHeaderLine(id, 0, VCFHeaderLineType.Flag, description);
}
示例18
/**
 * Opens a VCF writer on {@code output} and immediately writes the PON header.
 *
 * <p>The writer is configured with on-the-fly indexing and async IO switched off and with
 * genotype writing suppressed. The header declares the three PON INFO fields and records the
 * sample count in a "PonInputSampleCount" line.
 *
 * @param output     output file name handed to the writer builder
 * @param sampleSize number recorded in the "PonInputSampleCount" line
 */
PonVCF(final String output, int sampleSize) {
    writer = new VariantContextWriterBuilder()
            .setOutputFile(output)
            .modifyOption(Options.INDEX_ON_THE_FLY, false)
            .modifyOption(Options.USE_ASYNC_IO, false)
            .modifyOption(Options.DO_NOT_WRITE_GENOTYPES, true)
            .build();

    final VCFHeader ponHeader = new VCFHeader();

    final VCFInfoHeaderLine countLine =
            new VCFInfoHeaderLine(PON_COUNT, 1, VCFHeaderLineType.Integer, "how many samples had the variant");
    ponHeader.addMetaDataLine(countLine);

    final VCFInfoHeaderLine totalLine =
            new VCFInfoHeaderLine(PON_TOTAL, 1, VCFHeaderLineType.Integer, "total depth");
    ponHeader.addMetaDataLine(totalLine);

    final VCFInfoHeaderLine maxLine =
            new VCFInfoHeaderLine(PON_MAX, 1, VCFHeaderLineType.Integer, "max depth");
    ponHeader.addMetaDataLine(maxLine);

    final String sampleCount = String.valueOf(sampleSize);
    ponHeader.addMetaDataLine(new VCFHeaderLine("PonInputSampleCount", sampleCount));

    writer.writeHeader(ponHeader);
}
示例19
/**
 * Assembles the VCF header that matches the current arguments and writes it to the given writer.
 *
 * @param vcfWriter              writer to which the header should be written; must not be null
 * @param sequenceDictionary     sequence dictionary set on the header before it is written
 * @param defaultToolHeaderLines tool header lines to include in the header
 */
public void writeHeader( final VariantContextWriter vcfWriter, final SAMSequenceDictionary sequenceDictionary,
                         final Set<VCFHeaderLine>  defaultToolHeaderLines) {
    Utils.nonNull(vcfWriter);

    final Set<VCFHeaderLine> lines = new HashSet<>(defaultToolHeaderLines);
    lines.addAll(genotypingEngine.getAppropriateVCFInfoHeaders());

    // every annotation field contributed by the VariantAnnotatorEngine
    lines.addAll(annotationEngine.getVCFAnnotationDescriptions());

    // INFO lines that every caller has to declare
    final String[] commonInfoKeys = {
            GATKVCFConstants.DOWNSAMPLED_KEY,
            GATKVCFConstants.MLE_ALLELE_COUNT_KEY,
            GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY
    };
    for (final String key : commonInfoKeys) {
        lines.add(GATKVCFHeaderLines.getInfoLine(key));
    }

    // FORMAT lines that every caller has to declare
    VCFStandardHeaderLines.addStandardFormatLines(lines, true,
            VCFConstants.GENOTYPE_KEY,
            VCFConstants.GENOTYPE_QUALITY_KEY,
            VCFConstants.DEPTH_KEY,
            VCFConstants.GENOTYPE_PL_KEY);

    // phasing FORMAT lines are only declared when physical phasing runs
    final boolean physicalPhasingEnabled = !hcArgs.doNotRunPhysicalPhasing;
    if (physicalPhasingEnabled) {
        lines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY));
        lines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY));
    }

    // The FILTER line is declared unconditionally because it is not always certain under which
    // circumstances the filter is used; e.g. the lowQual field is used when emitting all sites.
    lines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_QUAL_FILTER_NAME));

    if (emitReferenceConfidence()) {
        lines.addAll(referenceConfidenceModel.getVCFHeaderLines());
    }

    final VCFHeader header = new VCFHeader(lines, sampleSet);
    header.setSequenceDictionary(sequenceDictionary);
    vcfWriter.writeHeader(header);
}
示例20
/**
 * Reads the sex recorded in the "gender" metadata line of a VCF header.
 *
 * @param file VCF file to inspect; may be null
 * @return {@code Sex.Unknown} when {@code file} is null or has no "gender" line, otherwise the
 *         {@code Sex} constant named by that line's value
 */
static Sex getFingerprintSex(final File file) {
    if (file == null) {
        return Sex.Unknown;
    }
    try (VCFFileReader vcfReader = new VCFFileReader(file, false)) {
        final VCFHeaderLine genderLine = vcfReader.getFileHeader().getMetaDataLine("gender");
        if (genderLine == null) {
            return Sex.Unknown;
        }
        return Sex.valueOf(genderLine.getValue());
    }
}
示例21
/**
 * Returns the value of a required "other" header line.
 *
 * @param vcfHeader header to search
 * @param keyName   key of the header line
 * @return the value of the header line
 * @throws IllegalArgumentException if the header has no such line
 */
public static String getValueFromVcfOtherHeaderLine(final VCFHeader vcfHeader, final String keyName) {
    final VCFHeaderLine line = vcfHeader.getOtherHeaderLine(keyName);
    if (line == null) {
        throw new IllegalArgumentException("Input VCF file is missing header line of type '" + keyName + "'");
    }
    return line.getValue();
}
示例22
/**
 * Returns the value of a required "other" header line parsed as an integer.
 *
 * @param vcfHeader header to search
 * @param keyName   key of the header line
 * @return the line's value converted with {@link Integer#valueOf(String)}
 * @throws IllegalArgumentException if the header has no such line
 */
public static Integer getIntegerFromVcfOtherHeaderLine(final VCFHeader vcfHeader, final String keyName) {
    final VCFHeaderLine line = vcfHeader.getOtherHeaderLine(keyName);
    if (line == null) {
        throw new IllegalArgumentException("Input VCF file is missing header line of type '" + keyName + "'");
    }
    final String rawValue = line.getValue();
    return Integer.valueOf(rawValue);
}
示例23
/**
 * Returns the value of an optional "other" header line.
 *
 * @param vcfHeader header to search
 * @param keyName   key of the header line
 * @return the value of the header line, or {@code null} if the header has no such line
 */
public static String getOptionalValueFromVcfOtherHeaderLine(final VCFHeader vcfHeader, final String keyName) {
    final VCFHeaderLine line = vcfHeader.getOtherHeaderLine(keyName);
    return line == null ? null : line.getValue();
}
示例24
/**
 * Adds the zCall header lines to {@code header}: the zCall version, the name of the thresholds
 * file, the X/Y threshold INFO fields and the two genotype FORMAT fields.
 *
 * @param header header to extend in place
 */
private void addAdditionalHeaderFields(VCFHeader header) {
    // Provenance of the zCall run.
    final VCFHeaderLine versionLine = new VCFHeaderLine(InfiniumVcfFields.ZCALL_VERSION, ZCALL_VERSION);
    header.addMetaDataLine(versionLine);
    final VCFHeaderLine thresholdsLine =
            new VCFHeaderLine(InfiniumVcfFields.ZCALL_THRESHOLDS, ZCALL_THRESHOLDS_FILE.getName());
    header.addMetaDataLine(thresholdsLine);

    // Per-variant thresholds.
    final VCFInfoHeaderLine thresholdX =
            new VCFInfoHeaderLine(InfiniumVcfFields.ZTHRESH_X, 1, VCFHeaderLineType.Float, "zCall X threshold");
    header.addMetaDataLine(thresholdX);
    final VCFInfoHeaderLine thresholdY =
            new VCFInfoHeaderLine(InfiniumVcfFields.ZTHRESH_Y, 1, VCFHeaderLineType.Float, "zCall Y threshold");
    header.addMetaDataLine(thresholdY);

    // Per-sample genotype calls.
    final VCFFormatHeaderLine autocallGenotype =
            new VCFFormatHeaderLine(InfiniumVcfFields.GTA, 1, VCFHeaderLineType.String, "Illumina Autocall Genotype");
    header.addMetaDataLine(autocallGenotype);
    final VCFFormatHeaderLine zcallGenotype =
            new VCFFormatHeaderLine(InfiniumVcfFields.GTZ, 1, VCFHeaderLineType.String, "zCall Genotype");
    header.addMetaDataLine(zcallGenotype);
}
示例25
/**
 * Looks up a required "other" header line and returns its value.
 *
 * @param vcfHeader header to search
 * @param keyName   key of the header line
 * @return the value of the header line
 * @throws IllegalArgumentException if the header has no such line
 */
private String getValueFromVcfOtherHeaderLine(final VCFHeader vcfHeader, final String keyName) {
    final VCFHeaderLine found = vcfHeader.getOtherHeaderLine(keyName);
    if (found == null) {
        throw new IllegalArgumentException("Input VCF file is missing header line of type '" + keyName + "'");
    }
    return found.getValue();
}
示例26
/**
 * Writes one VCF of mendelian violations per trio into {@code VCF_DIR}.
 *
 * <p>Does nothing when {@code VCF_DIR} is null. Every output uses the input header's metadata
 * plus the violation INFO lines, has the trio's mother, father and individual as samples, and
 * contains that family's violations ordered by the input header's contig lines.
 *
 * @param result detector result supplying the violations per family id
 */
private void writeAllViolations(final MendelianViolationDetector.Result result) {
    if (VCF_DIR == null) {
        return;
    }
    LOG.info(String.format("Writing family violation VCFs to %s/", VCF_DIR.getAbsolutePath()));

    final VariantContextComparator vcComparator = new VariantContextComparator(inputHeader.get().getContigLines());
    final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputHeader.get().getMetaDataInInputOrder());

    headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.MENDELIAN_VIOLATION_KEY, 1, VCFHeaderLineType.String, "Type of mendelian violation."));
    headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AC, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Original AC"));
    headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AF, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Original AF"));
    headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AN, 1, VCFHeaderLineType.Integer, "Original AN"));

    for (final PedFile.PedTrio trio : pedFile.get().values()) {
        final File outputFile = new File(VCF_DIR, IOUtil.makeFileNameSafe(trio.getFamilyId() + IOUtil.VCF_FILE_EXTENSION));
        LOG.info(String.format("Writing %s violation VCF to %s", trio.getFamilyId(), outputFile.getAbsolutePath()));

        // try-with-resources: the writer is closed even if building the header, sorting the
        // violations or writing a record throws, so a failure does not leak the open file.
        try (final VariantContextWriter out = new VariantContextWriterBuilder()
                .setOutputFile(outputFile)
                .unsetOption(INDEX_ON_THE_FLY)
                .build()) {

            final VCFHeader newHeader = new VCFHeader(headerLines, CollectionUtil.makeList(trio.getMaternalId(), trio.getPaternalId(), trio.getIndividualId()));
            final TreeSet<VariantContext> orderedViolations = new TreeSet<>(vcComparator);
            orderedViolations.addAll(result.violations().get(trio.getFamilyId()));

            out.writeHeader(newHeader);
            orderedViolations.forEach(out::add);
        }
    }
}
示例27
/**
 * Adds a "badDate" header line holding "04/18/2019" to the test VCF's header and expects
 * {@code InfiniumVcfFields.getDateFromVcfOtherHeaderLine} to throw an
 * {@link IllegalArgumentException} for it.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void testGetWronglyFormattedDateFromVcfOtherHeaderLine() {
    try (final VCFFileReader reader = new VCFFileReader(TEST_VCF_FILE, false)) {
        final VCFHeader vcfHeader = reader.getFileHeader();
        final VCFHeaderLine badDateLine = new VCFHeaderLine("badDate", "04/18/2019");
        vcfHeader.addMetaDataLine(badDateLine);

        InfiniumVcfFields.getDateFromVcfOtherHeaderLine(vcfHeader, "badDate", autocallDateFormat);
    }
}
示例28
/**
 * Hands the header to the codec together with the version found in its metadata.
 *
 * <p>Normally AbstractVCFCodec parses the header and thereby sets the version field. It gets
 * used later on, so it has to be set here as well.
 *
 * @param header header to install on the codec
 */
@Override public void setHeader(VCFHeader header) {
    final VCFHeaderVersion version = findDeclaredVersion(header);
    codec.setHeaderAndVersion(header, version);
}

/**
 * Returns the version given by the first metadata line (input order) whose key is a format
 * string, or {@code null} when no line has such a key.
 */
private VCFHeaderVersion findDeclaredVersion(final VCFHeader header) {
    for (final VCFHeaderLine line : header.getMetaDataInInputOrder()) {
        if (VCFHeaderVersion.isFormatString(line.getKey())) {
            return VCFHeaderVersion.toHeaderVersion(line.getValue());
        }
    }
    return null;
}
示例29
/**
 * Supplies the header lines that describe this tool invocation.
 *
 * @return an empty, newly created set when addOutputVCFCommandLine is false; otherwise the set
 *         of VCF header lines built from the toolkit short name, the tool's simple class name,
 *         the version, the current date/time and the command line
 */
protected Set<VCFHeaderLine> getDefaultToolVCFHeaderLines() {
    if (!addOutputVCFCommandLine) {
        return new HashSet<>();
    }
    return GATKVariantContextUtils.getDefaultVCFHeaderLines(
            getToolkitShortName(),
            this.getClass().getSimpleName(),
            getVersion(),
            Utils.getDateTimeForDisplay((ZonedDateTime.now())),
            getCommandLine());
}
示例30
/**
 * Returns the default VCF header lines for the output of this tool.
 *
 * @return if addOutputVCFCommandLine is true, the header lines built from the toolkit short
 *         name, the tool's simple class name, the version, the current date/time and the
 *         command line; otherwise a new empty set
 */
protected Set<VCFHeaderLine> getDefaultToolVCFHeaderLines() {
    final Set<VCFHeaderLine> toolLines;
    if (addOutputVCFCommandLine) {
        toolLines = GATKVariantContextUtils.getDefaultVCFHeaderLines(
                getToolkitShortName(),
                this.getClass().getSimpleName(),
                getVersion(),
                Utils.getDateTimeForDisplay((ZonedDateTime.now())),
                getCommandLine());
    } else {
        toolLines = new HashSet<>();
    }
    return toolLines;
}