diff --git a/doc/CHANGELOG.md b/doc/CHANGELOG.md index 72a8643ee..c32663f5b 100644 --- a/doc/CHANGELOG.md +++ b/doc/CHANGELOG.md @@ -11,6 +11,27 @@ Added: - Replace old identity check with new Bayesian version in "main" QC output and plots + +Release 1.12.0: 2016-04-07 +-------------------------- + +Added: +- New ready_workflow.pl script to query iRODS for QC plex data, write VCF, +generate config YML, and set up an analysis directory +- Ruby workflows can read multiple VCF and plex manifest paths from +YML, and input them to the quality_control pipeline task +- Revised default pass threshold for Bayesian identity check + +Removed: +- genotyping_yml.pl and tests; functionality replaced by ready_workflow.pl +- Annotation modules exporting constants for irods metadata; constants have +been relocated to Metadata.pm in wtsi-npg/perl-irods-wrap + +Fixed: +- Refactored run_qc.pl for better handling of command-line arguments +- Updated for compatibility with latest dependency versions + + Release 1.11.6: 2016-02-08 -------------------------- @@ -19,6 +40,7 @@ Fixed: the SequenceScape warehouse that was not specific enough to allow it to proceed when a sample had been analysed more than once. + Release 1.11.5: 2015-12-15 -------------------------- @@ -34,6 +56,7 @@ Added: - Find genome reference path from iRODS metadata for VCF header; adds dependency on wtsi-npg/npg_tracking + Release 1.11.4: 2015-10-09 -------------------------- @@ -41,6 +64,7 @@ Fixed: - Bug in publish_infinium_genotypes.pl which caused it to exit when it detected bad or missing data. It now detects these files and skips them. 
+ Release 1.11.3: 2015-09-18 -------------------------- @@ -53,6 +77,7 @@ Fixed: Changed: - bcftools version upgraded to 1.2 + Release 1.11.2: 2015-08-24 -------------------------- diff --git a/src/perl/Build.PL b/src/perl/Build.PL index be3f07e74..9e6838aac 100644 --- a/src/perl/Build.PL +++ b/src/perl/Build.PL @@ -29,21 +29,22 @@ my $build = Build->new 'Test::Compile' => 0 }, requires => { - 'Config::IniFiles' => '>= 2.8.6', - 'DBI' => '>= 1.63', - 'DBIx::Class' => '>= 0.082', - 'Data::Dump' => '>= 1.22', - 'DateTime' => '>= 1.18', - 'List::AllUtils' => '>= 0.09', - 'Log::Log4perl' => '>= 1.46', - 'Moose' => '>= 2.1', - 'MooseX::Types' => '>= 0.45', - 'Set::Scalar' => '>= 1.29', - 'Text::CSV' => '>= 1.33', - 'Try::Tiny' => '>= 0.22', - 'URI' => '>= 1.67', - 'WTSI::DNAP::Warehouse::Schema' => '>= 1.1', - 'WTSI::NPG::iRODS' => '>= 0.15.0' + 'Config::IniFiles' => '>= 2.8.6', + 'DBI' => '>= 1.63', + 'DBIx::Class' => '>= 0.082', + 'Data::Dump' => '>= 1.22', + 'DateTime' => '>= 1.18', + 'List::AllUtils' => '>= 0.09', + 'Log::Log4perl' => '>= 1.46', + 'Moose' => '>= 2.1', + 'MooseX::Types' => '>= 0.45', + 'npg_tracking::data::reference::find' => '>= 84.8', + 'Set::Scalar' => '>= 1.29', + 'Text::CSV' => '>= 1.33', + 'Try::Tiny' => '>= 0.22', + 'URI' => '>= 1.67', + 'WTSI::DNAP::Warehouse::Schema' => '>= 1.1', + 'WTSI::NPG::iRODS' => '>= 0.15.0' }, recommends => { 'UUID' => '>= 0.24', @@ -56,6 +57,8 @@ my $build = Build->new 'etc/states.ini' => 'etc/states.ini' }, conf_files => {'etc/log4perl.conf' => 'etc/log4perl.conf', 'etc/qc_config.json' => 'etc/qc_config.json', + 'etc/ready_qc_fluidigm.json' => 'etc/ready_qc_fluidigm.json', + 'etc/ready_qc_sequenom.json' => 'etc/ready_qc_sequenom.json', 'etc/illuminus_prefilter.json' => 'etc/illuminus_prefilter.json', 'etc/zcall_prefilter.json' => 'etc/zcall_prefilter.json', 'etc/reportIntro.tex' => 'etc/reportIntro.tex', diff --git a/src/perl/MANIFEST b/src/perl/MANIFEST index bf5150c62..27cd29a16 100644 --- a/src/perl/MANIFEST +++ 
b/src/perl/MANIFEST @@ -7,7 +7,7 @@ bin/create_test_database.pl bin/filter_illuminus_output.pl bin/find_modified_files.pl bin/gendermix_standalone.pl -bin/genotyping_yml.pl +bin/identity_simulation.pl bin/illuminus.pl bin/manifest_plex_intersection.pl bin/plate_heatmap_index.pl @@ -72,7 +72,6 @@ lib/WTSI/NPG/Database/DBIx.pm lib/WTSI/NPG/Database/MLWarehouse.pm lib/WTSI/NPG/Database/Warehouse.pm lib/WTSI/NPG/Expression/AnalysisPublisher.pm -lib/WTSI/NPG/Expression/Annotation.pm lib/WTSI/NPG/Expression/Annotator.pm lib/WTSI/NPG/Expression/ChipLoadingManifest.pm lib/WTSI/NPG/Expression/ChipLoadingManifestV1.pm @@ -83,7 +82,6 @@ lib/WTSI/NPG/Expression/Publisher.pm lib/WTSI/NPG/Expression/ResultSet.pm lib/WTSI/NPG/Expression/SampleProbeProfile.pm lib/WTSI/NPG/Genotyping.pm -lib/WTSI/NPG/Genotyping/Annotation.pm lib/WTSI/NPG/Genotyping/Annotator.pm lib/WTSI/NPG/Genotyping/Call.pm lib/WTSI/NPG/Genotyping/Database/Infinium.pm @@ -137,6 +135,7 @@ lib/WTSI/NPG/Genotyping/QC/QCPlotTests.pm lib/WTSI/NPG/Genotyping/QC/Reports.pm lib/WTSI/NPG/Genotyping/QC/SnpID.pm lib/WTSI/NPG/Genotyping/QC_wip/Check/Identity.pm +lib/WTSI/NPG/Genotyping/QC_wip/Check/IdentitySimulator.pm lib/WTSI/NPG/Genotyping/QC_wip/Check/SampleIdentityBayesian.pm lib/WTSI/NPG/Genotyping/Reference.pm lib/WTSI/NPG/Genotyping/Sequenom/AssayDataObject.pm @@ -156,6 +155,7 @@ lib/WTSI/NPG/Genotyping/VCF/GtcheckWrapper.pm lib/WTSI/NPG/Genotyping/VCF/Header.pm lib/WTSI/NPG/Genotyping/VCF/HeaderParser.pm lib/WTSI/NPG/Genotyping/VCF/Parser.pm +lib/WTSI/NPG/Genotyping/VCF/PlexResultFinder.pm lib/WTSI/NPG/Genotyping/VCF/ReferenceFinder.pm lib/WTSI/NPG/Genotyping/VCF/Slurper.pm lib/WTSI/NPG/Genotyping/VCF/VCFDataSet.pm @@ -349,6 +349,7 @@ t/fluidigm_resultset/missing_tif/0123456789/0123456789.csv t/fluidigm_resultset/missing_tif/0123456789/Data/athos.tif t/fluidigm_resultset/missing_tif/0123456789/Data/porthos.tif t/fluidigm_subscriber.t +t/fluidigm_subscriber/chromosome_lengths_GRCh37.json t/fluidigm_subscriber/qc.tsv 
t/fluidigm_subscriber/S01_1381735059.csv t/fluidigm_subscriber/S01_1381735060.csv @@ -366,14 +367,10 @@ t/gender/input_xhet_large.txt t/gender_marker.t t/gender_marker_call.t t/gender_standalone.t -t/genotyping_yml.t -t/genotyping_yml/config.yml -t/genotyping_yml/genotype_illuminus.yml -t/genotyping_yml/genotype_zcall.yml -t/genotyping_yml/genotyping_DUMMY.db t/identity.t t/identity_check_sample_wip_bayesian.t t/identity_check_wip.t +t/identity_simulation_wip.t t/illuminus.t t/illuminus/example.json t/illuminus/example_all.iln @@ -441,6 +438,9 @@ t/publisher.t t/publisher/lorem.txt t/publisher/update/lorem.txt t/qc/.gitattributes +t/qc/check/identity/combined_identity_expected.json +t/qc/check/identity/expected_identity_results.json +t/qc/check/identity/expected_omit_results.json t/qc/check/identity/fake_genotyping.db t/qc/check/identity/fake_qc_genotypes.bed t/qc/check/identity/fake_qc_genotypes.bim @@ -452,9 +452,17 @@ t/qc/check/identity/fake_swap_genotypes.bim t/qc/check/identity/fake_swap_genotypes.fam t/qc/check/identity/fake_swap_genotypes.map t/qc/check/identity/fake_swap_genotypes.ped +t/qc/check/identity/identity_script_output.csv +t/qc/check/identity/identity_script_output.json +t/qc/check/identity/identity_script_output_alternate_prior.json t/qc/check/identity/qc_plex_calls.vcf t/qc/check/identity/qc_plex_calls_1.vcf t/qc/check/identity/qc_plex_calls_2.vcf +t/qc/check/identity/simulated_ecp.txt +t/qc/check/identity/simulated_qcr.txt +t/qc/check/identity/simulated_qcs.txt +t/qc/check/identity/simulated_smp.txt +t/qc/check/identity/simulated_xer.txt t/qc/check/identity/W30467_snp_set_info_1000Genomes.tsv t/qc/check/identity/W30467_snp_set_info_1000Genomes_1.tsv t/qc/check/identity/W30467_snp_set_info_1000Genomes_2.tsv @@ -467,6 +475,7 @@ t/qc_test_data/output_examples/identity_check.json t/qc_test_data/output_examples/magnitude.txt t/qc_test_data/output_examples/qc_exclusions.json t/qc_test_data/output_examples/qc_metrics.json 
+t/qc_test_data/output_examples/qc_results.csv t/qc_test_data/output_examples/qc_results.json t/qc_test_data/output_examples/sample_cr_het.txt t/qc_test_data/output_examples/sample_xhet_gender.txt @@ -491,7 +500,7 @@ t/query_project_samples.t t/query_project_samples/sample1.gtc t/query_project_samples/sample1_Grn.idat t/query_project_samples/sample1_Red.idat -t/ready_plex.t +t/ready_workflow.t t/reference_finder.t t/reports.t t/reports/crHetDensityHeatmap.pdf @@ -606,6 +615,7 @@ t/sequenom_assay_result.t t/sequenom_assay_resultset.t t/sequenom_publisher.t t/sequenom_subscriber.t +t/sequenom_subscriber/chromosome_lengths_GRCh37.json t/sequenom_subscriber/sequenom_001.csv t/sequenom_subscriber/sequenom_002.csv t/sequenom_subscriber/sequenom_003.csv @@ -653,7 +663,6 @@ t/vcf/fluidigm_004.csv t/vcf/fluidigm_header_1.txt t/vcf/fluidigm_header_2.txt t/vcf/fluidigm_inputs.txt -t/vcf/fluidigm_samples.json t/vcf/pairwise_discordance_fluidigm.json t/vcf/pairwise_discordance_sequenom.json t/vcf/qc_fluidigm_snp_info_GRCh37.tsv @@ -667,7 +676,6 @@ t/vcf/sequenom_alternate_snp_002.csv t/vcf/sequenom_alternate_snp_003.csv t/vcf/sequenom_alternate_snp_004.csv t/vcf/sequenom_inputs.txt -t/vcf/sequenom_samples.json t/vcf/W30467_snp_set_info_GRCh37.tsv t/vcf/W30467_snp_set_info_GRCh37_1.tsv t/WTSI/NPG/Database/MLWarehouseTest.pm @@ -700,7 +708,9 @@ t/WTSI/NPG/Genotyping/Infinium/InfiniumDataObjectTest.pm t/WTSI/NPG/Genotyping/Infinium/PublisherTest.pm t/WTSI/NPG/Genotyping/Infinium/ResultSetTest.pm t/WTSI/NPG/Genotyping/Infinium/SampleQueryTest.pm +t/WTSI/NPG/Genotyping/QC/CollationTest.pm t/WTSI/NPG/Genotyping/QC/IdentityTest.pm +t/WTSI/NPG/Genotyping/QC_wip/Check/IdentitySimulatorTest.pm t/WTSI/NPG/Genotyping/QC_wip/Check/IdentityTest.pm t/WTSI/NPG/Genotyping/QC_wip/Check/SampleIdentityBayesianTest.pm t/WTSI/NPG/Genotyping/ScriptsTest.pm @@ -713,11 +723,11 @@ t/WTSI/NPG/Genotyping/SNPSetPublisherTest.pm t/WTSI/NPG/Genotyping/SNPSetTest.pm t/WTSI/NPG/Genotyping/SNPTest.pm 
t/WTSI/NPG/Genotyping/UpdatePlinkAnnotationTest.pm -t/WTSI/NPG/Genotyping/VCF/ReadyPlexCallsTest.pm +t/WTSI/NPG/Genotyping/VCF/ReadyWorkflowTest.pm t/WTSI/NPG/Genotyping/VCF/ReferenceFinderTest.pm t/WTSI/NPG/Genotyping/VCF/VCFTest.pm -t/WTSI/NPG/Genotyping/YMLTest.pm t/WTSI/NPG/PublisherTest.pm t/WTSI/NPG/SimplePublisherTest.pm +t/WTSI/NPG/Test.pm t/WTSI/NPG/UtilitiesTest.pm t/wtsi_genotyping.t diff --git a/src/perl/bin/check_identity_bed_wip.pl b/src/perl/bin/check_identity_bed_wip.pl index 33133f68b..ebc60af02 100755 --- a/src/perl/bin/check_identity_bed_wip.pl +++ b/src/perl/bin/check_identity_bed_wip.pl @@ -54,13 +54,13 @@ sub run { my $log4perl_config; my $json_path; my $pass_threshold; - my @plex_manifests; - my @plex_manifests_irods; + my $plex_manifests; + my $plex_manifests_irods; my $plink; my $sample_json; my $sample_mismatch_prior; my $swap_threshold; - my @vcf; # array for (maybe) multiple VCF inputs + my $vcf; my $verbose; GetOptions( @@ -74,12 +74,12 @@ sub run { 'json=s' => \$json_path, 'prior=f' => \$sample_mismatch_prior, 'pass_threshold=f' => \$pass_threshold, - 'plex=s' => \@plex_manifests, - 'plex_irods=s' => \@plex_manifests_irods, + 'plex=s' => \$plex_manifests, + 'plex_irods=s' => \$plex_manifests_irods, 'plink=s' => \$plink, 'sample_json=s' => \$sample_json, 'swap_threshold=f' => \$swap_threshold, - 'vcf=s' => \@vcf, + 'vcf=s' => \$vcf, 'verbose' => \$verbose, 'xer=f' => \$expected_error_rate); @@ -135,12 +135,30 @@ sub run { ### read SNPSet object(s) from file and/or iRODS, create union set ### my @snpsets; - foreach my $plex (@plex_manifests) { - push @snpsets, WTSI::NPG::Genotyping::SNPSet->new($plex); + if ($plex_manifests) { + my @plex_manifests = split(/,/msx, $plex_manifests); + foreach my $plex (@plex_manifests) { + unless (-e $plex) { + $log->logcroak("Plex manifest filesystem path '", $plex, + "' does not exist. 
Paths must be supplied as ", + "a comma-separated list; individual paths ", + "cannot contain commas."); + } + push @snpsets, WTSI::NPG::Genotyping::SNPSet->new($plex); + } } - foreach my $plex (@plex_manifests_irods) { - my $plex_obj = WTSI::NPG::iRODS::DataObject->new($irods, $plex); - push @snpsets, WTSI::NPG::Genotyping::SNPSet->new($plex_obj); + if ($plex_manifests_irods) { + my @plex_manifests_irods = split(/,/msx, $plex_manifests_irods); + foreach my $plex (@plex_manifests_irods) { + unless ($irods->is_object($plex)) { + $log->logcroak("Plex manifest iRODS path '", $plex, + "' does not exist. Paths must be supplied as ", + "a comma-separated list; individual paths ", + "cannot contain commas."); + } + my $plex_obj = WTSI::NPG::iRODS::DataObject->new($irods, $plex); + push @snpsets, WTSI::NPG::Genotyping::SNPSet->new($plex_obj); + } } if (scalar @snpsets == 0) { $log->logcroak("Must supply at least one plex manifest using ", @@ -189,17 +207,20 @@ sub run { ### read QC plex calls from VCF file(s) ### my %qc_calls; - if (!(@vcf)) { - $log->logcroak("At least one --vcf argument is required"); + if (!$vcf) { + $log->logcroak("At least one VCF path is required. Multiple paths ", + "may be supplied as a comma-separated list; ", + "individual paths cannot contain commas."); } - foreach my $vcf (@vcf) { + my @vcf= split(/,/msx, $vcf); + foreach my $vcf_path (@vcf) { my $vcf_fh; - if (! -e $vcf) { + if (! 
-e $vcf_path) { $log->logcroak("File argument to --vcf does not exist: '", - $vcf, "'"); + $vcf_path, "'"); } else { - open $vcf_fh, "<", $vcf || - $log->logcroak("Cannot open VCF input '", $vcf, "'"); + open $vcf_fh, "<", $vcf_path or # 'or', not '||': test open's result + $log->logcroak("Cannot open VCF input '", $vcf_path, "'"); } my %slurp_args = ( input_filehandle => $vcf_fh, @@ -208,7 +229,7 @@ sub run { my $vcf_data = WTSI::NPG::Genotyping::VCF::Slurper->new( %slurp_args)->read_dataset(); close $vcf_fh || $log->logcroak("Cannot close VCF input '", - $vcf, "'"); + $vcf_path, "'"); my %vcf_calls = %{$vcf_data->calls_by_sample()}; foreach my $ssid (keys %vcf_calls) { my $uri = $ssid_to_uri{$ssid}; @@ -249,24 +270,25 @@ =head1 SYNOPSIS --json=PATH Path for JSON output. Required. May be '-' for STDOUT. --pass_threshold=NUM Minimum similarity to pass identity check. Optional. - --plex=PATH Path to .tsv manifest for a QC plex SNP set. Can - give multiple arguments for multiple plex files, eg. - '--plex file1.tsv --plex file2.tsv'. At least one - manifest must be supplied using the --plex and/or - --plex_irods arguments. - --plex_irods=PATH Location of iRODS data object corresponding to .tsv - manifest for a QC plex SNP set. Can give multiple - arguments, similarly to --plex. At least one - manifest must be supplied using the --plex and/or - --plex_irods arguments. + --plex=PATH Path to one or more .tsv manifests for QC plex + SNP sets. Multiple plex manifests are given as a + comma-separated list; the paths themselves may not + contain commas. + --plex_irods=PATH Location of one or more iRODS data objects + corresponding to .tsv manifest for QC plex SNP + sets. Can give multiple paths, similarly to + --plex. At least one manifest must be supplied + using the --plex and/or --plex_irods arguments. --plink=STEM Plink binary stem (path omitting the .bed, .bim, .fam suffix) for production data. --sample_json=PATH JSON file for translating between Sanger sample ID and sample URI. Required. 
--swap_threshold=NUM Minimum cross-similarity to warn of sample swap. Optional. - --vcf=PATH Path to VCF input file. Can give multiple arguments - for multiple VCF inputs, similarly to --plex. + --vcf=PATH Path to one or more VCF input files. Can give + multiple paths as a comma-separated list, + similarly to --plex. Must supply at least one VCF + path. --verbose Turn on verbose logging. Optional. =head1 DESCRIPTION @@ -285,7 +307,7 @@ =head1 AUTHOR =head1 COPYRIGHT AND DISCLAIMER -Copyright (c) 2015 Genome Research Limited. All Rights Reserved. +Copyright (c) 2015, 2016 Genome Research Limited. All Rights Reserved. This program is free software: you can redistribute it and/or modify it under the terms of the Perl Artistic License or the GNU General diff --git a/src/perl/bin/genotyping_yml.pl b/src/perl/bin/genotyping_yml.pl deleted file mode 100755 index f5fed9875..000000000 --- a/src/perl/bin/genotyping_yml.pl +++ /dev/null @@ -1,230 +0,0 @@ -#!/software/bin/perl - -use utf8; - -package main; - -use warnings; -use strict; -use Getopt::Long; -use Log::Log4perl qw(:easy); -use Pod::Usage; -use YAML qw /DumpFile/; - -# Prototype script for simplifying use of the genotyping pipeline -# Generate appropriate .yml files for use by Percolate - -Log::Log4perl->easy_init($ERROR); - -our $VERSION = ''; -our $PERCOLATE_LOG_NAME = 'percolate.log'; - -run() unless caller(); - -sub run { - - my ($outdir, $workdir, $manifest, $plex_manifest, $dbfile, $run, $egt, - $verbose, $host, $workflow, $chunk_size, $memory, $zstart, $ztotal); - - my $log = Log::Log4perl->get_logger(); - - GetOptions('outdir=s' => \$outdir, - 'workdir=s' => \$workdir, - 'manifest=s' => \$manifest, - 'plex_manifest=s' => \$plex_manifest, - 'host=s' => \$host, - 'dbfile=s' => \$dbfile, - 'run=s' => \$run, - 'egt=s' => \$egt, - 'verbose' => \$verbose, - 'workflow=s' => \$workflow, - 'chunk_size=i' => \$chunk_size, - 'memory=i' => \$memory, - 'zstart=i' => \$zstart, - 'ztotal=i' => \$ztotal, - 'help' => sub { 
pod2usage(-verbose => 2, -exitval => 0) }, - ); - $outdir ||= '.'; - if (!($workdir && $run)) { - $log->logcroak("Must specify pipeline run name and working directory"); - } elsif (!(-e $outdir && -d $outdir)) { - $log->logcroak("Output path '$outdir' does not exist or is not a directory"); - } elsif (-e $workdir && !(-d $workdir)) { - $log->logcroak("Working directory path '$workdir' already exists, and is not a directory"); - } elsif (!(-e $workdir)) { - $log->logwarn("Warning: Pipeline working directory '$workdir' does not exist; must be created before running workflow."); - } - if ($workdir !~ '/$') { $workdir .= '/'; } # ensure $workdir ends with / - if ($outdir !~ '/$') { $outdir .= '/'; } # similarly for $outdir - if ($verbose) { print "WORKDIR: $workdir\n"; } - - $dbfile ||= 'genotyping.db'; - my $dbpath = $workdir.$dbfile; - if (! -e $dbpath) { - $log->logwarn("Warning: Pipeline database '$dbpath' does not exist; must be created before running workflow."); - } - $host ||= 'farm3-head2'; - # illuminus paralellizes by SNP, other callers by sample - if ($workflow && $workflow eq 'illuminus') { $chunk_size ||= 4000; } - else { $chunk_size ||= 40; } - $memory ||= 2048, - $zstart ||= 7; - $ztotal ||= 1; - - my %config = ( - 'root_dir' => $workdir, - 'log' => $workdir.$PERCOLATE_LOG_NAME, - 'log_level' => 'DEBUG', - 'msg_host' => $host, - 'msg_port' => '11300', - 'async' => 'lsf', - 'max_processes' => '250' - ); - DumpFile($outdir.'config.yml', (\%config)); - - if ($workflow) { - my @params = ($outdir, $dbpath, $run, $workdir, $manifest, $plex_manifest, $chunk_size, $memory); - if (! $manifest) { - $log->logcroak("Must specify --manifest for workflow!"); - } elsif (! -e $manifest) { - $log->logwarn("Warning: Manifest '$manifest' does not exist, ", - "must be created before running workflow."); - } elsif (! $plex_manifest) { - $log->logcroak("Must specify --plex-manifest for workflow!"); - } elsif (! 
-e $plex_manifest) { - $log->logwarn("Warning: Plex manifest '$plex_manifest' does not ", - "exist, must be created before running workflow."); - } elsif ($workflow eq 'illuminus') { - write_illuminus(@params); - } elsif ($workflow eq 'zcall') { - if (!$egt) { - $log->logcroak("Must specify --egt for zcall workflow"); - } elsif (! -e $egt) { - $log->logwarn("Warning: EGT file '$egt' does not exist, must be created before running workflow."); - } else { - write_zcall(\@params, $egt, $zstart, $ztotal); - } - } else { - $log->logcroak("Invalid workflow argument '", $workflow, - "'; must be one of illuminus, zcall"); - } - } -} - -sub write_illuminus { - my ($outdir, $dbpath, $run, $workdir, $manifest, $plex_manifest, - $chunk_size, $memory) = @_; - - my %illuminus_args = ( - 'chunk_size' => $chunk_size, - 'memory' => $memory, - 'manifest' => $manifest, - 'plex_manifest' => $plex_manifest, - 'gender_method' => 'Supplied' - ); - my $workflow_name = 'Genotyping::Workflows::GenotypeIlluminus'; - write_workflow($dbpath, $run, $workdir, $workflow_name, \%illuminus_args, - $outdir.'genotype_illuminus.yml'); -} - -sub write_zcall { - my ($paramsRef, $egt, $zstart, $ztotal) = @_; - my ($outdir, $dbpath, $run, $workdir, $manifest, $plex_manifest, - $chunk_size, $memory) = @{$paramsRef}; - my %zcall_args = ( - 'chunk_size' => $chunk_size, - 'memory' => $memory, - 'manifest' => $manifest, - 'plex_manifest' => $plex_manifest, - 'egt' => $egt, - 'zstart' => $zstart, - 'ztotal' => $ztotal - ); - my $workflow_name = 'Genotyping::Workflows::GenotypeZCall'; - write_workflow($dbpath, $run, $workdir, $workflow_name, \%zcall_args, - $outdir.'genotype_zcall.yml'); -} - -sub write_workflow { - my ($dbpath, $run, $workdir, $workflow_name, $extra_args_ref, $out) = @_; - my @workflow_args = ($dbpath, $run, $workdir, $extra_args_ref); - my %args = ( - 'library' => 'genotyping', - 'workflow' => $workflow_name, - 'arguments' => \@workflow_args, - ); - DumpFile($out, (\%args)); -} - - -__END__ - - 
-=head1 NAME - -genotyping_yml - -=head1 SYNOPSIS - -genotyping_yml [--dbfile ] [--help] - --manifest --run [--egt ] - [--verbose] --workdir --workflow - -Options: - - --chunk_size Chunk size for parallelization. Optional, defaults to - 4000 (SNPs) for Illuminus or 40 (samples) for zCall. - --dbfile The SQLite database filename (not the full path). Optional, - defaults to genotyping.db. - --egt Path to an Illumina .egt cluster file. Required for zcall. - --help Display help. - --host Name of host machine for the beanstalk message queue. - Optional, defaults to farm3-head2. - --manifest Path to the .bpm.csv manifest file. - --plex_manifest Path to the .tsv QC plex manifest. - --memory Memory limit hint for LSF, in MB. Default = 2048. - --outdir Directory in which to write YML files. Optional, defaults - to current working directory. - --run The pipeline run name in the database. Required. - --verbose Print messages while processing. Optional. - --workdir Working directory for pipeline run. Required. - --workflow Pipeline workflow for which to create a .yml file. If - supplied, must be one of: illuminus, genosnp, zcall. - If absent, only config.yml will be generated. - --zstart Start of zscore range, used for zCall only. Default = 7. - --ztotal Number of zscores in range, for zCall only. Default = 1. - -=head1 DESCRIPTION - -Generates .yml files to run the genotyping pipeline. Output is the generic -config.yml file, and optionally a workflow file for one of the available -genotype callers. The workflow file can then be placed in the Percolate 'in' -directory while the config file is supplied as an argument to the Percolate -executable. - -The script assumes that the named genotyping database file will be present -in the given working directory. - -=head1 METHODS - -None - -=head1 AUTHOR - -Iain Bancarz - -=head1 COPYRIGHT AND DISCLAIMER - -Copyright (c) 2014 Genome Research Limited. All Rights Reserved. 
- -This program is free software: you can redistribute it and/or modify -it under the terms of the Perl Artistic License or the GNU General -Public License as published by the Free Software Foundation, either -version 3 of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -=cut diff --git a/src/perl/bin/identity_simulation.pl b/src/perl/bin/identity_simulation.pl new file mode 100755 index 000000000..cd8d94823 --- /dev/null +++ b/src/perl/bin/identity_simulation.pl @@ -0,0 +1,291 @@ +#! /software/bin/perl + +use utf8; + +package main; + +use warnings; +use strict; + +use FindBin qw($Bin); +use Getopt::Long; +use Log::Log4perl; +use Log::Log4perl::Level; +use Pod::Usage; + +use WTSI::NPG::Genotyping::Call; +use WTSI::NPG::Genotyping::QC_wip::Check::IdentitySimulator; +use WTSI::NPG::Genotyping::QC_wip::Check::SampleIdentityBayesian; +use WTSI::NPG::Genotyping::SNPSet; +use WTSI::NPG::Utilities qw(user_session_log); + +# script to explore effect of varying parameters in Bayesian ID check +# previously in separate Git repository as vary_qc_data.pl +# used to generate input for R plots of parameter effects + +# inputs: simulation mode, (range of) model params +# outputs: tab-separated (parameter, concordance, identity) triples + +our $VERSION = ''; + +my $uid = `whoami`; +chomp($uid); +my $session_log = user_session_log($uid, 'identity_simulation'); + +my $embedded_conf = " + log4perl.logger.npg.genotyping.qc.identity = ERROR, A1, A2 + + log4perl.appender.A1 = Log::Log4perl::Appender::Screen + log4perl.appender.A1.utf8 = 1 + log4perl.appender.A1.layout = Log::Log4perl::Layout::PatternLayout + log4perl.appender.A1.layout.ConversionPattern = %d %p %m %n + + log4perl.appender.A2 = Log::Log4perl::Appender::File + 
log4perl.appender.A2.filename = $session_log + log4perl.appender.A2.utf8 = 1 + log4perl.appender.A2.layout = Log::Log4perl::Layout::PatternLayout + log4perl.appender.A2.layout.ConversionPattern = %d %p %m %n + log4perl.appender.A2.syswrite = 1 +"; + +my $log; + +run() unless caller(); + +sub run { + + my $debug; + my $log4perl_config; + my $mode; + my $snpset_file; + my $verbose; + my $start; + my $incr; + my $total; + + my $mode_vary_ecp = 'ecp'; # equivalent calls probability + my $mode_vary_smp = 'smp'; # sample mismatch prior + my $mode_vary_xer = 'xer'; # expected error rate + my $mode_vary_qcs = 'qcs'; # qc SNPs + my $mode_vary_qcr = 'qcr'; # qc runs + + my @modes = ($mode_vary_ecp, + $mode_vary_smp, + $mode_vary_xer, + $mode_vary_qcs, + $mode_vary_qcr); + + GetOptions( + 'debug' => \$debug, + 'help' => sub { pod2usage(-verbose => 2, + -exitval => 0) }, + 'incr=f' => \$incr, + 'logconf=s' => \$log4perl_config, + 'mode=s' => \$mode, + 'snpset=s' => \$snpset_file, + 'start=f' => \$start, + 'total=i' => \$total, + 'verbose' => \$verbose); + + if ($log4perl_config) { + Log::Log4perl::init($log4perl_config); + $log = Log::Log4perl->get_logger('npg.genotyping.qc.identity'); + } + else { + Log::Log4perl::init(\$embedded_conf); + $log = Log::Log4perl->get_logger('npg.genotyping.qc.identity'); + if ($verbose) { + $log->level($INFO); + } + elsif ($debug) { + $log->level($DEBUG); + } + } + + my $data_path = $Bin.'/../t/qc/check/identity'; + $snpset_file ||= "$data_path/W30467_snp_set_info_1000Genomes.tsv"; + my $snpset = WTSI::NPG::Genotyping::SNPSet->new($snpset_file); + + my $calls = generate_calls($snpset); + + my $idsim = WTSI::NPG::Genotyping::QC_wip::Check::IdentitySimulator->new( + calls => $calls, + snpset => $snpset, + logger => $log, + ); + + my $results; + + # either all range options, or none of them, may have arguments + my $range_ok = 0; + if (defined($start) && defined($incr) && defined($total)) { + $range_ok = 1; + } elsif (!(defined($start) || 
defined($incr) || defined($total))) { + $range_ok = 1; + } + unless ($range_ok) { + $log->logcroak("Invalid range: Must supply arguments for all of ", + "(--start, --incr, --total), or none of them."); + } + + if (!defined($mode)) { + $log->logcroak("Mode argument is required"); + } elsif ($mode eq $mode_vary_ecp) { + $results = $idsim->find_identity_vary_ecp($start, $incr, $total); + } elsif ($mode eq $mode_vary_qcr) { + $results = $idsim->find_identity_vary_qcr($start, $incr, $total); + } elsif ($mode eq $mode_vary_qcs) { + $results = $idsim->find_identity_vary_qcs($start, $incr, $total); + } elsif ($mode eq $mode_vary_smp) { + $results = $idsim->find_identity_vary_smp($start, $incr, $total); + } elsif ($mode eq $mode_vary_xer) { + $results = $idsim->find_identity_vary_xer($start, $incr, $total); + } else { + $log->logcroak("Illegal mode argument '", $mode, + "'; permitted values are: (", + join(', ', @modes), + "). Run with --help for details."); + } + print $mode."\tconcord\tid\n"; + my $format; + if ($mode eq $mode_vary_qcs || $mode eq $mode_vary_qcr) { + $format = "%d\t%.3f\t%.8f\n"; # integer parameter + } else { + $format = "%.3f\t%.3f\t%.8f\n"; # float parameter + } + + foreach my $result (@{$results}) { + printf $format, @{$result}; + } +} + + +sub generate_calls { + my ($snpset, ) = @_; + # snpset argument must include the snps in hard-coded calls below + # full set of production data + # 26 SNPs listed below (excluding gender markers) + # TODO flexibly generate fake calls from the snpset, with a given het rate + my @data = ( + ['rs649058', 'AG'], + ['rs1131498', 'AA'], + ['rs1805087', 'AG'], + ['rs3795677', 'AG'], + ['rs6166', 'AG'], + ['rs1801262', 'AA'], + ['rs2286963', 'GT'], + ['rs6759892', 'GT'], + ['rs7627615', 'AG'], + ['rs11096957', 'AA'], + ['rs2247870', 'CT'], + ['rs4619', 'AG'], + ['rs532841', 'CT'], + ['rs6557634', 'CT'], + ['rs4925', 'AC'], + ['rs156697', 'AA'], + ['rs5215', 'CT'], + ['rs12828016', 'AA'], + ['rs7298565', 'AG'], + ['rs3742207', 'AC'], + 
['rs4075254', 'CT'], + ['rs4843075', 'GA'], + ['rs8065080', 'CT'], + ['rs1805034', 'AA'], + ['rs2241714', 'CT'], + ['rs753381', 'AG'] + ); + + my @calls; + foreach my $pair (@data) { + my ($snp, $genotype) = @{$pair}; + push @calls, WTSI::NPG::Genotyping::Call->new + (snp => $snpset->named_snp($snp), + genotype => $genotype); + + } + return \@calls; +} + +# TODO could use area under curve as a summary statistic + + +__END__ + +=head1 NAME + +identity_simulation + +=head1 SYNOPSIS + +identity_simulation --mode STRING [--help] [--snpset PATH] [--verbose] + +Options: + + --help Display help. + --incr=NUM Increment for parameter values. Optional. + --logconf=PATH Path to Perl logger configuration file. Optional. + --mode=STRING String to identify the simulation mode, ie. the + parameter to be varied. See below for list of + permitted modes. + --snpset=PATH Path to .tsv snpset manifest file. Optional, + defaults to copy of W30467 manifest in local test + directory. + --start=NUM Starting value for parameter to be varied. Optional. + --total=INT Total number of parameter values. Optional. + --verbose Turn on verbose logging. Optional. + +=head1 DESCRIPTION + +Generate simulated data for the Bayesian identity check and evaluate the +identity metric over a range of concordance. Tab-delimited results are +written to standard output. + +If given, the --start, --incr, and --total arguments control the range of +parameters to be simulated. For example, with --start 0, --incr 0.2, +--total 4, the parameter values will be (0.0, 0.2, 0.4, 0.6). If only one +or two of --start, --incr, and --total are given, an error is thrown. If +none are given, appropriate default values will be used. Run with the +--verbose option to view the parameter range in use. The script will throw +an error if an inappropriate range is chosen (eg. a probability greater +than 1). 
+ +The --mode argument is a three-letter code identifying which parameter +will be varied, as follows: + +=over + +=item * ecp: Equivalent Calls Probability. Probability of equivalent genotype calls on unrelated samples. + +=item * qcs: Quality Control SNPs. Total number of SNPs in QC set. + +=item * qcr: Quality Control Runs. Total number of (identical) quality control runs, ie. number of QC calls for each SNP and sample. + +=item * smp: Sample Mismatch Prior. The Bayesian prior probability of non-identical samples. + +=item * xer: Expected Error Rate. The probability of non-equivalent calls on the same sample. This is a proxy for the calling error rate. + +=back + +=head1 METHODS + +None + +=head1 AUTHOR + +Iain Bancarz + +=head1 COPYRIGHT AND DISCLAIMER + +Copyright (c) 2016 Genome Research Limited. All Rights Reserved. + +This program is free software: you can redistribute it and/or modify +it under the terms of the Perl Artistic License or the GNU General +Public License as published by the Free Software Foundation, either +version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +=cut diff --git a/src/perl/bin/ready_qc_calls.pl b/src/perl/bin/ready_qc_calls.pl index 526756928..b0720060f 100755 --- a/src/perl/bin/ready_qc_calls.pl +++ b/src/perl/bin/ready_qc_calls.pl @@ -16,12 +16,7 @@ package main; use Pod::Usage; use WTSI::NPG::Genotyping::Database::Pipeline; -use WTSI::NPG::Genotyping::Fluidigm::Subscriber; -use WTSI::NPG::Genotyping::Sequenom::Subscriber; -use WTSI::NPG::Genotyping::SNPSet; -use WTSI::NPG::Genotyping::VCF::AssayResultParser; -use WTSI::NPG::iRODS; -use WTSI::NPG::iRODS::DataObject; +use WTSI::NPG::Genotyping::VCF::PlexResultFinder; use WTSI::NPG::Utilities qw(user_session_log); our $VERSION = ''; @@ -76,8 +71,7 @@ sub run { my $debug; my $inifile; my $log4perl_config; - my $samples; - my $output_path; + my $output_dir; my $verbose; GetOptions('callset=s' => \$callset, @@ -88,8 +82,7 @@ sub run { -exitval => 0) }, 'inifile=s' => \$inifile, 'logconf=s' => \$log4perl_config, - 'samples=s' => \$samples, - 'out=s' => \$output_path, + 'out=s' => \$output_dir, 'verbose' => \$verbose); $inifile ||= $DEFAULT_INI; @@ -111,134 +104,52 @@ sub run { } ### validate command-line arguments ### - unless ($config) { - $log->logcroak("--config argument is required"); + my @config = split(/,/msx, $config); + # JSON config files supplied as a comma-separated list + # Use instead of eg. "--config foo.json --config bar.json" for + # compatibility with Percolate cli_args_map function + if (scalar @config == 0) { + $log->logcroak("Must supply at least one --config argument"); } - if ($dbfile && $samples) { - $log->logcroak("Cannot specify both --dbfile and --samples"); - } elsif (!($dbfile || $samples)) { - $log->logcroak("Must specify exactly one of --dbfile ", - "and --samples"); + foreach my $config_path (@config) { + unless (-e $config_path) { + $log->logcroak("Config path '", $config_path, + "' does not exist. 
Paths must be supplied as ", + "a comma-separated list; individual paths ", + "cannot contain commas."); + } } - unless ($output_path) { + if (!(defined($output_dir))) { $log->logcroak("--out argument is required"); + } elsif (!(-d $output_dir)) { + $log->logcroak("--out argument '", $output_dir, + "' is not a directory"); } - - ### read and validate config file ### - my $contents = decode_json(read_file($config)); - my %params = %{$contents}; - foreach my $key (@REQUIRED_CONFIG_KEYS) { - unless ($params{$key}) { - $log->logcroak("Required parameter '", $key, - "' missing from config file '", $config, "'"); - } - } - $callset ||= $params{$PLATFORM_KEY}; # assign callset default (if needed) - - ### set up iRODS connection and make it use same logger as script ### - my $irods = WTSI::NPG::iRODS->new; - $irods->logger($log); - - ### read sample identifiers ### - my @sample_ids; - if ($dbfile) { - # get sample names from pipeline DB - my @initargs = (name => 'pipeline', - inifile => $inifile, - dbfile => $dbfile); - my $pipedb = WTSI::NPG::Genotyping::Database::Pipeline->new - (@initargs)->connect - (RaiseError => 1, - sqlite_unicode => 1, - on_connect_do => 'PRAGMA foreign_keys = ON'); - my @samples = $pipedb->sample->all; - @sample_ids = uniq map { $_->sanger_sample_id } @samples; - } elsif ($samples) { - my @contents = decode_json(read_file($samples)); - @sample_ids = @{$contents[0]}; + if (!$dbfile) { + $log->logcroak("--dbfile argument is required"); + } elsif (! 
-e $dbfile) { + $log->logcroak("--dbfile argument '", $dbfile, "' does not exist"); } - ### read data from iRODS ### - my @irods_data = _query_irods($irods, \@sample_ids, \%params, $log); - my ($resultsets, $chromosome_lengths, $vcf_meta, $assay_snpset, - $vcf_snpset) = @irods_data; - if (scalar @{$resultsets} == 0) { - $log->logcroak("No assay result sets found for QC plex '", - $params{$SNPSET_NAME_KEY}, "'"); - } - $vcf_meta->{$CALLSET_NAME_KEY} = [$callset, ]; # update VCF metadata - ### call VCF parser on resultsets and write to file ### - my $vcfData = WTSI::NPG::Genotyping::VCF::AssayResultParser->new( - resultsets => $resultsets, - contig_lengths => $chromosome_lengths, - assay_snpset => $assay_snpset, - vcf_snpset => $vcf_snpset, - logger => $log, - metadata => $vcf_meta, - )->get_vcf_dataset(); - open my $out, ">", $output_path || - $log->logcroak("Cannot open VCF output: '", $output_path, "'"); - print $out $vcfData->str()."\n"; - close $out || - $log->logcroak("Cannot close VCF output: '", $output_path, "'"); -} - -sub _query_irods { - # get AssayResultSets, SNPSets, and contig lengths from iRODS - # works for Fluidigm or Sequenom - my ($irods, $sample_ids, $params, $log) = @_; - my $subscriber; - my %query_params = (irods => $irods, - data_path => $params->{$IRODS_DATA_PATH_KEY}, - reference_path => $params->{$REFERENCE_PATH_KEY}, - reference_name => $params->{$REFERENCE_NAME_KEY}, - snpset_name => $params->{$SNPSET_NAME_KEY}, - logger => $log); - if ($params->{$PLATFORM_KEY} eq $FLUIDIGM) { - $subscriber = WTSI::NPG::Genotyping::Fluidigm::Subscriber->new - (%query_params); - } elsif ($params->{$PLATFORM_KEY} eq $SEQUENOM) { - if ($params->{$READ_VERSION_KEY}) { - $query_params{'snpset_version'} = $params->{$READ_VERSION_KEY}; - } - $subscriber = WTSI::NPG::Genotyping::Sequenom::Subscriber->new - (%query_params); - } else { - $log->logcroak("Unknown plex type: '", $params->{$PLATFORM_KEY}, "'"); - } - my ($resultset_hashref, $vcf_metadata) = - 
$subscriber->get_assay_resultsets_and_vcf_metadata($sample_ids); - - # unpack hashref from Subscriber.pm into an array of resultsets - # TODO exploit ability of Subscriber.pm to find multiple resultsets for each sample - my @resultsets; - foreach my $sample (keys %{$resultset_hashref}) { - my @sample_resultsets = @{$resultset_hashref->{$sample}}; - push @resultsets, @sample_resultsets; - } - my $total = scalar @resultsets; - $log->info("Found $total assay resultsets."); - my $assay_snpset = $subscriber->snpset; - my $vcf_snpset; - if ($params->{$PLATFORM_KEY} eq $SEQUENOM) { - my @args = ( - $params->{$REFERENCE_PATH_KEY}, - $params->{$REFERENCE_NAME_KEY}, - $params->{$SNPSET_NAME_KEY}, - ); - if ($params->{$WRITE_VERSION_KEY}) { - push @args, $params->{$WRITE_VERSION_KEY}; - } - $vcf_snpset = $subscriber->find_irods_snpset(@args); - } else { - $vcf_snpset = $assay_snpset; - } - return (\@resultsets, - $subscriber->get_chromosome_lengths(), - $vcf_metadata, - $assay_snpset, - $vcf_snpset, - ); + ### read sample identifiers from pipeline DB ### + my @initargs = (name => 'pipeline', + inifile => $inifile, + dbfile => $dbfile); + my $pipedb = WTSI::NPG::Genotyping::Database::Pipeline->new + (@initargs)->connect + (RaiseError => 1, + sqlite_unicode => 1, + on_connect_do => 'PRAGMA foreign_keys = ON'); + my @samples = $pipedb->sample->all; + my @sample_ids = uniq map { $_->sanger_sample_id } @samples; + + ### create PlexResultFinder and write VCF ### + my $finder = WTSI::NPG::Genotyping::VCF::PlexResultFinder->new( + sample_ids => \@sample_ids, + subscriber_config => \@config, + logger => $log, + ); + my $vcf_paths = $finder->write_vcf($output_dir); } ## TODO Retrieve results for multiple plex types / experiments and record in the same VCF file @@ -260,19 +171,17 @@ =head1 SYNOPSIS calls (eg. from different platforms or runs) in identity check output. Optional, defaults to platform name in file supplied for --config. 
- --config Path to JSON file with configuration parameters for - reading the QC plex calls. + --config Comma-separated list of paths to one or more JSON files, + with configuration parameters for reading the QC plex + calls. The individual paths *cannot* contain commas. + Required. --dbfile Path to pipeline SQLite database file. Used to read - sample identifiers. Must supply exactly one of --dbfile - or --samples. + sample identifiers. Required. --help Display help. --inifile Path to .ini file to configure pipeline SQLite database connection. Optional. Only relevant if --dbfile is given. - --out Path for VCF output. Required. - --samples Path to JSON file containing a list of sample identifiers. - The file should contain *only* a simple list, so the - "sample.json" file produced by g2i is not appropriate. - Must supply exactly one of --dbfile or --samples. + --out Path to directory for VCF output. Required. + =head1 DESCRIPTION @@ -290,7 +199,7 @@ =head1 AUTHOR =head1 COPYRIGHT AND DISCLAIMER -Copyright (C) 2015 Genome Research Limited. All Rights Reserved. +Copyright (C) 2015, 2016 Genome Research Limited. All Rights Reserved. 
This program is free software: you can redistribute it and/or modify it under the terms of the Perl Artistic License or the GNU General diff --git a/src/perl/bin/ready_workflow.pl b/src/perl/bin/ready_workflow.pl new file mode 100755 index 000000000..521daec0a --- /dev/null +++ b/src/perl/bin/ready_workflow.pl @@ -0,0 +1,481 @@ +#!/software/bin/perl + +use utf8; + +package main; + +use warnings; +use strict; + +use Cwd qw(getcwd abs_path); +use File::Basename qw(fileparse); +use File::Copy qw(copy); +use File::Slurp qw(read_file); +use File::Spec::Functions qw(catfile); +use FindBin qw($Bin); +use Getopt::Long; +use JSON; +use List::AllUtils qw(uniq); +use Log::Log4perl; +use Log::Log4perl::Level; +use Pod::Usage; +use YAML qw /DumpFile/; + +use WTSI::NPG::Genotyping::Database::Pipeline; +use WTSI::NPG::Genotyping::VCF::PlexResultFinder; +use WTSI::NPG::Utilities qw(user_session_log); + +our $VERSION = ''; + +our $DEFAULT_INI = $ENV{HOME} . "/.npg/genotyping.ini"; +our $PERCOLATE_LOG_NAME = 'percolate.log'; +our $GENOTYPING_DB_NAME = 'genotyping.db'; +our $MODULE_ILLUMINUS = 'Genotyping::Workflows::GenotypeIlluminus'; +our $MODULE_ZCALL = 'Genotyping::Workflows::GenotypeZCall'; +our $ILLUMINUS = 'illuminus'; +our $ZCALL = 'zcall'; + +our $DEFAULT_HOST = 'farm3-head2'; +our $DEFAULT_CHUNK_SIZE_SNP = 4000; +our $DEFAULT_CHUNK_SIZE_SAMPLE = 40; +our $DEFAULT_MEMORY = 2048; +our $DEFAULT_ZSTART = 7; +our $DEFAULT_ZTOTAL = 1; + +our $VCF_SUBDIRECTORY = 'vcf'; +our $PLEX_MANIFEST_SUBDIRECTORY = 'plex_manifests'; + +my $uid = `whoami`; +chomp($uid); +my $session_log = user_session_log($uid, 'ready_workflow'); + +my $embedded_conf = " + log4perl.logger.npg.genotyping.ready_workflow = ERROR, A1, A2 + + log4perl.appender.A1 = Log::Log4perl::Appender::Screen + log4perl.appender.A1.utf8 = 1 + log4perl.appender.A1.layout = Log::Log4perl::Layout::PatternLayout + log4perl.appender.A1.layout.ConversionPattern = %d %p %m %n + + log4perl.appender.A2 = Log::Log4perl::Appender::File + 
log4perl.appender.A2.filename = $session_log + log4perl.appender.A2.utf8 = 1 + log4perl.appender.A2.layout = Log::Log4perl::Layout::PatternLayout + log4perl.appender.A2.layout.ConversionPattern = %d %p %m %n + log4perl.appender.A2.syswrite = 1 +"; + +my $log; + +run() unless caller(); + +sub run { + my $workdir; + my $manifest; + my $smaller; + my $debug; + my $dbfile; + my $run; + my $egt; + my $inifile; + my $log4perl_config; + my $verbose; + my $host; + my $workflow; + my $chunk_size; + my $memory; + my $zstart; + my $ztotal; + my @plex_config; + + GetOptions('workdir=s' => \$workdir, + 'manifest=s' => \$manifest, + 'plex_config=s' => \@plex_config, + 'host=s' => \$host, + 'dbfile=s' => \$dbfile, + 'run=s' => \$run, + 'egt=s' => \$egt, + 'inifile=s' => \$inifile, + 'verbose' => \$verbose, + 'workflow=s' => \$workflow, + 'chunk_size=i' => \$chunk_size, + 'smaller' => \$smaller, + 'memory=i' => \$memory, + 'zstart=i' => \$zstart, + 'ztotal=i' => \$ztotal, + 'logconf=s' => \$log4perl_config, + 'debug' => \$debug, + 'help' => sub { pod2usage(-verbose => 2, -exitval => 0) }, + ); + + ### set up logging ### + if ($log4perl_config) { + Log::Log4perl::init($log4perl_config); + $log = Log::Log4perl->get_logger('npg.genotyping.ready_workflow'); + } + else { + Log::Log4perl::init(\$embedded_conf); + $log = Log::Log4perl->get_logger('npg.genotyping.ready_workflow'); + if ($verbose) { + $log->level($INFO); + } + elsif ($debug) { + $log->level($DEBUG); + } + } + + ### validate command-line arguments ### + $inifile ||= $DEFAULT_INI; + if (! -e $inifile) { + $log->logcroak("--inifile argument '", $inifile, "' does not exist"); + } + if ($workdir) { + $workdir = abs_path($workdir); + $log->info("Working directory absolute path is '", $workdir, "'"); + } else { + $log->logcroak("--workdir argument is required"); + } + if (!$run) { + $log->logcroak("--run argument is required"); + } + if (!$dbfile) { + $log->logcroak("--dbfile argument is required"); + } elsif (! 
-e $dbfile) { + $log->logcroak("--dbfile argument '", $dbfile, "' does not exist"); + } + if (!$manifest) { + $log->logcroak("--manifest argument is required"); + } elsif (! -e $manifest) { + $log->logcroak("--manifest argument '", $manifest, + "' does not exist"); + } + if (!$workflow) { + $log->logcroak("--workflow argument is required"); + } elsif (!($workflow eq $ILLUMINUS || $workflow eq $ZCALL)) { + $log->logcroak("Invalid workflow argument; must be '", + $ILLUMINUS, "' or '", $ZCALL, "'"); + } + if (defined($egt) && !(-e $egt)) { + $log->logcroak("--egt argument '", $egt, "' does not exist"); + } + if (scalar @plex_config == 0) { # get defaults from perl/etc directory + my $etc_dir = catfile($Bin, "..", "etc"); + foreach my $name (qw/ready_qc_fluidigm.json ready_qc_sequenom.json/) { + push @plex_config, catfile($etc_dir, $name); + } + } + foreach my $plex_config (@plex_config) { + if (! -e $plex_config) { + $log->logcroak("--plex_config argument '", $plex_config, + "' does not exist"); + } + } + + $host ||= $DEFAULT_HOST; + # illuminus paralellizes by SNP, other callers by sample + if ($workflow eq 'illuminus') { $chunk_size ||= $DEFAULT_CHUNK_SIZE_SNP; } + else { $chunk_size ||= $DEFAULT_CHUNK_SIZE_SAMPLE; } + $memory ||= $DEFAULT_MEMORY; + + # ensure $zstart, $ztotal are initialized before comparison + $zstart ||= $DEFAULT_ZSTART; + $ztotal ||= $DEFAULT_ZTOTAL; + if ($zstart <=0) { $log->logcroak("zstart must be > 0"); } + if ($ztotal <=0) { $log->logcroak("ztotal must be > 0"); } + + ### create and populate the working directory ### + make_working_directory($workdir); + write_config_yml($workdir, $host); + + ### read sample identifiers from pipeline DB & create PlexResultFinder ### + my @initargs = (name => 'pipeline', + inifile => $inifile, + dbfile => $dbfile); + my $pipedb = WTSI::NPG::Genotyping::Database::Pipeline->new + (@initargs)->connect + (RaiseError => 1, + sqlite_unicode => 1, + on_connect_do => 'PRAGMA foreign_keys = ON'); + my @samples = 
$pipedb->sample->all; + my @sample_ids = uniq map { $_->sanger_sample_id } @samples; + my $finder = WTSI::NPG::Genotyping::VCF::PlexResultFinder->new( + sample_ids => \@sample_ids, + logger => $log, + subscriber_config => \@plex_config, + ); + + ### write plex manifests and VCF to working directory ### + my $manifest_dir = catfile($workdir, $PLEX_MANIFEST_SUBDIRECTORY); + my $plex_manifests = $finder->write_manifests($manifest_dir); + my $vcf_dir = catfile($workdir, $VCF_SUBDIRECTORY); + my $vcf = $finder->write_vcf($vcf_dir); + + ### if required, copy manifest, database and EGT to working directory ### + unless ($smaller) { + $dbfile = copy_file_to_directory($dbfile, $workdir); + $manifest = copy_file_to_directory($manifest, $workdir); + if (defined($egt)) { + $egt = copy_file_to_directory($egt, $workdir); + } + } + write_workflow_yml($workdir, $workflow, $dbfile, $run, $manifest, + $chunk_size, $memory, $vcf, $plex_manifests, + $egt, $zstart, $ztotal); + $log->info("Finished; genotyping pipeline directory '", $workdir, + "' is ready to run Percolate."); +} + +sub copy_file_to_directory { + # convenience method to copy a file and return the destination file path + my ($source, $dir) = @_; + my $filename = fileparse($source); + my $dest = catfile($dir, $filename); + copy($source, $dest) || $log->logcroak("Cannot copy '", $source, + "' to '", $dest, "'"); + return $dest; +} + +sub make_working_directory { + # create the working directory if it does not already exist, then + # ensure the in, pass, fail, vcf and plex manifest subdirectories exist + my ($workdir) = @_; + if (-e $workdir) { + if (-d $workdir) { + $log->info("Working directory '", $workdir, "' already exists"); + } else { + $log->logcroak("--workdir argument '", $workdir, + "' exists and is not a directory"); + } + } else { + mkdir($workdir) || $log->logcroak("Cannot create directory '", + $workdir, "'"); + $log->info("Created working directory '", $workdir, "'"); + } + # create subdirectories + my @names =
('in', 'pass', 'fail', $VCF_SUBDIRECTORY, + $PLEX_MANIFEST_SUBDIRECTORY); + foreach my $name (@names) { + my $subdir = catfile($workdir, $name); + if (-e $subdir) { + if (-d $subdir) { + $log->debug("Subdirectory '", $subdir, "' already exists"); + } else { + $log->logcroak("Expected subdirectory path '", $subdir, + "' exists and is not a directory"); + } + } else { + mkdir($subdir) || $log->logcroak("Cannot create subdirectory '", + $subdir, "'"); + $log->debug("Created subdirectory '", $subdir, "'"); + } + } +} + +sub write_config_yml { + my ($workdir, $host) = @_; + my %config = ( + 'root_dir' => $workdir, + 'log' => catfile($workdir, $PERCOLATE_LOG_NAME), + 'log_level' => 'DEBUG', + 'msg_host' => $host, + 'msg_port' => '11300', + 'async' => 'lsf', + 'max_processes' => '250' + ); + my $config_path = catfile($workdir, 'config.yml'); + $log->info("Wrote config YML to '", $config_path, "'"); + DumpFile($config_path, (\%config)); + return $config_path; +} + +sub write_workflow_yml { + my ($workdir, $workflow, $dbpath, $run, $manifest, $chunk_size, + $memory, $vcf, $plex_manifests, $egt, $zstart, $ztotal) = @_; + my %workflow_args = ( + 'chunk_size' => $chunk_size, + 'memory' => $memory, + 'manifest' => $manifest, + 'plex_manifest' => $plex_manifests, + 'vcf' => $vcf, + ); + my $workflow_module; + if ($workflow eq $ILLUMINUS) { + $workflow_args{'gender_method'} = 'Supplied'; + $workflow_module = $MODULE_ILLUMINUS; + } elsif ($workflow eq $ZCALL) { + if (!($egt && $zstart && $ztotal)) { + $log->logcroak("Must specify EGT, zstart, and ztotal for ", + "zcall workflow"); + } elsif (! 
-e $egt) { + $log->logcroak("EGT file '", $egt, "' does not exist."); + } else { + $workflow_args{'egt'} = $egt; + $workflow_args{'zstart'} = $zstart; + $workflow_args{'ztotal'} = $ztotal; + $workflow_module = $MODULE_ZCALL; + } + } else { + $log->logcroak("Invalid workflow argument '", $workflow, + "'; must be one of $ILLUMINUS, $ZCALL"); + } + my @args = ($dbpath, $run, $workdir, \%workflow_args); + my %params = ( + 'library' => 'genotyping', + 'workflow' => $workflow_module, + 'arguments' => \@args, + ); + my $out = catfile($workdir, "in", "genotype_".$workflow.".yml"); + $log->info("Wrote workflow YML to '", $out, "'"); + DumpFile($out, (\%params)); +} + + +__END__ + + +=head1 NAME + +ready_workflow + +=head1 SYNOPSIS + +ready_workflow [--dbfile ] [--help] + --manifest --run [--egt ] + [--memory ] [--host ] [--plex_config ] + [--verbose] --workdir --workflow + +Options: + + --chunk_size Chunk size for parallelization. Optional, defaults to + 4000 (SNPs) for Illuminus or 40 (samples) for zCall. + --dbfile Path to an SQLite pipeline database file. Required. + --egt Path to an Illumina .egt cluster file. Required for zcall. + --help Display help. + --host Name of host machine for the beanstalk message queue. + Optional, defaults to farm3-head2. + --manifest Path to the .bpm.csv manifest file. Required. + --memory Memory limit hint for LSF, in MB. Default = 2048. + --plex_config Path to a JSON file with parameters to query iRODS and + write QC plex data as VCF. May be supplied more than once + to specify multiple files. Optional, defaults to a standard + set of config files. + --run The pipeline run name in the database. Required. + --smaller Do not copy the .egt, manifest, and plex manifest files to + the workflow directory. Uses less space, but makes the + analysis directory less self-contained. + --verbose Print messages while processing. Optional. + --workdir Working directory for pipeline run. Required. 
+ --workflow Pipeline workflow for which to create a .yml file. + Required; must be 'illuminus' or 'zcall'. + The script exits with an error if it is absent. + --zstart Start of zscore range, used for zCall only. Default = 7. + --ztotal Number of zscores in range, for zCall only. Default = 1. + +=head1 DESCRIPTION + +Create and populate a working directory for the genotyping pipeline. +Items in the populated directory include: + +=over + +=item * + +The config.yml file for Percolate + +=item * + +The genotyping YML file with parameters for the pipeline workflow, +placed in the 'in' subdirectory + +=item * + +VCF files containing the qc plex calls (if any) + +=item * + +A copy of the SQLite genotyping database file + +=back + +=head2 Configuration file format + +The script requires one or more JSON files with config parameters. If none +are specified by the user, default files will be used. The defaults are +located in the perl/etc directory. + +Each configuration file must be a single hash in JSON format. Keys and values +correspond to construction arguments for Subscriber objects, with one +exception: The 'platform' key denotes a genotyping platform (eg. 'sequenom' +or 'fluidigm'). + +B<Required> key/value pairs are: + +=over + +=item * + +I<platform>: String denoting a genotyping platform: 'sequenom' or 'fluidigm' + +=item * + +I<snpset_name>: Name of the QC plex SNP set: Eg. "W35961". + +=back + +Other key/value pairs are optional, and will receive default values if +not specified in the JSON config.
These are: + +=over + +=item * + +I: Identifier for the callset read by the Subscriber + +=item * + +I: iRODS path under which the input data are found + +=item * + +I: iRODS path under which the reference and SNP set data +are found + +=item * + +I: Root directory containing NPG genome references + +=item * + +I: SNP set version in iRODs metadata, used to read assay +results + +=item * + +I: SNP set version in iRODs metadata, used to write VCF + +=back + + +=head1 METHODS + +None + +=head1 AUTHOR + +Iain Bancarz + +=head1 COPYRIGHT AND DISCLAIMER + +Copyright (c) 2016 Genome Research Limited. All Rights Reserved. + +This program is free software: you can redistribute it and/or modify +it under the terms of the Perl Artistic License or the GNU General +Public License as published by the Free Software Foundation, either +version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +=cut diff --git a/src/perl/bin/run_qc.pl b/src/perl/bin/run_qc.pl index 0b6b7fe84..36b2ed852 100755 --- a/src/perl/bin/run_qc.pl +++ b/src/perl/bin/run_qc.pl @@ -8,130 +8,160 @@ use strict; use warnings; use Getopt::Long; -use Carp; use Cwd qw(getcwd abs_path); use File::Basename; use FindBin qw($Bin); +use Log::Log4perl; +use Log::Log4perl::Level; +use Pod::Usage; + use WTSI::NPG::Genotyping::Version qw(write_version_log); use WTSI::NPG::Genotyping::QC::Collation qw(collate readMetricThresholds); use WTSI::NPG::Genotyping::QC::Identity; use WTSI::NPG::Genotyping::QC::PlinkIO qw(checkPlinkBinaryInputs); use WTSI::NPG::Genotyping::QC::QCPlotShared qw(defaultConfigDir defaultJsonConfig defaultTexIntroPath readQCFileNames); use WTSI::NPG::Genotyping::QC::Reports qw(createReports); +use WTSI::NPG::Utilities qw(user_session_log); our $VERSION = ''; our $DEFAULT_INI = $ENV{HOME} . "/.npg/genotyping.ini"; our $CR_STATS_EXECUTABLE = "snp_af_sample_cr_bed"; our $MAF_HET_EXECUTABLE = "het_by_maf.py"; -my ($help, $outDir, $simPath, $dbPath, $iniPath, $configPath, $title, - $plinkPrefix, $runName, $mafHet, $filterConfig, $zcallFilter, - $illuminusFilter, $include, $plexManifest, $vcf, $sampleJson); - -GetOptions("help" => \$help, - "output-dir=s" => \$outDir, - "config=s" => \$configPath, - "sim=s" => \$simPath, - "dbpath=s" => \$dbPath, - "inipath=s" => \$iniPath, - "title=s" => \$title, - "run=s" => \$runName, - "vcf=s" => \$vcf, - "mafhet" => \$mafHet, - "filter=s" => \$filterConfig, - "zcall-filter" => \$zcallFilter, - "illuminus-filter" => \$illuminusFilter, - "include" => \$include, - "plex-manifest=s" => \$plexManifest, - "sample-json=s" => \$sampleJson, - ); - -if ($help) { - print STDERR "Usage: $0 [ options ] PLINK_GTFILE - -PLINK_GTFILE is the prefix for binary plink files (without .bed, .bim, .fam extension). May include directory names, eg. /home/foo/project where plink files are /home/foo/project.bed, etc. 
-Options: ---output-dir=PATH Directory for QC output ---sim=PATH Path to SIM file for intensity metrics. - See note [1] below. ---dbpath=PATH Path to pipeline database .db file. Required. ---inipath=PATH Path to .ini file containing general pipeline and - database configuration; local default is $DEFAULT_INI ---vcf=PATH Path to .vcf file containing QC plex calls for alternate - identity check. ---plex-manifest=PATH Path to .tsv manifest for QC plex. Required. ---run=NAME Name of run in pipeline database (needed for database - update from gender check) ---config=PATH Path to JSON config file; default is taken from inipath ---mafhet Find heterozygosity separately for SNP populations with - minor allele frequency greater than 1%, and less than - 1%. ---sample-json=PATH Sample JSON file to relate Sanger sample IDs in VCF - to sample URIs in Plink data. ---title Title for this analysis; will appear in plots ---zcall-filter Apply default zcall filter; see note [2] below. ---illuminus-filter Apply default illuminus filter; see note [2] below. ---filter=PATH Read custom filter criteria from PATH. See note [2] - below. ---include Do not exclude failed samples from the pipeline DB. - See note [2] below. - -[1] If --sim is not specified, but the intensity files magnitude.txt and -xydiff.txt are present in the pipeline output directory, intensity metrics -will be read from the files. This allows intensity metrics to be computed only -once when multiple callers are used on the same dataset. 
+my $uid = `whoami`; +chomp($uid); +my $session_log = user_session_log($uid, 'run_qc'); +# the logger category configured below must match the name passed to get_logger +my $embedded_conf = " + log4perl.logger.npg.genotyping.qc.identity = ERROR, A1, A2 + + log4perl.appender.A1 = Log::Log4perl::Appender::Screen + log4perl.appender.A1.utf8 = 1 + log4perl.appender.A1.layout = Log::Log4perl::Layout::PatternLayout + log4perl.appender.A1.layout.ConversionPattern = %d %p %m %n -[2] The --zcall, --illuminus, and --filter options enable \"prefilter\" mode: - * Samples which fail the filter criteria are excluded in the pipeline - SQLite DB. This ensures that failed samples are not input to subsequent - analyses using the same DB. - * Filter criteria are determined by one of three options: - --illuminus Default illuminus criteria - --zcall Default zcall criteria - --filter=PATH Custom criteria, given by the JSON file at PATH. - * If more than one of the above options is specified, an error is raised. - If none of them is specified, no filtering is carried out. - * Additional CSV and JSON summary files are written to describe the - prefilter results. - * If the --include option is in effect, filter summary files will be - written but samples will not be excluded from the SQLite DB. - -[3] The --plex-manifest and --vcf options, with appropriate arguments, are -required to run the alternate identity check. If both these options are not -specified, the check will be omitted.
+ log4perl.appender.A2 = Log::Log4perl::Appender::File + log4perl.appender.A2.filename = $session_log + log4perl.appender.A2.utf8 = 1 + log4perl.appender.A2.layout = Log::Log4perl::Layout::PatternLayout + log4perl.appender.A2.layout.ConversionPattern = %d %p %m %n + log4perl.appender.A2.syswrite = 1 "; - exit(0); -} -### process options and validate inputs -$plinkPrefix = processPlinkPrefix($ARGV[0]); -$iniPath ||= $DEFAULT_INI; -$iniPath = verifyAbsPath($iniPath); -$configPath ||= defaultJsonConfig($iniPath); -$configPath = verifyAbsPath($configPath); - -if ($simPath) { $simPath = verifyAbsPath($simPath); } -$dbPath = verifyAbsPath($dbPath); -$outDir ||= "./qc"; -$mafHet ||= 0; -if (not -e $outDir) { mkdir($outDir); } -elsif (not -w $outDir) { - die "Cannot write to output directory $outDir\n"; -} -$outDir = abs_path($outDir); -$title ||= getDefaultTitle($outDir); -my $texIntroPath = defaultTexIntroPath($iniPath); -$texIntroPath = verifyAbsPath($texIntroPath); +my $log; + +run() unless caller(); + +sub run { -$filterConfig = getFilterConfig($filterConfig, $zcallFilter, $illuminusFilter); -$include ||= 0; -my $exclude = !($include); + my ($outDir, $simPath, $dbPath, $iniPath, $configPath, $title, + $plinkPrefix, $runName, $mafHet, $filterConfig, $zcallFilter, + $illuminusFilter, $include, $plexManifests, $vcf, $sampleJson, + $log4perl_config, $verbose, $debug, $plinkRaw); -### run QC -run($plinkPrefix, $simPath, $dbPath, $iniPath, $configPath, -$runName, $outDir, $title, $texIntroPath, $mafHet, $filterConfig, $exclude, -$plexManifest, $vcf, $sampleJson); + GetOptions("help" => sub { pod2usage(-verbose => 2, + -exitval => 0) }, + "plink=s" => \$plinkRaw, + "output-dir=s" => \$outDir, + "config=s" => \$configPath, + "sim=s" => \$simPath, + "dbpath=s" => \$dbPath, + "inipath=s" => \$iniPath, + "title=s" => \$title, + "run=s" => \$runName, + "vcf=s" => \$vcf, + "mafhet" => \$mafHet, + "filter=s" => \$filterConfig, + "zcall-filter" => \$zcallFilter, + "illuminus-filter" 
=> \$illuminusFilter, + "include" => \$include, + "plex-manifests=s" => \$plexManifests, + "sample-json=s" => \$sampleJson, + "logconf=s" => \$log4perl_config, + "verbose" => \$verbose, + "debug" => \$debug, + ); + + + ### set up logging + if ($log4perl_config) { + Log::Log4perl::init($log4perl_config); + $log = Log::Log4perl->get_logger('npg.genotyping.qc.identity'); + } else { + Log::Log4perl::init(\$embedded_conf); + $log = Log::Log4perl->get_logger('npg.genotyping.qc.identity'); + if ($verbose) { + $log->level($INFO); + } + elsif ($debug) { + $log->level($DEBUG); + } + } + + ### process options and validate inputs + if (defined($plinkRaw)) { + $plinkPrefix = processPlinkPrefix($plinkRaw); + } + $iniPath ||= $DEFAULT_INI; + $iniPath = verifyAbsPath($iniPath); + $configPath ||= defaultJsonConfig($iniPath); + $configPath = verifyAbsPath($configPath); + + if ($simPath) { $simPath = verifyAbsPath($simPath); } + $dbPath = verifyAbsPath($dbPath); + $outDir ||= "./qc"; + $mafHet ||= 0; + if (not -e $outDir) { mkdir($outDir); } + elsif (not -w $outDir) { + die "Cannot write to output directory $outDir\n"; + } + $outDir = abs_path($outDir); + $title ||= getDefaultTitle($outDir); + my $texIntroPath = defaultTexIntroPath($iniPath); + $texIntroPath = verifyAbsPath($texIntroPath); + + $filterConfig = getFilterConfig($filterConfig, $zcallFilter, + $illuminusFilter); + $include ||= 0; + my $exclude = !($include); + + # split comma-separated path lists for identity check + # Use instead of eg. "--config foo.json --config bar.json" for + # compatibility with Percolate cli_args_map function + my @vcf; + my @plexManifests; + if ($vcf && $plexManifests) { + @vcf = split(/,/msx, $vcf); + foreach my $vcf_path (@vcf) { + unless (-e $vcf_path) { + $log->logcroak("VCF path '", $vcf_path, + "' does not exist. 
Paths must be supplied as ", + "a comma-separated list; individual paths ", + "cannot contain commas."); + } + } + @plexManifests = split(/,/msx, $plexManifests); + foreach my $plex_path (@plexManifests) { + unless (-e $plex_path) { + $log->logcroak("Plex manifest path '", $plex_path, + "' does not exist. Paths must be supplied as ", + "a comma-separated list; individual paths ", + "cannot contain commas."); + } + } + } elsif ($vcf && !$plexManifests) { + $log->logcroak("--vcf argument must be accompanied by a", + " --plex-manifests argument"); + } elsif (!$vcf && $plexManifests) { + $log->logcroak("--plex-manifests argument must be accompanied by a", + " --vcf argument"); + } + ### run QC + run_qc($plinkPrefix, $simPath, $dbPath, $iniPath, $configPath, + $runName, $outDir, $title, $texIntroPath, $mafHet, $filterConfig, + $exclude, \@plexManifests, \@vcf, $sampleJson); + +} sub cleanup { # create a 'supplementary' subdirectory of the output directory @@ -174,22 +204,23 @@ sub getFilterConfig { foreach my $opt (@filterOpts) { if ($opt) { $filters++; } } - if ($filters > 1) { - croak "Incorrect options; must specify at most one of --filter, --illuminus-filter, --zcall-filter"; - } + if ($filters > 1) { + $log->logcroak("Incorrect options; must specify at most one of", + " --filter, --illuminus-filter, --zcall-filter"); + } my ($fConfig, $zcallFilter, $illuminusFilter) = @filterOpts; # if filter options are OK, check existence of appropriate config file my $configDir = defaultConfigDir(); if ($zcallFilter) { - $fConfig = verifyAbsPath($configDir."/zcall_prefilter.json"); + $fConfig = verifyAbsPath($configDir."/zcall_prefilter.json"); } elsif ($illuminusFilter) { - $fConfig = verifyAbsPath($configDir."/illuminus_prefilter.json"); + $fConfig = verifyAbsPath($configDir."/illuminus_prefilter.json"); } elsif ($fConfig) { $fConfig = verifyAbsPath($fConfig); # custom filter } else { $fConfig = 0; # no filtering } - return $fConfig; + return $fConfig; } sub 
getPlateHeatmapCommands { @@ -206,10 +237,10 @@ sub getPlateHeatmapCommands { push(@inputs, $dir.'/'.$fileNames{'magnitude'}); } foreach my $i (0..@modes-1) { - push(@cmds, join(" ", ('cat', $inputs[$i], '|', - "$Bin/plate_heatmap_plots.pl", - "--mode=$modes[$i]", - "--out_dir=$hmOut", $dbopt, + push(@cmds, join(" ", ('cat', $inputs[$i], '|', + "$Bin/plate_heatmap_plots.pl", + "--mode=$modes[$i]", + "--out_dir=$hmOut", $dbopt, "--inipath=$iniPath"))); } push (@cmds, "$Bin/plate_heatmap_index.pl $title $hmOut ". @@ -221,9 +252,8 @@ sub processPlinkPrefix { # want PLINK prefix to include absolute path, so plink I/O will still work after change of working directory # also check that PLINK binary files exist and are readable my $plinkPrefix = shift; - unless ($plinkPrefix) { - croak "ERROR: Must supply a PLINK filename prefix!"; - } elsif ($plinkPrefix =~ "/") { # prefix is "directory-like"; disassemble to find absolute path + if ($plinkPrefix =~ "/") { + # prefix is "directory-like"; disassemble to find absolute path my @terms = split("/", $plinkPrefix); my $filePrefix = pop(@terms); $plinkPrefix = abs_path(join("/", @terms))."/".$filePrefix; @@ -249,12 +279,14 @@ sub verifyAbsPath { sub run_qc_wip { # run the work-in-progess refactored QC in parallel with the old one - my ($plinkPrefix, $outDir, $plexManifest, $vcf, $sampleJson) = @_; + my ($plinkPrefix, $outDir, $plexManifestRef, $vcfRef, $sampleJson) = @_; $outDir = $outDir."/qc_wip"; mkdir($outDir); my $script = "check_identity_bed_wip.pl"; my $jsonPath = $outDir."/identity_wip.json"; my $csvPath = $outDir."/identity_wip.csv"; + my $vcf = join(',', @{$vcfRef}); + my $plexManifest = join(',', @{$plexManifestRef}); my @args = ("--json=$jsonPath", "--csv=$csvPath", "--plink=$plinkPrefix", @@ -270,7 +302,7 @@ sub run_qc_wip { } -sub run { +sub run_qc { my ($plinkPrefix, $simPath, $dbPath, $iniPath, $configPath, $runName, $outDir, $title, $texIntroPath, $mafHet, $filter, $exclude, $plexManifest, $vcf, $sampleJson) = 
@_; @@ -287,7 +319,7 @@ sub run { if (!defined($runName)) { die "Must supply pipeline run name for database gender update\n"; } - $genderCmd.=" --dbfile=".$dbPath." --run=".$runName; + $genderCmd.=" --dbfile=".$dbPath." --run=".$runName; push(@cmds, $genderCmd); if ($mafHet) { my $mhout = $outDir.'/'.$fileNames{'het_by_maf'}; @@ -344,35 +376,201 @@ sub run { if (!$simPath) { $cmd = $cmd." --no-intensity "; } push(@cmds, $cmd); } - push(@cmds, getPlateHeatmapCommands($dbopt, $iniPath, $outDir, $title, + push(@cmds, getPlateHeatmapCommands($dbopt, $iniPath, $outDir, $title, $intensity, \%fileNames)); - my @densityTerms = ('cat', $outDir.'/'.$fileNames{'sample_cr_het'}, '|', - "$Bin/plot_cr_het_density.pl", "--title=".$title, + my @densityTerms = ('cat', $outDir.'/'.$fileNames{'sample_cr_het'}, '|', + "$Bin/plot_cr_het_density.pl", "--title=".$title, "--out_dir=".$outDir); push(@cmds, join(' ', @densityTerms)); push(@cmds, "$Bin/plot_fail_causes.pl --title=$title --inipath=$iniPath --config=$configPath --input $outDir/qc_results.json --cr-het $outDir/sample_cr_het.txt --output-dir $outDir"); ### execute commands ### - foreach my $cmd (@cmds) { - my $result = system($cmd); - if ($result!=0) { + foreach my $cmd (@cmds) { + my $result = system($cmd); + if ($result!=0) { die qq("Command finished with non-zero exit status: "$cmd"\n); } } ### create PDF report my $texPath = $outDir."/pipeline_summary.tex"; my $genderThresholdPath = $outDir."/sample_xhet_gender_thresholds.txt"; - createReports($texPath, $statusJson, $idJson, $configPath, $dbPath, + createReports($texPath, $statusJson, $idJson, $configPath, $dbPath, $genderThresholdPath, $outDir, $texIntroPath); ### exclude failed samples from pipeline DB if ($filter) { # second pass -- evaluate filter metrics/thresholds # update DB unless the --include option is in effect - $csvPath = $outDir."/filter_results.csv"; + $csvPath = $outDir."/filter_results.csv"; $statusJson = $outDir."/filter_results.json"; - collate($outDir, 
$configPath, $filter, $dbPath, $iniPath, + collate($outDir, $configPath, $filter, $dbPath, $iniPath, $statusJson, $metricJson, $csvPath, $exclude); } ## create 'supplementary' directory and move files cleanup($outDir); return 1; } + + +__END__ + + +=head1 NAME + +run_qc + +=head1 SYNOPSIS + +run_qc.pl [ options ] PLINK_STEM + +PLINK_STEM is the prefix for binary plink files (without .bed, .bim, .fam +extension). May include directory names, eg. /home/foo/project where plink +files are /home/foo/project.bed, etc. + +Options: + + --plink Prefix for binary plink files (without .bed, .bim, + .fam extension). May include directory names, + eg. /home/foo/project where files are + /home/foo/project.bed, etc. + + --output-dir=PATH Directory for QC output + + --sim=PATH Path to SIM file for intensity metrics. + See note [1] below. + + --dbpath=PATH Path to pipeline database .db file. Required. + + --inipath=PATH Path to .ini file containing general pipeline and + database configuration; local default is $DEFAULT_INI + + --vcf=STR Comma-separated list of paths to VCF files containing + QC plex calls for alternate identity check. See + note [2] below. + + --plex-manifests=STR Comma-separated list of paths to .tsv manifests for + QC plexes. See note [2]. + + --run=NAME Name of run in pipeline database (needed for database + update from gender check) + + --config=PATH Path to JSON config file; default is taken from + inipath + + --mafhet Find heterozygosity separately for SNP populations + with minor allele frequency greater than 1%, and + less than 1%. + + --sample-json=PATH Sample JSON file to relate Sanger sample IDs in VCF + to sample URIs in Plink data. + + --title Title for this analysis; will appear in plots + + --zcall-filter Apply default zcall filter; see note [3] below. + + --illuminus-filter Apply default illuminus filter; see note [3]. + + --filter=PATH Read custom filter criteria from PATH. See note [3]. 
+ + --include Do not exclude failed samples from the pipeline DB. + See note [3] below. + +=head2 NOTES + +=over + +=item 1. + +If --sim is not specified, but the intensity files magnitude.txt and +xydiff.txt are present in the pipeline output directory, intensity metrics +will be read from the files. This allows intensity metrics to be computed only +once when multiple callers are used on the same dataset. + +=item 2. + +The --plex-manifests and --vcf options, with appropriate arguments, +are required to run the alternate identity check. If neither option is +specified, the check will be omitted; specifying only one is an error. Arguments to both +options are comma-separated lists of file paths; the individual paths may +not contain commas. The order of paths is not significant. + +=item 3. + +The --zcall-filter, --illuminus-filter, and --filter options enable "prefilter" mode: + +=over 2 + +=item * + +Samples which fail the filter criteria are excluded in the pipeline +SQLite DB. This ensures that failed samples are not input to +subsequent analyses using the same DB. + +=item * + +Filter criteria are determined by one of the following options: + +=over 3 + +=item 1. + +--illuminus-filter Default illuminus criteria + +=item 2. + +--zcall-filter Default zcall criteria + +=item 3. + +--filter=PATH Custom criteria, given by the JSON file at PATH. + +=back + +=item * + +If more than one of the above options is specified, an error is +raised. If none of them is specified, no filtering is carried out. + +=item * + +Additional CSV and JSON summary files are written to describe the +prefilter results. + +=item * + +If the --include option is in effect, filter summary files will be +written but samples will not be excluded from the SQLite DB. + +=back + + +=back + + +=head1 DESCRIPTION + +Main QC script for genotyping datasets. Runs a suite of QC metrics and +produces reports, plots, and supplementary data files. 
+ +=head1 METHODS + +None + +=head1 AUTHOR + +Iain Bancarz + +=head1 COPYRIGHT AND DISCLAIMER + +Copyright (c) 2012, 2013, 2014, 2015, 2016 Genome Research Limited. +All Rights Reserved. + +This program is free software: you can redistribute it and/or modify +it under the terms of the Perl Artistic License or the GNU General +Public License as published by the Free Software Foundation, either +version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +=cut diff --git a/src/perl/etc/ready_qc_fluidigm.json b/src/perl/etc/ready_qc_fluidigm.json index 565a1031a..c4f7d905c 100644 --- a/src/perl/etc/ready_qc_fluidigm.json +++ b/src/perl/etc/ready_qc_fluidigm.json @@ -1,5 +1,5 @@ { - "irods_data_path": "/seq/fluidigm", + "data_path": "/seq/fluidigm", "platform": "fluidigm", "reference_name": "Homo_sapiens (1000Genomes)", "reference_path": "/seq/fluidigm/multiplexes", diff --git a/src/perl/etc/ready_qc_sequenom.json b/src/perl/etc/ready_qc_sequenom.json index 344859280..ee0f0ebaf 100644 --- a/src/perl/etc/ready_qc_sequenom.json +++ b/src/perl/etc/ready_qc_sequenom.json @@ -1,7 +1,7 @@ { - "irods_data_path": "/seq/sequenom", + "data_path": "/seq/sequenom", "platform": "sequenom", - "reference_name": "Homo_sapiens (1000Genomes)", + "reference_name": "Homo_sapiens (GRCh37_53)", "reference_path": "/seq/sequenom/multiplexes", "snpset_name": "W30467", "read_snpset_version": "1.0", diff --git a/src/perl/etc/snpsets.ini b/src/perl/etc/snpsets.ini index b58361a6a..0539ba05f 100644 --- a/src/perl/etc/snpsets.ini +++ b/src/perl/etc/snpsets.ini @@ -30,10 +30,13 @@ name=HumanCoreExome-12v1-0 name=Human1M-Duov3_H name=HumanOmniExpressExome-8v1-2 name=HumanCoreExome-12v1-1 +name=HumanCoreExome-24v1-1 name=HumanMethylation450v1 
name=HumanCoreExome-24v1-0 name=HumanOmni25Exome-8v1-1 name=MEGA_Consortium +name=MEGA_Consortium_v2 +name=Infinium-MethylationEPIC [sequenom] name=W30467 diff --git a/src/perl/lib/WTSI/NPG/Annotator.pm b/src/perl/lib/WTSI/NPG/Annotator.pm index 6b9aa23ea..68b3aa3d4 100644 --- a/src/perl/lib/WTSI/NPG/Annotator.pm +++ b/src/perl/lib/WTSI/NPG/Annotator.pm @@ -5,6 +5,7 @@ use Data::Dump qw(dump); use Moose::Role; use File::Basename; +use WTSI::NPG::iRODS::Metadata; # has attribute name constants use WTSI::NPG::Utilities qw(md5sum); our $VERSION = ''; @@ -13,7 +14,7 @@ our @DEFAULT_FILE_SUFFIXES = qw(.csv .gtc .idat .tif .tsv .txt .xls .xlsx .xml); our $SEQUENCESCAPE_LIMS_ID = 'SQSCP'; -with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::Annotation'; +with 'WTSI::DNAP::Utilities::Loggable'; =head2 make_creation_metadata @@ -30,9 +31,9 @@ with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::Annotation'; sub make_creation_metadata { my ($self, $creator, $creation_time, $publisher) = @_; - return ([$self->dcterms_creator_attr => $creator->as_string], - [$self->dcterms_created_attr => $creation_time->iso8601], - [$self->dcterms_publisher_attr => $publisher->as_string]); + return ([$DCTERMS_CREATOR => $creator->as_string], + [$DCTERMS_CREATED => $creation_time->iso8601], + [$DCTERMS_PUBLISHER => $publisher->as_string]); } =head2 make_modification_metadata @@ -49,7 +50,7 @@ sub make_creation_metadata { sub make_modification_metadata { my ($self, $modification_time) = @_; - return ([$self->dcterms_modified_attr => $modification_time->iso8601]); + return ([$DCTERMS_MODIFIED => $modification_time->iso8601]); } =head2 make_sample_metadata @@ -81,7 +82,7 @@ sub make_sample_metadata { $flag = 1; } - push @meta, [$self->sample_consent_attr => $flag]; + push @meta, [$SAMPLE_CONSENT => $flag]; } else { $self->logcarp(sprintf($msg, 'consent_withdrawn', dump($record))); @@ -115,38 +116,38 @@ sub make_sample_metadata { if ($record->{id_lims} eq $SEQUENCESCAPE_LIMS_ID) { # Sample 
processed by Sequencescape; sanger_sample_id must be # present - $ensure->('sanger_sample_id', $self->dcterms_identifier_attr); + $ensure->('sanger_sample_id', $DCTERMS_IDENTIFIER); } else { # Sample processed elsewhere; sanger_sample_id may not be # present - $maybe->('sanger_sample_id', $self->dcterms_identifier_attr); + $maybe->('sanger_sample_id', $DCTERMS_IDENTIFIER); } # Sample ID comes from 'id_sample_lims' column - $ensure->('id_sample_lims', $self->sample_id_attr); + $ensure->('id_sample_lims', $SAMPLE_ID); } else { # This metadata obtained from Sequencescape warehouse. # Sample processed by Sequencescape; sanger_sample_id must be # present - $ensure->('sanger_sample_id', $self->dcterms_identifier_attr); + $ensure->('sanger_sample_id', $DCTERMS_IDENTIFIER); # Sample ID comes from 'internal_id' column - $ensure->('internal_id', $self->sample_id_attr); + $ensure->('internal_id', $SAMPLE_ID); } - $ensure->('name', $self->sample_name_attr); - $ensure->('study_id', $self->study_id_attr); + $ensure->('name', $SAMPLE_NAME); + $ensure->('study_id', $STUDY_ID); - $maybe->('study_title', $self->study_title_attr); - $maybe->('supplier_name', $self->sample_supplier_name_attr); - $maybe->('accession_number', $self->sample_accession_number_attr); - $maybe->('cohort', $self->sample_cohort_attr); - $maybe->('control', $self->sample_control_attr); - $maybe->('donor_id', $self->sample_donor_id_attr); - $maybe->('common_name', $self->sample_common_name_attr); + $maybe->('study_title', $STUDY_TITLE); + $maybe->('supplier_name', $SAMPLE_SUPPLIER_NAME); + $maybe->('accession_number', $SAMPLE_ACCESSION_NUMBER); + $maybe->('cohort', $SAMPLE_COHORT); + $maybe->('control', $SAMPLE_CONTROL); + $maybe->('donor_id', $SAMPLE_DONOR_ID); + $maybe->('common_name', $SAMPLE_COMMON_NAME); return @meta; } @@ -175,7 +176,7 @@ sub make_type_metadata { my @meta; if ($suffix) { - push @meta, [$self->file_type_attr => $suffix]; + push @meta, [$FILE_TYPE => $suffix]; } return @meta; @@ -200,7 
+201,7 @@ sub make_md5_metadata { $self->logconfess("Failed to make MD5 for '$file'"); } - return ([$self->file_md5_attr => $md5]); + return ([$FILE_MD5 => $md5]); } =head2 make_ticket_metadata @@ -217,7 +218,7 @@ sub make_md5_metadata { sub make_ticket_metadata { my ($self, $ticket_number) = @_; - return ([$self->ticket_attr => $ticket_number]); + return ([$RT_TICKET => $ticket_number]); } sub make_fingerprint { diff --git a/src/perl/lib/WTSI/NPG/Expression/AnalysisPublisher.pm b/src/perl/lib/WTSI/NPG/Expression/AnalysisPublisher.pm index c75d2adb2..77318eb83 100644 --- a/src/perl/lib/WTSI/NPG/Expression/AnalysisPublisher.pm +++ b/src/perl/lib/WTSI/NPG/Expression/AnalysisPublisher.pm @@ -14,6 +14,7 @@ use WTSI::NPG::Publisher; use WTSI::NPG::SimplePublisher; use WTSI::NPG::Utilities qw(collect_files); use WTSI::NPG::iRODS; +use WTSI::NPG::iRODS::Metadata; # has attribute name constants our $VERSION = ''; @@ -62,7 +63,7 @@ sub BUILD { } sub publish { - my ($self, $publish_dest, $uuid) = @_; + my ($self, $publish_dest, $input_uuid) = @_; defined $publish_dest or $self->logconfess('A defined publish_dest argument is required'); @@ -78,7 +79,7 @@ sub publish { my $irods = $self->irods; my $analysis_coll; - my $analysis_uuid; + my $uuid; my $num_samples = 0; my $num_objects = 0; @@ -86,39 +87,39 @@ sub publish { # Analysis directory my @analysis_meta; - push(@analysis_meta, $self->make_analysis_metadata($uuid)); - unless ($uuid) { + push(@analysis_meta, $self->make_analysis_metadata($input_uuid)); + unless ($input_uuid) { push(@analysis_meta, $self->make_creation_metadata($self->affiliation_uri, $self->publication_time, $self->accountee_uri)); } - my @uuid_meta = grep { $_->[0] eq $self->analysis_uuid_attr } + my @uuid_meta = grep { $_->[0] eq $ANALYSIS_UUID } @analysis_meta; - $analysis_uuid = $uuid_meta[0]->[1]; - if ($analysis_uuid) { - $self->debug("Found analysis_uuid '$analysis_uuid' in metadata: ", + $uuid = $uuid_meta[0]->[1]; + if ($uuid) { + 
$self->debug("Found analysis_uuid '$uuid' in metadata: ", dump(\@analysis_meta)); } else { $self->logconfess("Failed to find an analysis UUID in metadata: ", dump(\@analysis_meta)); } - $analysis_coll = $self->ensure_analysis_collection($publish_dest, $uuid); my @analysis_files = $self->find_analysis_files; foreach my $file (@analysis_files) { - $self->publish_analysis_file($analysis_coll, $file, $analysis_uuid); + $self->publish_analysis_file($analysis_coll, $file, $uuid); } foreach my $sample (@{$self->manifest->samples}) { my @sample_objects = $irods->find_objects_by_meta ($self->sample_archive, - [$self->dcterms_identifier_attr => $sample->{sample_id}], - [$self->expression_beadchip_attr => $sample->{beadchip}], - [$self->expression_beadchip_section_attr => $sample->{beadchip_section}]); + [$DCTERMS_IDENTIFIER => $sample->{sample_id}], + [$EXPRESSION_BEADCHIP => $sample->{beadchip}], + [$EXPRESSION_BEADCHIP_SECTION => $sample->{beadchip_section}]); + unless (@sample_objects) { $self->logconfess("Failed to find data in iRODS in sample archive '", $self->sample_archive, "' for sample '", @@ -133,7 +134,7 @@ sub publish { # Xref analysis to sample studies my @studies = map { $_->{value} } - $obj->find_in_metadata($self->study_id_attr); + $obj->find_in_metadata($STUDY_ID); if (@studies) { $self->debug("Sample '", $sample->{sample_id}, "' has metadata for ", @@ -141,7 +142,7 @@ sub publish { foreach my $study (@studies) { unless (exists $studies_seen{$study}) { - push(@analysis_meta, [$self->study_id_attr => $study]); + push(@analysis_meta, [$STUDY_ID => $study]); $studies_seen{$study}++; } } @@ -153,7 +154,7 @@ sub publish { } # Xref samples to analysis UUID - $obj->add_avu($self->analysis_uuid_attr, $analysis_uuid); + $obj->add_avu($ANALYSIS_UUID, $uuid); ++$num_objects; } @@ -173,10 +174,10 @@ sub publish { "data objects for $num_samples samples"); } catch { $self->error("Failed to publish: ", $_); - undef $analysis_uuid; + undef $uuid; }; - return 
$analysis_uuid; + return $uuid; } sub ensure_analysis_collection { diff --git a/src/perl/lib/WTSI/NPG/Expression/Annotation.pm b/src/perl/lib/WTSI/NPG/Expression/Annotation.pm deleted file mode 100644 index fd184d1be..000000000 --- a/src/perl/lib/WTSI/NPG/Expression/Annotation.pm +++ /dev/null @@ -1,64 +0,0 @@ -use utf8; - -package WTSI::NPG::Expression::Annotation; - -use Moose::Role; - -our $VERSION = ''; - -our %EXPRESSION_METADATA_ATTR = - (analysis_uuid => 'analysis_uuid', - expression_project_title => 'dcterms:title', - expression_beadchip => 'beadchip', - expression_beadchip_design => 'beadchip_design', - expression_beadchip_section => 'beadchip_section', - expression_plate_name => 'gex_plate', - expression_plate_well => 'gex_well', - expression_norm_method => 'normalisation_method', - expression_summary_group => 'summary_group', - expression_summary_type => 'summary_type'); - -my $meta = __PACKAGE__->meta; - -foreach my $attr_name (keys %EXPRESSION_METADATA_ATTR) { - my %options = (is => 'ro', - isa => 'Str', - required => 1, - default => $EXPRESSION_METADATA_ATTR{$attr_name}); - - $meta->add_attribute($attr_name . '_attr', %options); -} - -no Moose; - -1; - -__END__ - -=head1 NAME - -Annotation - Metadata attribute names. - -=head1 DESCRIPTION - -Provides methods to access metadata attribute names. - -=head1 AUTHOR - -Keith James - -=head1 COPYRIGHT AND DISCLAIMER - -Copyright (c) 2014 Genome Research Limited. All Rights Reserved. - -This program is free software: you can redistribute it and/or modify -it under the terms of the Perl Artistic License or the GNU General -Public License as published by the Free Software Foundation, either -version 3 of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -=cut diff --git a/src/perl/lib/WTSI/NPG/Expression/Annotator.pm b/src/perl/lib/WTSI/NPG/Expression/Annotator.pm index a8f7e979d..6e69df1ec 100644 --- a/src/perl/lib/WTSI/NPG/Expression/Annotator.pm +++ b/src/perl/lib/WTSI/NPG/Expression/Annotator.pm @@ -6,7 +6,9 @@ use List::AllUtils qw(any); use Moose::Role; use UUID; -with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::Expression::Annotation'; +use WTSI::NPG::iRODS::Metadata; # has attribute name constants + +with 'WTSI::DNAP::Utilities::Loggable'; our $VERSION = ''; @@ -28,15 +30,15 @@ sub make_infinium_metadata { my ($self, $resultset) = @_; my @meta = - ([$self->dcterms_identifier_attr => $resultset->sample_id], - [$self->expression_beadchip_attr => $resultset->beadchip], - [$self->expression_beadchip_section_attr => $resultset->beadchip_section]); + ([$DCTERMS_IDENTIFIER => $resultset->sample_id], + [$EXPRESSION_BEADCHIP => $resultset->beadchip], + [$EXPRESSION_BEADCHIP_SECTION => $resultset->beadchip_section]); if ($resultset->plate_id) { - push @meta, [$self->expression_plate_name_attr => $resultset->plate_id]; + push @meta, [$EXPRESSION_PLATE_NAME => $resultset->plate_id]; } if ($resultset->well_id) { - push @meta, [$self->expression_plate_well_attr => $resultset->well_id]; + push @meta, [$EXPRESSION_PLATE_WELL => $resultset->well_id]; } return @meta; @@ -51,9 +53,9 @@ sub make_profile_metadata { or $self->logconfess("Invalid summary type '$type'"); my @meta = - ([$self->expression_summary_group_attr => $grouping], - [$self->expression_summary_type_attr => $type], - [$self->expression_norm_method_attr => $normalisation_method]); + ([$EXPRESSION_SUMMARY_GROUP => $grouping], + [$EXPRESSION_SUMMARY_TYPE => $type], + [$EXPRESSION_NORM_METHOD => $normalisation_method]); return @meta; } @@ -64,12 +66,12 @@ sub make_profile_annotation_metadata { any { $type eq $_ } @VALID_SUMMARY_TYPES or $self->logconfess("Invalid summary type '$type'"); - return ([$self->expression_summary_type_attr => $type]); + return 
([$EXPRESSION_SUMMARY_TYPE => $type]); } =head2 make_analysis_metadata - Arg [1] : UUID to use instead of gereating a new one. Optional. + Arg [1] : UUID to use instead of generating a new one. Optional. Example : my @meta = $obj->make_analysis_metadata() Description: Return a list of metadata key/value pairs describing an analysis. Returntype : array of arrayrefs @@ -90,7 +92,7 @@ sub make_analysis_metadata { UUID::unparse($uuid_bin, $uuid_str); } - my @meta = ([$self->analysis_uuid_attr => $uuid_str]); + my @meta = ([$ANALYSIS_UUID => $uuid_str]); return @meta; } @@ -98,10 +100,10 @@ sub make_analysis_metadata { sub infinium_fingerprint { my ($self, @meta) = @_; - return $self->make_fingerprint([$self->expression_beadchip_attr, - $self->expression_beadchip_section_attr, - $self->expression_plate_name_attr, - $self->expression_plate_well_attr], + return $self->make_fingerprint([$EXPRESSION_BEADCHIP, + $EXPRESSION_BEADCHIP_SECTION, + $EXPRESSION_PLATE_NAME, + $EXPRESSION_PLATE_WELL], \@meta); } diff --git a/src/perl/lib/WTSI/NPG/Expression/InfiniumDataObject.pm b/src/perl/lib/WTSI/NPG/Expression/InfiniumDataObject.pm index 1f73fb5ec..321e032dc 100644 --- a/src/perl/lib/WTSI/NPG/Expression/InfiniumDataObject.pm +++ b/src/perl/lib/WTSI/NPG/Expression/InfiniumDataObject.pm @@ -7,12 +7,14 @@ use Data::Dump qw(dump); use Moose; use Try::Tiny; -our $VERSION = ''; +use WTSI::NPG::iRODS::Metadata; # has attribute name constants -with 'WTSI::NPG::Annotator', 'WTSI::NPG::Expression::Annotator'; +our $VERSION = ''; extends 'WTSI::NPG::iRODS::DataObject'; +with 'WTSI::NPG::Annotator', 'WTSI::NPG::Expression::Annotator'; + sub update_secondary_metadata { my ($self, $ssdb) = @_; @@ -20,17 +22,17 @@ sub update_secondary_metadata { my $well; my $sample_id; - my $plate_avu = $self->get_avu($self->expression_plate_name_attr); + my $plate_avu = $self->get_avu($EXPRESSION_PLATE_NAME); if ($plate_avu) { $plate = $plate_avu->{value}; } - my $well_avu = 
$self->get_avu($self->expression_plate_well_attr); + my $well_avu = $self->get_avu($EXPRESSION_PLATE_WELL); if ($well_avu) { $well = $well_avu->{value}; } - my $sample_id_avu = $self->get_avu($self->dcterms_identifier_attr); + my $sample_id_avu = $self->get_avu($DCTERMS_IDENTIFIER); if ($sample_id_avu) { $sample_id = $sample_id_avu->{value}; } diff --git a/src/perl/lib/WTSI/NPG/Expression/ProfileAnnotation.pm b/src/perl/lib/WTSI/NPG/Expression/ProfileAnnotation.pm index a1d019c1c..fbb6725c1 100644 --- a/src/perl/lib/WTSI/NPG/Expression/ProfileAnnotation.pm +++ b/src/perl/lib/WTSI/NPG/Expression/ProfileAnnotation.pm @@ -6,8 +6,7 @@ use Moose; our $VERSION = ''; -with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::Expression::Annotation', - 'WTSI::NPG::iRODS::Storable'; +with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::iRODS::Storable'; around BUILDARGS => sub { my ($orig, $class, @args) = @_; diff --git a/src/perl/lib/WTSI/NPG/Expression/SampleProbeProfile.pm b/src/perl/lib/WTSI/NPG/Expression/SampleProbeProfile.pm index c1df5f6fc..399d57b9c 100644 --- a/src/perl/lib/WTSI/NPG/Expression/SampleProbeProfile.pm +++ b/src/perl/lib/WTSI/NPG/Expression/SampleProbeProfile.pm @@ -8,8 +8,7 @@ use WTSI::NPG::Utilities qw(trim); our $VERSION = ''; -with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::Expression::Annotation', - 'WTSI::NPG::iRODS::Storable'; +with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::iRODS::Storable'; our $NORMALISATION_HEADER_PROPERTY = 'Normalization'; diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Annotation.pm b/src/perl/lib/WTSI/NPG/Genotyping/Annotation.pm deleted file mode 100644 index 9e36c4e4c..000000000 --- a/src/perl/lib/WTSI/NPG/Genotyping/Annotation.pm +++ /dev/null @@ -1,69 +0,0 @@ -use utf8; - -package WTSI::NPG::Genotyping::Annotation; - -use Moose::Role; - -our $VERSION = ''; - -our %GENOTYPING_METADATA_ATTR = - (analysis_uuid => 'analysis_uuid', - infinium_project_title => 'dcterms:title', - infinium_beadchip => 'beadchip', - 
infinium_beadchip_design => 'beadchip_design', - infinium_beadchip_section => 'beadchip_section', - infinium_plate_name => 'infinium_plate', - infinium_plate_well => 'infinium_well', - infinium_sample_name => 'infinium_sample', - sequenom_plate_name => 'sequenom_plate', - sequenom_plate_well => 'sequenom_well', - sequenom_plex_name => 'sequenom_plex', - fluidigm_plate_name => 'fluidigm_plate', - fluidigm_plate_well => 'fluidigm_well', - fluidigm_plex_name => 'fluidigm_plex', - manual_qc => 'manual_qc'); - -my $meta = __PACKAGE__->meta; - -foreach my $attr_name (keys %GENOTYPING_METADATA_ATTR) { - my %options = (is => 'ro', - isa => 'Str', - required => 1, - default => $GENOTYPING_METADATA_ATTR{$attr_name}); - - $meta->add_attribute($attr_name . '_attr', %options); -} - -no Moose; - -1; - -__END__ - -=head1 NAME - -Annotation - Metadata attribute names. - -=head1 DESCRIPTION - -Provides methods to access metadata attribute names. - -=head1 AUTHOR - -Keith James - -=head1 COPYRIGHT AND DISCLAIMER - -Copyright (c) 2014 Genome Research Limited. All Rights Reserved. - -This program is free software: you can redistribute it and/or modify -it under the terms of the Perl Artistic License or the GNU General -Public License as published by the Free Software Foundation, either -version 3 of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -=cut diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Annotator.pm b/src/perl/lib/WTSI/NPG/Genotyping/Annotator.pm index 1dbead72a..d76bf659e 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/Annotator.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/Annotator.pm @@ -5,9 +5,11 @@ package WTSI::NPG::Genotyping::Annotator; use Moose::Role; use UUID; +use WTSI::NPG::iRODS::Metadata; # has attribute name constants + our $VERSION = ''; -with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::Genotyping::Annotation'; +with 'WTSI::DNAP::Utilities::Loggable'; =head2 make_infinium_metadata @@ -24,33 +26,33 @@ sub make_infinium_metadata { my ($self, $if_sample) = @_; return - ([$self->infinium_beadchip_attr => $if_sample->{beadchip}], - [$self->infinium_beadchip_section_attr => $if_sample->{beadchip_section}], - [$self->infinium_beadchip_design_attr => $if_sample->{beadchip_design}], - [$self->infinium_project_title_attr => $if_sample->{project}], - [$self->infinium_sample_name_attr => $if_sample->{sample}], - [$self->infinium_plate_name_attr => $if_sample->{plate}], - [$self->infinium_plate_well_attr => $if_sample->{well}]); + ([$INFINIUM_BEADCHIP => $if_sample->{beadchip}], + [$INFINIUM_BEADCHIP_SECTION => $if_sample->{beadchip_section}], + [$INFINIUM_BEADCHIP_DESIGN => $if_sample->{beadchip_design}], + [$INFINIUM_PROJECT_TITLE => $if_sample->{project}], + [$INFINIUM_SAMPLE_NAME => $if_sample->{sample}], + [$INFINIUM_PLATE_NAME => $if_sample->{plate}], + [$INFINIUM_PLATE_WELL => $if_sample->{well}]); } sub make_sequenom_metadata { my ($self, $well) = @_; - return ([$self->sequenom_plate_name_attr => $well->{plate}], - [$self->sequenom_plate_well_attr => $well->{well}]); + return ([$SEQUENOM_PLATE_NAME => $well->{plate}], + [$SEQUENOM_PLATE_WELL => $well->{well}]); } sub make_fluidigm_metadata { my ($self, $well) = @_; - return ([$self->fluidigm_plate_name_attr => $well->{plate}], - [$self->fluidigm_plate_well_attr => $well->{well}]); + return ([$FLUIDIGM_PLATE_NAME => $well->{plate}], + 
[$FLUIDIGM_PLATE_WELL => $well->{well}]); } sub make_manual_qc_metadata { my ($self, $manual_qc) = @_; - return ([$self->manual_qc_attr => $manual_qc]); + return ([$QC_STATE => $manual_qc]); } =head2 make_analysis_metadata @@ -72,10 +74,10 @@ sub make_analysis_metadata { UUID::generate($uuid_bin); UUID::unparse($uuid_bin, $uuid_str); - my @meta = ([$self->analysis_uuid_attr => $uuid_str]); + my @meta = ([$ANALYSIS_UUID => $uuid_str]); foreach my $title (@$genotyping_project_titles) { - push(@meta, [$self->infinium_project_title_attr => $title]); + push(@meta, [$INFINIUM_PROJECT_TITLE => $title]); } return @meta; @@ -84,29 +86,29 @@ sub make_analysis_metadata { sub infinium_fingerprint { my ($self, @meta) = @_; - return $self->make_fingerprint([$self->infinium_beadchip_attr, - $self->infinium_beadchip_section_attr, - $self->infinium_beadchip_design_attr, - $self->infinium_project_title_attr, - $self->infinium_sample_name_attr, - $self->infinium_plate_name_attr, - $self->infinium_plate_well_attr], + return $self->make_fingerprint([$INFINIUM_BEADCHIP, + $INFINIUM_BEADCHIP_SECTION, + $INFINIUM_BEADCHIP_DESIGN, + $INFINIUM_PROJECT_TITLE, + $INFINIUM_SAMPLE_NAME, + $INFINIUM_PLATE_NAME, + $INFINIUM_PLATE_WELL], \@meta); } sub sequenom_fingerprint { my ($self, @meta) = @_; - return $self->make_fingerprint([$self->sequenom_plate_name_attr, - $self->sequenom_plate_well_attr], + return $self->make_fingerprint([$SEQUENOM_PLATE_NAME, + $SEQUENOM_PLATE_WELL], \@meta); } sub fluidigm_fingerprint { my ($self, @meta) = @_; - return $self->make_fingerprint([$self->fluidigm_plate_name_attr, - $self->fluidigm_plate_well_attr], + return $self->make_fingerprint([$FLUIDIGM_PLATE_NAME, + $FLUIDIGM_PLATE_WELL], \@meta); } diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/AssayDataObject.pm b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/AssayDataObject.pm index 510b291f5..22d4a52d2 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/AssayDataObject.pm +++ 
b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/AssayDataObject.pm @@ -5,14 +5,15 @@ use Data::Dump qw(dump); use Moose; use Try::Tiny; +use WTSI::NPG::iRODS::Metadata; # has attribute name constants use WTSI::NPG::Genotyping::Fluidigm::AssayResultSet; our $VERSION = ''; -with 'WTSI::NPG::Annotator', 'WTSI::NPG::Genotyping::Annotator'; - extends 'WTSI::NPG::iRODS::DataObject'; +with 'WTSI::NPG::Annotator', 'WTSI::NPG::Genotyping::Annotator'; + =head2 assay_resultset Arg [1] : None @@ -36,12 +37,12 @@ sub update_secondary_metadata { my $fluidigm_barcode; my $well; - my $fluidigm_barcode_avu = $self->get_avu($self->fluidigm_plate_name_attr); + my $fluidigm_barcode_avu = $self->get_avu($FLUIDIGM_PLATE_NAME); if ($fluidigm_barcode_avu) { $fluidigm_barcode = $fluidigm_barcode_avu->{value}; } - my $well_avu = $self->get_avu($self->fluidigm_plate_well_attr); + my $well_avu = $self->get_avu($FLUIDIGM_PLATE_WELL); if ($well_avu) { $well = $well_avu->{value}; } diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/ExportFile.pm b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/ExportFile.pm index 8b7a56091..a085be2bf 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/ExportFile.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/ExportFile.pm @@ -6,6 +6,7 @@ package WTSI::NPG::Genotyping::Fluidigm::ExportFile; use Moose; use Text::CSV; +use WTSI::NPG::iRODS::Metadata; # has attribute name constants use WTSI::NPG::Utilities qw(trim); our $VERSION = ''; @@ -16,8 +17,7 @@ our $HEADER_BARCODE_COL = 2; our $HEADER_CONF_THRESHOLD_ROW = 5; our $HEADER_CONF_THRESHOLD_COL = 1; -with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::Addressable', - 'WTSI::NPG::Genotyping::Annotation'; +with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::Addressable'; has 'file_name' => (is => 'ro', @@ -165,8 +165,8 @@ sub fluidigm_metadata { $self->logconfess("FluidigmExportFile '", $self->fluidigm_barcode, "' has no sample address '$address'"); - return ([$self->fluidigm_plate_name_attr => 
$self->fluidigm_barcode], - [$self->fluidigm_plate_well_attr => $address]); + return ([$FLUIDIGM_PLATE_NAME => $self->fluidigm_barcode], + [$FLUIDIGM_PLATE_WELL => $address]); } =head2 fluidigm_fingerprint diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/Publisher.pm b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/Publisher.pm index 157e28b32..7adc62a5b 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/Publisher.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/Publisher.pm @@ -13,6 +13,8 @@ use WTSI::NPG::Genotyping::Fluidigm::AssayDataObject; use WTSI::NPG::Genotyping::Fluidigm::ExportFile; use WTSI::NPG::Genotyping::SNPSet; use WTSI::NPG::iRODS; +use WTSI::NPG::iRODS::Metadata; + use WTSI::NPG::Publisher; our $VERSION = ''; @@ -153,8 +155,8 @@ sub publish_samples { $self->debug("Publishing raw Fluidigm CSV data file '", $self->resultset->export_file, "'"); my @meta = - ([$self->fluidigm_plate_name_attr => $export_file->fluidigm_barcode], - [$self->dcterms_audience_attr => $self->audience_uri->as_string]); + ([$FLUIDIGM_PLATE_NAME => $export_file->fluidigm_barcode], + [$DCTERMS_AUDIENCE => $self->audience_uri->as_string]); # Publish the unsplit file $publisher->publish_file($self->resultset->export_file, \@meta, @@ -189,7 +191,7 @@ sub publish_samples { my $snpset_name = $self->_find_snpset_name($snpset); my $obj = WTSI::NPG::Genotyping::Fluidigm::AssayDataObject->new - ($self->irods, $rods_path)->add_avu($self->fluidigm_plex_name_attr, + ($self->irods, $rods_path)->add_avu($FLUIDIGM_PLEX_NAME, $snpset_name); # Now that adding the secondary metadata is fast enough, we can @@ -255,8 +257,8 @@ sub _build_snpsets { my @snpset_paths = $self->irods->find_objects_by_meta ($self->reference_path, - [$self->fluidigm_plex_name_attr => '%', 'like'], - [$self->reference_genome_name_attr => $self->reference_name]); + [$FLUIDIGM_PLEX_NAME => '%', 'like'], + [$REFERENCE_GENOME_NAME => $self->reference_name]); my @snpsets; foreach my $rods_path (@snpset_paths) { @@ 
-286,7 +288,7 @@ sub _find_resultset_snpset { my @matched; foreach my $snpset (@{$self->snpsets}) { my @names = $snpset->data_object->find_in_metadata - ($self->fluidigm_plex_name_attr); + ($FLUIDIGM_PLEX_NAME); my @snp_names = $snpset->snp_names; my $num_snps = scalar @snp_names; @@ -330,7 +332,7 @@ sub _find_snpset_name { my ($self, $snpset) = @_; my @snpset_names = map { $_->{value} } - $snpset->data_object->find_in_metadata($self->fluidigm_plex_name_attr); + $snpset->data_object->find_in_metadata($FLUIDIGM_PLEX_NAME); my $num_names = scalar @snpset_names; $num_names > 0 or diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/Subscriber.pm b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/Subscriber.pm index 57f3881cb..0b496288d 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/Subscriber.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/Subscriber.pm @@ -12,6 +12,7 @@ use WTSI::NPG::Genotyping::Fluidigm::AssayResultSet; use WTSI::NPG::Genotyping::SNP; use WTSI::NPG::Genotyping::SNPSet; use WTSI::NPG::iRODS; +use WTSI::NPG::iRODS::Metadata; # has attribute name constants our $VERSION = ''; @@ -20,8 +21,7 @@ our $NO_CALL_GENOTYPE = 'NN'; our $CHROMOSOME_JSON_ATTR = 'chromosome_json'; -with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::Annotation', - 'WTSI::NPG::Genotyping::Annotation', 'WTSI::NPG::Genotyping::Subscription'; +with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::Genotyping::Subscription'; has '_plex_name_attr' => (is => 'ro', @@ -29,44 +29,11 @@ has '_plex_name_attr' => init_arg => undef, default => sub { my ($self) = @_; - return $self->fluidigm_plex_name_attr; + return $FLUIDIGM_PLEX_NAME; }, lazy => 1, documentation => 'iRODS attribute for QC plex name'); -=head2 get_assay_resultset - - Arg [1] : Str sample identifier (dcterms:identifier) - Arg [n] : Optional additional query specs as ArrayRefs. 
- Example : $sub->get_assay_resultset('qc', '0123456789', - [study => 12345]); - Description: Fetch an assay result by SNP set, sample and other optional - criteria. Raises an error if the query finds >1 result set. - Returntype : WTSI::NPG::Genotyping::Fluidigm::AssayResultSet - -=cut - -sub get_assay_resultset { - my ($self, $sample_identifier, @query_specs) = @_; - my ($resultsets, $vcf_meta) = $self->get_assay_resultsets_and_vcf_metadata - ([$sample_identifier], @query_specs); - - my $num_samples = scalar keys %$resultsets; - if ($num_samples > 1) { - $self->logconfess("The assay results query returned data for >1 ", - "sample: [", join(q{, }, keys %$resultsets, "]")); - } - - my @resultsets = @{$resultsets->{$sample_identifier}}; - my $num_resultsets = scalar @resultsets; - if ($num_resultsets > 1) { - $self->logconfess("The assay results query was not specific enough; ", - "$num_resultsets result sets were returned: [", - join(q{, }, map { $_->str } @resultsets), "]"); - } - - return shift @resultsets; -} =head2 get_assay_resultsets_and_vcf_metadata @@ -189,6 +156,22 @@ sub get_calls { return \@calls; } + +=head2 platform_name + + Arg [1] : None + Example : my $name = $sub->platform_name(); + Description: Return an identifier string for the genotyping platform; + in this case, 'fluidigm'. Used to construct a default + callset name in the Subscription role. + Returntype : Str + +=cut + +sub platform_name { + return 'fluidigm'; +} + sub _build_calls_at { my ($self, $assay_address, $resultsets) = @_; @@ -204,7 +187,7 @@ sub _build_calls_at { else { $self->trace("Adding fluidigm result at '$assay_address'"); - my $snp = $self->snpset->named_snp($result->snp_assayed); + my $snp = $self->read_snpset->named_snp($result->snp_assayed); push @calls, WTSI::NPG::Genotyping::Call->new(snp => $snp, genotype => $result->canonical_call, @@ -248,11 +231,11 @@ specific samples from iRODS. 
=head1 AUTHOR -Keith James +Keith James , Iain Bancarz =head1 COPYRIGHT AND DISCLAIMER -Copyright (C) 2014, 2015 Genome Research Limited. All Rights Reserved. +Copyright (C) 2014, 2015, 2016 Genome Research Limited. All Rights Reserved. This program is free software: you can redistribute it and/or modify it under the terms of the Perl Artistic License or the GNU General diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Infinium/AnalysisPublisher.pm b/src/perl/lib/WTSI/NPG/Genotyping/Infinium/AnalysisPublisher.pm index 5fd7541d4..dd43c6518 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/Infinium/AnalysisPublisher.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/Infinium/AnalysisPublisher.pm @@ -8,6 +8,7 @@ use Moose; use Try::Tiny; use WTSI::NPG::iRODS; +use WTSI::NPG::iRODS::Metadata; # has attribute name constants use WTSI::NPG::Publisher; our $VERSION = ''; @@ -136,7 +137,7 @@ sub publish { $self->info("Created new collection '", $analysis_coll->str, "'"); - my @uuid_meta = grep { $_->[0] eq $self->analysis_uuid_attr } + my @uuid_meta = grep { $_->[0] eq $ANALYSIS_UUID } @analysis_meta; $analysis_uuid = $uuid_meta[0]->[1]; @@ -165,9 +166,9 @@ sub publish { my @sample_objects = $irods->find_objects_by_meta ($self->sample_archive, - [$self->dcterms_title_attr => $project_title], - [$self->infinium_beadchip_attr => $sample->beadchip], - [$self->infinium_beadchip_section_attr => $sample->rowcol]); + [$DCTERMS_TITLE => $project_title], + [$INFINIUM_BEADCHIP => $sample->beadchip], + [$INFINIUM_BEADCHIP_SECTION => $sample->rowcol]); unless (@sample_objects) { $self->logconfess("Failed to find data in iRODS in sample archive '", @@ -182,7 +183,7 @@ sub publish { # Xref analysis to sample studies my @studies = map { $_->{value} } - $obj->find_in_metadata($self->study_id_attr); + $obj->find_in_metadata($STUDY_ID); if (@studies) { $self->debug("Sample '$included_sample_name' has metadata for ", @@ -190,7 +191,7 @@ sub publish { foreach my $study (@studies) { unless (exists 
$studies_seen{$study}) { - push(@analysis_meta, [$self->study_id_attr => $study]); + push(@analysis_meta, [$STUDY_ID => $study]); $studies_seen{$study}++; } } @@ -202,7 +203,7 @@ sub publish { } # Xref samples to analysis UUID - $obj->add_avu($self->analysis_uuid_attr, $analysis_uuid); + $obj->add_avu($ANALYSIS_UUID, $analysis_uuid); ++$num_objects; } diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Infinium/InfiniumDataObject.pm b/src/perl/lib/WTSI/NPG/Genotyping/Infinium/InfiniumDataObject.pm index 4364ff75e..292aff5ec 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/Infinium/InfiniumDataObject.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/Infinium/InfiniumDataObject.pm @@ -5,24 +5,26 @@ use Data::Dump qw(dump); use Moose; use Try::Tiny; -our $VERSION = ''; +use WTSI::NPG::iRODS::Metadata; -with 'WTSI::NPG::Annotator', 'WTSI::NPG::Genotyping::Annotator'; +our $VERSION = ''; extends 'WTSI::NPG::iRODS::DataObject'; +with 'WTSI::NPG::Annotator', 'WTSI::NPG::Genotyping::Annotator'; + sub update_secondary_metadata { my ($self, $ssdb) = @_; my $infinium_barcode; my $well; - my $infinium_barcode_avu = $self->get_avu($self->infinium_plate_name_attr); + my $infinium_barcode_avu = $self->get_avu($INFINIUM_PLATE_NAME); if ($infinium_barcode_avu) { $infinium_barcode = $infinium_barcode_avu->{value}; } - my $well_avu = $self->get_avu($self->infinium_plate_well_attr); + my $well_avu = $self->get_avu($INFINIUM_PLATE_WELL); if ($well_avu) { $well = $well_avu->{value}; } diff --git a/src/perl/lib/WTSI/NPG/Genotyping/QC/Collation.pm b/src/perl/lib/WTSI/NPG/Genotyping/QC/Collation.pm index 46351dec6..67f058dc8 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/QC/Collation.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/QC/Collation.pm @@ -9,6 +9,7 @@ package WTSI::NPG::Genotyping::QC::Collation; use strict; use warnings; use Carp; +use File::Slurp qw(read_file); use IO::Uncompress::Gunzip qw($GunzipError); # for duplicate_full.txt.gz use JSON; use WTSI::NPG::Genotyping::Database::Pipeline; @@ -16,7 
+17,6 @@ use WTSI::NPG::Genotyping::QC::QCPlotShared qw(getDatabaseObject getPlateLocationsFromPath meanSd readQCMetricInputs - readFileToString readSampleData); use Exporter; @@ -457,7 +457,7 @@ sub readDuplicates { sub readMetricThresholds { # exportable convenience method to read metric thresholds from JSON config my $configPath = shift; - my %config = %{decode_json(readFileToString($configPath))}; + my %config = %{decode_json(read_file($configPath))}; my %thresholds = %{$config{'Metrics_thresholds'}}; return \%thresholds; } @@ -523,7 +523,7 @@ sub resultsHighMafHet { sub resultsIdentity { my $inputDir = shift; my $inPath = $inputDir.'/'.$FILENAMES{'identity'}; - my %data = %{decode_json(readFileToString($inPath))}; + my %data = %{decode_json(read_file($inPath))}; return $data{'results'}; } @@ -542,7 +542,7 @@ sub resultsMafHet { carp "Omitting MAF heterozygosity; cannot read input \"$inPath\": $!"; return 0; } - my %data = %{decode_json(readFileToString($inPath))}; + my %data = %{decode_json(read_file($inPath))}; my %results; foreach my $sample (keys(%data)) { # TODO modify output format of het_by_maf.py @@ -696,7 +696,7 @@ sub collate { else { @metricNames = keys(%thresholdConfig); } - %config = %{decode_json(readFileToString($configPath))}; + %config = %{decode_json(read_file($configPath))}; %FILENAMES = %{$config{'collation_names'}}; # 0) reprocess duplicate results for given threshold (if any) diff --git a/src/perl/lib/WTSI/NPG/Genotyping/QC/QCPlotShared.pm b/src/perl/lib/WTSI/NPG/Genotyping/QC/QCPlotShared.pm index 6a0072a0b..c01f58b1f 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/QC/QCPlotShared.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/QC/QCPlotShared.pm @@ -11,6 +11,7 @@ use Carp; use Config::Tiny; use Cwd; use FindBin qw($Bin); +use File::Slurp qw(read_file); use POSIX qw(floor); use Log::Log4perl qw(:easy); use JSON; @@ -20,7 +21,7 @@ use Exporter; Log::Log4perl->easy_init($ERROR); our @ISA = qw/Exporter/; -our @EXPORT_OK = qw/defaultPipelineDBConfig 
defaultConfigDir defaultJsonConfig defaultTexIntroPath getDatabaseObject getPlateLocations getPlateLocationsFromPath getSummaryStats meanSd median parseLabel parseThresholds plateLabel readFileToString readMetricResultHash readQCFileNames readQCMetricInputs readQCNameArray readQCShortNameHash readSampleData readSampleInclusion readThresholds $INI_PATH $INI_FILE_DEFAULT $UNKNOWN_PLATE $UNKNOWN_ADDRESS/; +our @EXPORT_OK = qw/defaultPipelineDBConfig defaultConfigDir defaultJsonConfig defaultTexIntroPath getDatabaseObject getPlateLocations getPlateLocationsFromPath getSummaryStats meanSd median parseLabel parseThresholds plateLabel readMetricResultHash readQCFileNames readQCMetricInputs readQCNameArray readQCShortNameHash readSampleData readSampleInclusion readThresholds $INI_PATH $INI_FILE_DEFAULT $UNKNOWN_PLATE $UNKNOWN_ADDRESS/; our $VERSION = ''; @@ -244,16 +245,6 @@ sub plateLabel { return $label; } -sub readFileToString { - # generic method to read a file (eg. json) into a single string variable - my $inPath = shift(); - if (!(defined($inPath)) || !(-r $inPath)) { carp "Cannot read input path \"$inPath\"\n"; } - open my $in, "<", $inPath; - my @lines = <$in>; - close $in; - return join('', @lines); -} - sub readMetricResultHash { # read QC results data structure from JSON file # assumes top-level structure is a hash @@ -286,7 +277,7 @@ sub readQCFileNames { sub readQCNameConfig { # read qc metric names from JSON config my $inPath = shift(); - my %names = %{decode_json(readFileToString($inPath))}; + my %names = %{decode_json(read_file($inPath))}; return %names; } @@ -327,7 +318,7 @@ sub readQCResultHash { # read QC results data structure from JSON file # assumes top-level structure is a hash my $inPath = shift; - my %results = %{decode_json(readFileToString($inPath))}; + my %results = %{decode_json(read_file($inPath))}; return %results; } @@ -349,29 +340,12 @@ sub readSampleData { push(@data, \@fields); } close $in; - return @data; + return @data; } - -sub 
readSampleInclusion { - # get inclusion/exclusion status of each sample in pipeline DB - # returns a hash reference - my $dbfile = shift; - my $result = `echo 'select name,include from sample;' | sqlite3 $dbfile`; - my @lines = split("\n", $result); - my %inclusion; - foreach my $line (@lines) { - my @fields = split('\|', $line); - my $status = pop @fields; - my $name = join("|", @fields); # OK even if name includes | characters - $inclusion{$name} = $status; - } - return \%inclusion; -} - sub readThresholds { # read QC metric thresholds from config path my $configPath = shift; - my %config = %{decode_json(readFileToString($configPath))}; + my %config = %{decode_json(read_file($configPath))}; return parseThresholds(%config); } diff --git a/src/perl/lib/WTSI/NPG/Genotyping/QC/Reports.pm b/src/perl/lib/WTSI/NPG/Genotyping/QC/Reports.pm index 07ce00a5d..8c8e4ac56 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/QC/Reports.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/QC/Reports.pm @@ -11,6 +11,7 @@ use warnings; use Carp; use Cwd qw/getcwd abs_path/; use File::Basename; +use File::Slurp qw/read_file/; use JSON; use POSIX qw/strftime/; use WTSI::NPG::Genotyping::QC::QCPlotShared qw/defaultJsonConfig getDatabaseObject getSummaryStats meanSd median readQCNameArray readQCShortNameHash plateLabel/; @@ -336,14 +337,6 @@ sub readJson { return $ref; } -sub readFileToString { - my $inPath = shift; - open my $in, "<", $inPath || croak "Cannot open input path $inPath"; - my $string = join("", <$in>); - close $in || croak "Cannot close input path $inPath"; - return $string; -} - sub textForDatasets { # text for dataset identification; includes optional directory name # fields: run project data_supplier snpset directory @@ -539,7 +532,7 @@ sub writeSummaryLatex { open my $out, ">", $texPath || croak "Cannot open output path $texPath"; print $out latexHeader($title, $author); print $out latexSectionInput($qcName, $dbPath); - print $out readFileToString($introPath); # new section = Preface 
+ print $out read_file($introPath); # new section = Preface print $out latexSectionMetrics($config, $genderThresholdPath); print $out latexSectionResults($config, $qcDir, $resultPath, $idPath); print $out latexFooter(); diff --git a/src/perl/lib/WTSI/NPG/Genotyping/QC_wip/Check/Identity.pm b/src/perl/lib/WTSI/NPG/Genotyping/QC_wip/Check/Identity.pm index 76abed347..0c9959f30 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/QC_wip/Check/Identity.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/QC_wip/Check/Identity.pm @@ -41,9 +41,7 @@ has 'swap_threshold' => has 'pass_threshold' => (is => 'ro', - isa => 'Num', - required => 1, - default => 0.85, + isa => 'Maybe[Num]', documentation => 'Minimum identity for metric pass'); # Bayesian model parameters, for SampleIdentityBayseian object @@ -68,7 +66,6 @@ has 'sample_mismatch_prior' => # SMP has 'ecp_default' => (is => 'ro', isa => 'Maybe[Num]', - default => 0.40625, # het 50%, maf 25% documentation => 'Default probability of equivalent calls for a '. 
'given SNP on distinct samples', ); @@ -141,18 +138,13 @@ sub find_identity { # now construct empty results for any samples missing from QC data # by convention, these are appended at the end of the results array $self->debug("Inserting empty results for missing samples"); + my $smp = $self->sample_mismatch_prior; foreach my $sample_name (@{$self->sample_names}) { if ($missing{$sample_name}) { - my $calls_p = $self->production_calls->{$sample_name}; + my $args = $self->_get_sample_args($sample_name); my $result = WTSI::NPG::Genotyping::QC_wip::Check::SampleIdentityBayesian-> - new( - sample_name => $sample_name, - snpset => $self->snpset, - production_calls => $calls_p, - qc_calls => [], - pass_threshold => $self->pass_threshold, - ); + new($args); push @id_results, $result; } } @@ -488,6 +480,8 @@ sub _results_to_json_spec { $summary{'assayed_pass_rate'} = sprintf "%.4f", $pass_rate; # get params (may be using default values from sample ID object) my $result = $identity_results->[0]; + my $pass_threshold = $self->pass_threshold || + $result->pass_threshold; my $ecp = $self->equivalent_calls_probability || $result->equivalent_calls_probability; my $xer = $self->expected_error_rate || @@ -508,7 +502,7 @@ sub _results_to_json_spec { $spec{'swap'} = $swap_evaluation; $spec{'summary'} = \%summary; # id total/failed/missing $spec{'params'} = { - pass_threshold => $self->pass_threshold, + pass_threshold => $pass_threshold, swap_threshold => $self->swap_threshold, equivalent_calls_probability => $ecp, consensus_ecp => $consensus_ecp, @@ -539,19 +533,23 @@ sub _get_failed_results { return \@failed; } -# get arguments to construct a SampleIdentityBayesian object - sub _get_sample_args { + # get arguments to construct a SampleIdentityBayesian object + # QC calls may be omitted, eg. 
for a sample missing from QC data my ($self, $sample_name, $qc_calls) = @_; + $qc_calls ||= []; my $production_calls = $self->production_calls->{$sample_name}; my %args = ( - logger => $self->logger, - sample_name => $sample_name, - snpset => $self->snpset, - production_calls => $production_calls, - qc_calls => $qc_calls, - pass_threshold => $self->pass_threshold, + logger => $self->logger, + sample_name => $sample_name, + snpset => $self->snpset, + production_calls => $production_calls, + qc_calls => $qc_calls ); + # update with optional attributes (if any) + if (defined($self->pass_threshold)) { + $args{'pass_threshold'} = $self->pass_threshold; + } if (defined($self->equivalent_calls_probability)) { $args{'equivalent_calls_probability'} = $self->equivalent_calls_probability; @@ -568,6 +566,7 @@ sub _get_sample_args { return \%args; } + no Moose; 1; diff --git a/src/perl/lib/WTSI/NPG/Genotyping/QC_wip/Check/IdentitySimulator.pm b/src/perl/lib/WTSI/NPG/Genotyping/QC_wip/Check/IdentitySimulator.pm new file mode 100644 index 000000000..9480c8a8d --- /dev/null +++ b/src/perl/lib/WTSI/NPG/Genotyping/QC_wip/Check/IdentitySimulator.pm @@ -0,0 +1,459 @@ + +package WTSI::NPG::Genotyping::QC_wip::Check::IdentitySimulator; + +use Moose; + +use MooseX::Types::Moose qw(Int); + +use WTSI::NPG::Genotyping::Call; +use WTSI::NPG::Genotyping::QC_wip::Check::SampleIdentityBayesian; +use WTSI::NPG::Genotyping::SNPSet; +use WTSI::NPG::Genotyping::Types qw(:all); + +our $VERSION = ''; + +with 'WTSI::DNAP::Utilities::Loggable'; + +# required arguments + +has 'calls' => + (is => 'ro', + isa => 'ArrayRef[WTSI::NPG::Genotyping::Call]', + required => 1, + documentation => 'ArrayRef of QC Call objects to generate simulated data' + ); + +has 'snpset' => + (is => 'ro', + isa => 'WTSI::NPG::Genotyping::SNPSet', + required => 1, + documentation => 'SNPSet for creation of SampleIdentityBayesian '. + 'objects. 
Must include all SNPs in the "calls" attribute.'); + +# optional argument + +has 'pass_threshold' => + (is => 'ro', + isa => 'Maybe[Num]', + documentation => 'Minimum posterior probability of identity for '. + 'sample pass'); + +# optional params for identity calculation +# passed to SampleIdentityBayesian constructor +# no default values; instead use defaults of SampleIdentityBayesian class + +has 'equivalent_calls_probability' => # ECP + (is => 'ro', + isa => 'Maybe[HashRef[Num]]', + documentation => 'Probability of equivalent genotype calls on distinct '. + 'samples, for each SNP'); + +has 'expected_error_rate' => # XER + (is => 'ro', + isa => 'Maybe[Num]', + documentation => 'Expected rate of experimental error; determines '. + 'probability of non-equivalent calls on identical samples'); + +has 'sample_mismatch_prior' => # SMP + (is => 'ro', + isa => 'Maybe[Num]', + documentation => 'Prior probability of a non-identical sample'); + +# non-input arguments + +has 'total_calls' => + (is => 'ro', + isa => 'Int', + lazy => 1, + default => sub { my ($self) = @_; return scalar @{$self->calls} }, + init_arg => undef, + ); + +has '_identity_params' => + (is => 'ro', + isa => 'HashRef', + lazy => 1, + builder => '_build_identity_params', + init_arg => undef, + ); + +our $DUMMY_SAMPLE_NAME = 'dummy_sample'; + +# class to generate simulated results for the Bayesian identity check +# create fake production and QC calls +# record concordance and identity to generate plots + + + +=head2 find_identity_vary_ecp + + Arg [1] : Maybe[Num] + Arg [2] : Maybe[Num] + Arg [3] : Maybe[Int] + + Example : $results = $simulator->find_identity_vary_ecp(0, 0.05, 20); + Description: Find concordance and identity for different values of the + Equivalent Calls Probability (ECP) parameter. The arguments + control the range of ECP to be used. 
+  Returntype : ArrayRef[ArrayRef[Num]]
+
+=cut
+
+
+sub find_identity_vary_ecp {
+    # Vary the equivalent calls probability (ECP), ie. the probability of
+    # equivalent calls on different samples. For each ECP value, identity
+    # is evaluated for every number of equivalent calls from 0 up to
+    # total_calls. Each result row is [ECP, concordance, identity].
+    my ($self, $start, $incr, $total) = @_;
+    $start ||= 0;
+    $incr  ||= 0.05;
+    $total ||= 20;
+    $self->info("ECP: Start = $start, increment = $incr, total = $total");
+    my $ecps = $self->_generate_variable_list($start, $incr, $total);
+    # Work on a shallow copy: _identity_params is a lazily built attribute
+    # which hands back the same hashref on every call, so writing
+    # ecp_default into it directly would carry the last simulated value
+    # over into later simulations run on this object.
+    my %params = %{$self->_identity_params};
+    my @results;
+    foreach my $ecp (@{$ecps}) {
+        $params{ecp_default} = $ecp;
+        my $equivalent = 0;
+        while ($equivalent <= $self->total_calls) {
+            my $id = $self->_find_identity($self->calls, \%params,
+                                           $equivalent,
+                                           $self->total_calls);
+            my $concord = $equivalent / $self->total_calls;
+            push @results, [$ecp, $concord, $id];
+            $equivalent++;
+        }
+    }
+    return \@results;
+}
+
+
+=head2 find_identity_vary_qcr
+
+  Arg [1]    : Maybe[Num]
+  Arg [2]    : Maybe[Num]
+  Arg [3]    : Maybe[Int]
+
+  Example    : $results = $simulator->find_identity_vary_qcr(1, 1, 4);
+  Description: Find concordance and identity for different values of the
+               QC Runs (QCR) parameter. The arguments control the range of
+               QCR to be used.
+ Returntype : ArrayRef[ArrayRef[Num]] + +=cut + + +sub find_identity_vary_qcr { + # vary number of (identical) QC runs + my ($self, $start, $incr, $total) = @_; + $start ||= 1; + $incr ||= 1; + $total ||= 4; + $self->info("QCR: Start = $start, increment = $incr, total = $total"); + unless (is_Int($start) && is_Int($incr) && is_Int($total)) { + $self->logcroak("Number of QC runs must be an integer"); + } + my $min = 1; + my $max = $start + $incr*$total + 1; + my $qc_totals = $self->_generate_variable_list($start, $incr, + $total, $min, $max); + my @results; + foreach my $qc_total (@{$qc_totals}) { + my $equivalent = 0; + while ($equivalent <= $self->total_calls) { + my $id = $self->_find_identity($self->calls, + $self->_identity_params, + $equivalent, + $self->total_calls, + $qc_total); + my $concord = $equivalent / $self->total_calls; + push @results, [$qc_total, $concord, $id]; + $equivalent++; + } + } + return \@results; +} + + +=head2 find_identity_vary_qcs + + Arg [1] : Maybe[Num] + Arg [2] : Maybe[Num] + Arg [3] : Maybe[Int] + + Example : $results = $simulator->find_identity_vary_qcs(4, 1, 21); + Description: Find concordance and identity for different values of the + total QC SNPs (QCS) parameter. The arguments control the + range of QCS to be used. 
+ Returntype : ArrayRef[ArrayRef[Num]] + +=cut + +sub find_identity_vary_qcs { + # vary the number of QC SNP calls + my ($self, $start, $incr, $total) = @_; + $start ||= 4; + $incr ||= 1; + $total ||= 21; + $self->info("QCS: Start = $start, increment = $incr, total = $total"); + unless (is_Int($start) && is_Int($incr) && is_Int($total)) { + $self->logcroak("Number of QC SNPs must be an integer"); + } + my $min = 1; + my $max = $self->total_calls; # QC SNPs cannot exceed total calls + my $qcs_list = $self->_generate_variable_list($start, $incr, + $total, $min, $max); + my @results; + foreach my $qcs (@{$qcs_list}) { + my $equivalent = 0; + while ($equivalent <= $qcs) { + my $id = $self->_find_identity($self->calls, + $self->_identity_params, + $equivalent, + $qcs); + my $concord = $equivalent / $qcs; + push @results, [$qcs, $concord, $id]; + $equivalent++; + } + } + return \@results; +} + + +=head2 find_identity_vary_smp + + Arg [1] : Maybe[Num] + Arg [2] : Maybe[Num] + Arg [3] : Maybe[Int] + + Example : $results = $simulator->find_identity_vary_smp(0.01, 0.05, 20); + Description: Find concordance and identity for different values of the + Sample Mismatch Prior (SMP) parameter, ie. the Bayesian prior + probability of non-equivalent samples. The arguments control + the range of SMP to be used. 
+  Returntype : ArrayRef[ArrayRef[Num]]
+
+=cut
+
+sub find_identity_vary_smp {
+    # Vary the Sample Mismatch Prior (SMP) parameter, ie. the Bayesian
+    # prior probability of non-equivalent samples. For each SMP value,
+    # identity is evaluated for every number of equivalent calls from 0
+    # up to total_calls. Each result row is [SMP, concordance, identity].
+    my ($self, $start, $incr, $total) = @_;
+    $start ||= 0.01;
+    $incr  ||= 0.05;
+    $total ||= 20;
+    $self->info("SMP: Start = $start, increment = $incr, total = $total");
+    my $smp_list = $self->_generate_variable_list($start, $incr, $total);
+    # Work on a shallow copy: _identity_params is a lazily built attribute
+    # which hands back the same hashref on every call, so writing
+    # sample_mismatch_prior into it directly would carry the last
+    # simulated value over into later simulations run on this object.
+    my %params = %{$self->_identity_params};
+    my @results;
+    foreach my $smp (@{$smp_list}) {
+        $params{sample_mismatch_prior} = $smp;
+        my $equivalent = 0;
+        while ($equivalent <= $self->total_calls) {
+            my $id = $self->_find_identity($self->calls, \%params,
+                                           $equivalent,
+                                           $self->total_calls);
+            my $concord = $equivalent / $self->total_calls;
+            push @results, [$smp, $concord, $id];
+            $equivalent++;
+        }
+    }
+    return \@results;
+}
+
+
+=head2 find_identity_vary_xer
+
+  Arg [1]    : Maybe[Num]
+  Arg [2]    : Maybe[Num]
+  Arg [3]    : Maybe[Int]
+
+  Example    : $results = $simulator->find_identity_vary_xer(0.01, 0.01, 20);
+  Description: Find concordance and identity for different values of the
+               Expected Error Rate (XER) parameter. The arguments control
+               the range of XER to be used.
+  Returntype : ArrayRef[ArrayRef[Num]]
+
+=cut
+
+sub find_identity_vary_xer {
+    # Vary the expected error rate (XER), ie. the probability of
+    # non-equivalent calls on the same sample. For each XER value,
+    # identity is evaluated for every number of equivalent calls from 0
+    # up to total_calls. Each result row is [XER, concordance, identity].
+    my ($self, $start, $incr, $total) = @_;
+    $start ||= 0.01;
+    $incr  ||= 0.01;
+    $total ||= 20;
+    $self->info("XER: Start = $start, increment = $incr, total = $total");
+    my $xer_list = $self->_generate_variable_list($start, $incr, $total);
+    # Work on a shallow copy: _identity_params is a lazily built attribute
+    # which hands back the same hashref on every call, so writing
+    # expected_error_rate into it directly would carry the last simulated
+    # value over into later simulations run on this object.
+    my %params = %{$self->_identity_params};
+    my @results;
+    foreach my $xer (@{$xer_list}) {
+        $params{expected_error_rate} = $xer;
+        my $equivalent = 0;
+        while ($equivalent <= $self->total_calls) {
+            my $id = $self->_find_identity($self->calls, \%params,
+                                           $equivalent,
+                                           $self->total_calls);
+            my $concord = $equivalent / $self->total_calls;
+            push @results, [$xer, $concord, $id];
+            $equivalent++;
+        }
+    }
+    return \@results;
+}
+
+sub _build_identity_params {
+    # Build a generic params hash for SampleIdentityBayesian construction.
+    # production_calls and qc_calls are not set here; they are specified
+    # for each simulation type. Optional attributes are copied only when
+    # defined, so that the SampleIdentityBayesian defaults apply otherwise.
+    my ($self) = @_;
+    my %params;
+    $params{'sample_name'} = $DUMMY_SAMPLE_NAME;
+    if (defined($self->pass_threshold)) {
+        # was "args{'pass_threshold'}": missing sigil and wrong hash name
+        $params{'pass_threshold'} = $self->pass_threshold;
+    }
+    if (defined($self->equivalent_calls_probability)) {
+        $params{'equivalent_calls_probability'} =
+            $self->equivalent_calls_probability;
+    }
+    if (defined($self->expected_error_rate)) {
+        $params{'expected_error_rate'} = $self->expected_error_rate;
+    }
+    if (defined($self->sample_mismatch_prior)) {
+        $params{'sample_mismatch_prior'} = $self->sample_mismatch_prior;
+    }
+    # The SNP set always comes from the snpset attribute of this object.
+    $params{'snpset'} = $self->snpset;
+    return \%params;
+}
+
+sub _find_identity {
+    # 'workhorse' method to evaluate the identity metric with given inputs
+    # $calls = arrayref of Call objects
+    # $params = hashref of params for
SampleIdentityBayesian object creation + my ($self, $calls, $params, $equivalent, $total, $qc_total, $maf) = @_; + $qc_total ||= 1; + $maf ||= 0.25; + my ($calls_p, $calls_q) = $self->_generate_call_subsets($calls, + $equivalent, + $total, + $qc_total, + $maf, + ); + my %args = (production_calls => $calls_p, + qc_calls => $calls_q, + ); + foreach my $key (keys %{$params}) { + $args{$key} = $params->{$key}; + } + my $sib = WTSI::NPG::Genotyping::QC_wip::Check::SampleIdentityBayesian-> + new(\%args); + return $sib->identity; +} + +sub _generate_call_subsets { + my ($self, $raw_calls, $equivalent, $total, $qc_total, $maf) = @_; + $qc_total ||= 1; # total QC calls per SNP + my @raw_calls = @{$raw_calls}; + if ($equivalent > $total) { + $self->logcroak("Number of equivalent calls cannot be greater ", + "than total"); + } elsif ($total > scalar(@raw_calls)) { + $self->logcroak("Total number of calls cannot be greater than ", + "size of raw call set"); + } + my @production_calls = @raw_calls[0..$total-1]; + my @qc_calls = (); + my $i = 0; + foreach my $call (@production_calls) { + my $qc_call; + if ($i < $equivalent) { + $qc_call = $call; + } else { + # change to non-equivalent genotype for QC call + $qc_call = $self->_flip_genotype($call, $maf); + if ($call->equivalent($qc_call)) { + $self->logcroak("Flipped call should not be equivalent!"); + } + } + push @qc_calls, $qc_call; + my $j = 0; + while ($j < $qc_total - 1) { # add more identical QC calls, if needed + push @qc_calls, $qc_call->clone(); + $j++; + } + $i++; + } + return (\@production_calls, \@qc_calls); +} + +sub _flip_genotype { + # given a call, create a new one with non-equivalent genotype on same SNP + # preserve overall heterozygosity: So het->hom, hom->het + # Use MAF to randomly choose between major/minor het + # if no-call, return an identical no-call + # preserve qscore (if any) + my ($self, $call, $maf) = @_; + $maf ||= 0.25; + my $snp = $call->snp; + my $new_genotype; + my $is_call = 1; + if 
(!($call->is_call)) { + $new_genotype = 'NN'; + $is_call = 0; + } elsif ($call->is_homozygous || $call->is_homozygous_complement) { + # create heterozygous new call + $new_genotype = $snp->ref_allele.$snp->alt_allele; + } elsif ($call->is_heterozygous || $call->is_heterozygous_complement) { + # create homozygous new call, random choice using MAF + if (rand() < $maf) { + $new_genotype = $snp->alt_allele.$snp->alt_allele; + } else { + $new_genotype = $snp->ref_allele.$snp->ref_allele; + } + } else { + $self->logcroak("Input call is not a no-call, homozygote ", + "or heterozygote: ", $call->str()); + } + my %args = ( + snp => $snp, + genotype => $new_genotype, + is_call => $is_call, + ); + if (defined($call->qscore)) { $args{'qscore'} = $call->qscore; } + return WTSI::NPG::Genotyping::Call->new(\%args); +} + +sub _generate_variable_list { + # generate a list of values for simulation input + # $start, $incr, $total used to generate list + # $min, $max are minimum, maximum permitted values + # (eg. 
probabilities must be between 0 and 1) + my ($self, $start, $incr, $total, $min, $max) = @_; + $min ||= 0; + $max ||= 1; + if ($start < $min) { + $self->logcroak("Starting value cannot be less than minimum of ", + $min); + } + if ($incr < 0) { + $self->logcroak("Simulation variable increment cannot be negative"); + } + my @values; + my $value = $start; + for (my $i=0;$i<$total;$i++) { + if ($value > $max) { + $self->logcroak("Simulation variable cannot be greater than ", + "maximum of ", $max); + } + push @values, $value; + $value += $incr; + } + return \@values; +} + + +no Moose; + +1; diff --git a/src/perl/lib/WTSI/NPG/Genotyping/QC_wip/Check/SampleIdentityBayesian.pm b/src/perl/lib/WTSI/NPG/Genotyping/QC_wip/Check/SampleIdentityBayesian.pm index f4dea54bb..15ed2befa 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/QC_wip/Check/SampleIdentityBayesian.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/QC_wip/Check/SampleIdentityBayesian.pm @@ -46,8 +46,10 @@ has 'qc_calls' => has 'pass_threshold' => (is => 'ro', isa => 'Num', - required => 1, - default => 0.85); + lazy => 1, + builder => '_build_pass_threshold', + documentation => 'Minimum identity for sample pass. Defaults to '. 
+ '(1 - prior probability of sample mismatch).'); # Bayesian model parameters @@ -67,11 +69,10 @@ has 'expected_error_rate' => # XER 'probability of non-equivalent calls on identical samples'); has 'sample_mismatch_prior' => # SMP - (is => 'ro', - isa => 'Num', - default => 0.01, - documentation => 'Prior probability of a non-identical sample'); - + (is => 'ro', + isa => 'Num', + default => 0.01, + documentation => 'Prior probability of a non-identical sample'); has 'ecp_default' => (is => 'ro', @@ -478,6 +479,11 @@ sub _build_ecp { return \%ecp; } +sub _build_pass_threshold { + my ($self) = @_; + return 1 - $self->sample_mismatch_prior; +} + sub _build_total_equivalent_calls { my ($self) = @_; my $k_total = 0; diff --git a/src/perl/lib/WTSI/NPG/Genotyping/SNPSet.pm b/src/perl/lib/WTSI/NPG/Genotyping/SNPSet.pm index 47ac416d2..16fd1701d 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/SNPSet.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/SNPSet.pm @@ -16,12 +16,13 @@ use WTSI::NPG::Genotyping::SNP; use WTSI::NPG::Genotyping::Types qw(:all); +use WTSI::NPG::iRODS::Metadata; # has attribute name constants + our $VERSION = ''; our @HEADER = qw(SNP_NAME REF_ALLELE ALT_ALLELE CHR POS STRAND); -with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::iRODS::Storable', - 'WTSI::NPG::Annotation'; +with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::iRODS::Storable'; has 'name' => (is => 'ro', @@ -356,7 +357,7 @@ sub _build_references { my @references; if ($self->data_object) { my @reference_name_avus = $self->data_object->find_in_metadata - ($self->reference_genome_name_attr); + ($REFERENCE_GENOME_NAME); foreach my $avu (@reference_name_avus) { push @references, WTSI::NPG::Genotyping::Reference->new diff --git a/src/perl/lib/WTSI/NPG/Genotyping/SNPSetPublisher.pm b/src/perl/lib/WTSI/NPG/Genotyping/SNPSetPublisher.pm index 8477749e7..a6c07c5c1 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/SNPSetPublisher.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/SNPSetPublisher.pm @@ -9,6 +9,7 @@ use 
Moose; use WTSI::NPG::Genotyping::SNPSet; use WTSI::NPG::Genotyping::Types qw(Platform); use WTSI::NPG::iRODS; +use WTSI::NPG::iRODS::Metadata; # has attribute name constants use WTSI::NPG::SimplePublisher; our $VERSION = ''; @@ -79,7 +80,7 @@ sub publish { my @meta = ([$snpset_name => $self->snpset_name]); foreach my $reference (@references) { - push @meta, [$self->reference_genome_name_attr => $reference->name] + push @meta, [$REFERENCE_GENOME_NAME => $reference->name] } my $rods_path = $publisher->publish_file($self->file_name, \@meta, diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/AssayDataObject.pm b/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/AssayDataObject.pm index 13413284f..9cea3232b 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/AssayDataObject.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/AssayDataObject.pm @@ -6,24 +6,26 @@ use Data::Dump qw(dump); use Moose; use Try::Tiny; -our $VERSION = ''; +use WTSI::NPG::iRODS::Metadata; -with 'WTSI::NPG::Annotator', 'WTSI::NPG::Genotyping::Annotator'; +our $VERSION = ''; extends 'WTSI::NPG::iRODS::DataObject'; +with 'WTSI::NPG::Annotator', 'WTSI::NPG::Genotyping::Annotator'; + sub update_secondary_metadata { my ($self, $snpdb, $ssdb) = @_; my $plate_name; my $well; - my $sequenom_plate_avu = $self->get_avu($self->sequenom_plate_name_attr); + my $sequenom_plate_avu = $self->get_avu($SEQUENOM_PLATE_NAME); if ($sequenom_plate_avu) { $plate_name = $sequenom_plate_avu->{value}; } - my $well_avu = $self->get_avu($self->sequenom_plate_well_attr); + my $well_avu = $self->get_avu($SEQUENOM_PLATE_WELL); if ($well_avu) { $well = $well_avu->{value}; } @@ -87,9 +89,9 @@ sub update_secondary_metadata { sub update_qc_metadata { my ($self, $snpdb) = @_; - my $sequenom_plate_avu = $self->get_avu($self->sequenom_plate_name_attr); + my $sequenom_plate_avu = $self->get_avu($SEQUENOM_PLATE_NAME); my $plate_name = $sequenom_plate_avu->{value}; - my $well_avu = $self->get_avu($self->sequenom_plate_well_attr); + my 
$well_avu = $self->get_avu($SEQUENOM_PLATE_WELL); my $well = $well_avu->{value}; # Get well manual QC status from the SNP database. diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/AssayResultSet.pm b/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/AssayResultSet.pm index 4a8115ab5..94f78dab6 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/AssayResultSet.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/AssayResultSet.pm @@ -6,10 +6,10 @@ use List::AllUtils qw(uniq); use Moose; use Text::CSV; +use WTSI::NPG::iRODS::Metadata; # has attribute name constants use WTSI::NPG::Genotyping::Sequenom::AssayResult; -with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::iRODS::Storable', - 'WTSI::NPG::Genotyping::Annotation'; +with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::iRODS::Storable'; our $VERSION = ''; @@ -83,7 +83,7 @@ sub snpset_name { "' is not in iRODS"); my @snpset_names = $self->data_object->find_in_metadata - ($self->sequenom_plex_name_attr); + ($SEQUENOM_PLEX_NAME); my $num_names = scalar @snpset_names; $num_names > 0 or diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/Publisher.pm b/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/Publisher.pm index 7a0949d4f..b7e693d03 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/Publisher.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/Publisher.pm @@ -10,10 +10,13 @@ use Text::CSV; use Try::Tiny; use URI; +use Data::Dumper; # FIXME + use WTSI::NPG::Genotyping::Sequenom::AssayDataObject; use WTSI::NPG::Genotyping::Sequenom::AssayResultSet; use WTSI::NPG::Publisher; use WTSI::NPG::iRODS; +use WTSI::NPG::iRODS::Metadata; our $VERSION = ''; @@ -164,7 +167,7 @@ sub publish_samples { my $obj = WTSI::NPG::Genotyping::Sequenom::AssayDataObject->new ($self->irods, $rods_path); - $obj->add_avu($self->sequenom_plex_name_attr, $snpset_name); + $obj->add_avu($SEQUENOM_PLEX_NAME, $snpset_name); # Now that adding the secondary metadata is fast enough, we can # run it inline here, so that the data are available diff 
--git a/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/Subscriber.pm b/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/Subscriber.pm index b31caa9a7..281694c0b 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/Subscriber.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/Sequenom/Subscriber.pm @@ -7,6 +7,7 @@ use List::AllUtils qw(all natatime); use WTSI::NPG::Genotyping::Sequenom::AssayDataObject; use WTSI::NPG::Genotyping::Sequenom::AssayResultSet; use WTSI::NPG::iRODS; +use WTSI::NPG::iRODS::Metadata; # has attribute name constants our $VERSION = ''; @@ -14,8 +15,7 @@ our $VERSION = ''; # queries. our $BATCH_QUERY_CHUNK_SIZE = 100; -with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::Annotation', - 'WTSI::NPG::Genotyping::Annotation', 'WTSI::NPG::Genotyping::Subscription'; +with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::Genotyping::Subscription'; has '_plex_name_attr' => (is => 'ro', @@ -23,7 +23,7 @@ has '_plex_name_attr' => init_arg => undef, default => sub { my ($self) = @_; - return $self->sequenom_plex_name_attr; + return $SEQUENOM_PLEX_NAME; }, lazy => 1, documentation => 'iRODS attribute for QC plex name'); @@ -67,8 +67,8 @@ sub get_assay_resultsets { while (my @ids = $iter->()) { my @id_obj_paths = $self->irods->find_objects_by_meta ($self->data_path, - [$self->sequenom_plex_name_attr => $self->snpset_name], - [$self->dcterms_identifier_attr => \@ids, 'in'], @query_specs); + [$SEQUENOM_PLEX_NAME => $self->snpset_name], + [$DCTERMS_IDENTIFIER => \@ids, 'in'], @query_specs); push @obj_paths, @id_obj_paths; } @@ -90,7 +90,7 @@ sub get_assay_resultsets { } my @sample_resultsets = grep { - $_->data_object->get_avu($self->dcterms_identifier_attr, + $_->data_object->get_avu($DCTERMS_IDENTIFIER, $sample_identifier) } @resultsets; $self->debug("Found ", scalar @sample_resultsets, " resultsets for ", @@ -151,6 +151,21 @@ sub get_assay_resultsets_and_vcf_metadata { } +=head2 platform_name + + Arg [1] : None + Example : my $name = $sub->platform_name(); + Description: Return 
an identifier string for the genotyping platform; + in this case, 'sequenom'. Used to construct a default + callset name in the Subscription role. + Returntype : Str + +=cut + +sub platform_name { + return 'sequenom'; +} + __PACKAGE__->meta->make_immutable; no Moose; @@ -188,7 +203,7 @@ Iain Bancarz =head1 COPYRIGHT AND DISCLAIMER -Copyright (C) 2015 Genome Research Limited. All Rights Reserved. +Copyright (C) 2015, 2016 Genome Research Limited. All Rights Reserved. This program is free software: you can redistribute it and/or modify it under the terms of the Perl Artistic License or the GNU General diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Subscription.pm b/src/perl/lib/WTSI/NPG/Genotyping/Subscription.pm index 802e3aec8..6cbb7e0d6 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/Subscription.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/Subscription.pm @@ -9,6 +9,7 @@ use WTSI::NPG::Genotyping::Sequenom::AssayDataObject; use WTSI::NPG::Genotyping::Sequenom::AssayResultSet; use WTSI::NPG::Genotyping::VCF::ReferenceFinder; use WTSI::NPG::iRODS; +use WTSI::NPG::iRODS::Metadata; # has attribute name constants our $VERSION = ''; @@ -22,13 +23,27 @@ our $BATCH_QUERY_CHUNK_SIZE = 100; with 'WTSI::DNAP::Utilities::Loggable'; +requires 'platform_name'; # subclasses must implement this method + +has 'callset' => + (is => 'ro', + isa => 'Str', + documentation => 'Identifier for the callset read by the Subscriber; '. 
+ 'used to disambiguate results from multiple subscribers', + lazy => 1, + default => sub { + my ($self) = @_; + return $self->platform_name().'_'.$self->snpset_name; + }, +); + has 'data_path' => (is => 'ro', isa => 'Str', required => 1, default => sub { return '/' }, writer => '_set_data_path', - documentation => 'The iRODS path under which the raw data are found'); + documentation => 'The iRODS path under which the input data are found'); has 'irods' => (is => 'ro', @@ -60,26 +75,66 @@ has 'repository' => documentation => 'Root directory containing NPG genome references', ); -has 'snpset' => - (is => 'ro', - isa => 'WTSI::NPG::Genotyping::SNPSet', - required => 1, - builder => '_build_snpset', - lazy => 1, - init_arg => undef); - has 'snpset_name' => (is => 'ro', isa => 'Str', required => 1, documentation => 'The name of the SNP set e.g. "W35961"'); +has 'read_snpset_version' => + (is => 'ro', + isa => 'Maybe[Str]', + documentation => 'SNP set version used to read assay results'); -has 'snpset_version' => +has 'write_snpset_version' => (is => 'ro', - isa => 'Str', - required => 0, - documentation => 'SNP set version used for assay results'); + isa => 'Maybe[Str]', + lazy => 1, + default => sub { + my ($self) = @_; + return $self->read_snpset_version; + }, + documentation => 'SNP set version used to write VCF output'); + +# non-input attributes + +has 'read_snpset' => + (is => 'ro', + isa => 'WTSI::NPG::Genotyping::SNPSet', + required => 1, + builder => '_build_read_snpset', + lazy => 1, + init_arg => undef, + documentation => 'SNPSet for plex results input'); + +has 'write_snpset' => + (is => 'ro', + isa => 'WTSI::NPG::Genotyping::SNPSet', + required => 1, + builder => '_build_write_snpset', + lazy => 1, + init_arg => undef, + documentation => 'SNPSet for VCF output'); + +has 'read_snpset_data_object' => + (is => 'ro', + isa => 'WTSI::NPG::iRODS::DataObject', + required => 1, + builder => '_build_read_snpset_data_object', + lazy => 1, + init_arg => undef, + 
documentation => 'Data object to use for generating SNPSet '. + 'and chromosome lengths'); + +has 'write_snpset_data_object' => + (is => 'ro', + isa => 'WTSI::NPG::iRODS::DataObject', + required => 1, + builder => '_build_write_snpset_data_object', + lazy => 1, + init_arg => undef, + documentation => 'Data object to use for VCF output; may or may not '. + 'differ from input snpset data object'); has '_chromosome_lengths' => (is => 'ro', @@ -97,15 +152,6 @@ has '_plex_name_attr' => init_arg => undef, documentation => 'iRODS attribute for QC plex name; varies by plex type'); -has '_snpset_data_object' => - (is => 'ro', - isa => 'WTSI::NPG::iRODS::DataObject', - required => 1, - builder => '_build_snpset_data_object', - lazy => 1, - init_arg => undef, - documentation => 'Data object to use for generating SNPSet '. - 'and chromosome lengths'); sub BUILD { my ($self) = @_; @@ -145,33 +191,12 @@ sub BUILD { "' does not exist or is not a directory"); } -} - - -=head2 find_irods_snpset - - Arg [1] : Str reference path to search under - Arg [2] : Str reference name in irods metadata - Arg [3] : Str snpset name in irods metadata - Arg [4] : Maybe[Str] snpset version in irods metadata - Example : my $snpset = $sub->find_irods_snpset(@args); - Description: Method to query iRODS and find a snpset object. Optional - version string is used to find the correct SNPSet version for - VCF input/output. 
- Returntype : WTSI::NPG::Genotyping::SNPSet - -=cut + # ensure that snpset attributes are valid + # attributes are lazy; want to die at object creation time, not in a + # subsequent method call + my $read_data_obj = $self->read_snpset_data_object; + my $write_data_obj = $self->write_snpset_data_object; -sub find_irods_snpset { - my ($self, $ref_path, $ref_name, $snpset_name, $snpset_version) = @_; - unless ($ref_path && $ref_name && $snpset_name) { - $self->logcroak("Missing argument(s) to find_irods_snpset"); - } - my $data_obj = $self->_find_snpset_data_object($ref_path, - $ref_name, - $snpset_name, - $snpset_version); - return WTSI::NPG::Genotyping::SNPSet->new($data_obj); } @@ -228,7 +253,7 @@ sub find_object_paths { my @id_obj_paths = $self->irods->find_objects_by_meta ($self->data_path, [$self->_plex_name_attr => $self->snpset_name], - [$self->dcterms_identifier_attr => \@ids, 'in'], @query_specs); + [$DCTERMS_IDENTIFIER => \@ids, 'in'], @query_specs); push @obj_paths, @id_obj_paths; } return @obj_paths; @@ -257,7 +282,7 @@ sub find_resultsets_index { $resultsets_index{$sample_identifier} = []; } my @sample_resultsets = grep { - $_->data_object->get_avu($self->dcterms_identifier_attr, + $_->data_object->get_avu($DCTERMS_IDENTIFIER, $sample_identifier) } @{$resultsets}; $self->debug("Found ", scalar @sample_resultsets, " resultsets for ", "sample '$sample_identifier'"); @@ -289,6 +314,7 @@ sub find_resultsets_index { sub vcf_metadata_from_irods { my ($self, $data_objects) = @_; my %vcf_meta; + $vcf_meta{'callset_name'} = [$self->callset, ]; foreach my $obj (@{$data_objects}) { # check iRODS metadata my @obj_meta = @{$obj->metadata}; foreach my $pair (@obj_meta) { @@ -326,11 +352,11 @@ sub _are_unique { sub _build_chromosome_lengths { my ($self) = @_; - my $snp_obj = $self->_snpset_data_object; + my $snp_obj = $self->read_snpset_data_object; my $chromosome_lengths; my @avus = $snp_obj->find_in_metadata($CHROMOSOME_JSON_ATTR); if (scalar(@avus)==0) { - 
$self->logwarn("No value found for snpset attribute ", + $self->logwarn("Snpset iRODS data object has no value for attribute ", "$CHROMOSOME_JSON_ATTR, returning undef"); } elsif (scalar(@avus)==1) { my %avu = %{ shift(@avus) }; @@ -345,22 +371,46 @@ sub _build_chromosome_lengths { return $chromosome_lengths; } -sub _build_snpset { +sub _build_read_snpset { my ($self) = @_; - return WTSI::NPG::Genotyping::SNPSet->new($self->_snpset_data_object); + return WTSI::NPG::Genotyping::SNPSet->new( + $self->read_snpset_data_object); +} +sub _build_write_snpset { + my ($self) = @_; + return WTSI::NPG::Genotyping::SNPSet->new( + $self->write_snpset_data_object); } -sub _build_snpset_data_object { +sub _build_read_snpset_data_object { my ($self) = @_; return $self->_find_snpset_data_object( $self->reference_path, $self->reference_name, $self->snpset_name, - $self->snpset_version + $self->read_snpset_version ); } +sub _build_write_snpset_data_object { + my ($self) = @_; + my $write_data_obj; + if (defined($self->read_snpset_version) && + defined($self->write_snpset_version) && + $self->read_snpset_version ne $self->write_snpset_version) { + $write_data_obj = $self->_find_snpset_data_object( + $self->reference_path, + $self->reference_name, + $self->snpset_name, + $self->write_snpset_version + ); + } else { + $write_data_obj = $self->read_snpset_data_object; + } + return $write_data_obj; +} + sub _find_snpset_data_object { my ($self, $ref_path, $ref_name, $snpset_name, $snpset_version) = @_; unless ($ref_path && $ref_name && $snpset_name) { @@ -397,7 +447,6 @@ sub _find_snpset_data_object { return WTSI::NPG::iRODS::DataObject->new($self->irods, $path); } - no Moose; 1; @@ -419,7 +468,7 @@ Iain Bancarz =head1 COPYRIGHT AND DISCLAIMER -Copyright (c) 2015 Genome Research Limited. All Rights Reserved. +Copyright (c) 2015, 2016 Genome Research Limited. All Rights Reserved. 
This program is free software: you can redistribute it and/or modify it under the terms of the Perl Artistic License or the GNU General diff --git a/src/perl/lib/WTSI/NPG/Genotyping/VCF/PlexResultFinder.pm b/src/perl/lib/WTSI/NPG/Genotyping/VCF/PlexResultFinder.pm new file mode 100644 index 000000000..5a5ef8aa7 --- /dev/null +++ b/src/perl/lib/WTSI/NPG/Genotyping/VCF/PlexResultFinder.pm @@ -0,0 +1,296 @@ +use utf8; + +package WTSI::NPG::Genotyping::VCF::PlexResultFinder; + +use Moose; + +use File::Slurp qw(read_file); +use File::Spec::Functions qw/catfile/; +use JSON; +use Try::Tiny; + +use WTSI::NPG::Genotyping::Fluidigm::Subscriber; +use WTSI::NPG::Genotyping::Sequenom::Subscriber; +use WTSI::NPG::Genotyping::VCF::AssayResultParser; +use WTSI::NPG::iRODS::DataObject; + +with 'WTSI::DNAP::Utilities::Loggable'; + +has 'irods' => + (is => 'ro', + isa => 'WTSI::NPG::iRODS', + required => 1, + default => sub { return WTSI::NPG::iRODS->new }, + documentation => 'An iRODS handle'); + +has 'sample_ids' => + (is => 'ro', + isa => 'ArrayRef[Str]', + required => 1, + documentation => 'Sample identifiers for query'); + +has 'subscriber_config' => + (is => 'ro', + isa => 'ArrayRef[Str]', + required => 1, + documentation => 'Paths to JSON files of parameters for Subscribers'); + +# non-input parameters + +has 'subscribers' => + (is => 'ro', + isa => 'ArrayRef', + lazy => 1, + init_arg => undef, + builder => '_build_subscribers', + documentation => 'ArrayRef of Sequenom::Subscriber and/or '. + 'Fluidigm::Subscriber objects to query iRODS' + ); + +our $VERSION = ''; + +our $SEQUENOM = 'sequenom'; +our $FLUIDIGM = 'fluidigm'; +our $PLATFORM_KEY = 'platform'; + + +=head2 write_manifests + + Arg [1] : [Str] Directory for TSV output + + Example : write_manifests($out_dir); + Description: Write the TSV plex manifest from each Subscriber object to + the given directory, for use in later pipeline workflows. + Filename is created from the callset name with the .tsv suffix. 
+ Manifest written is the same one used for VCF output (which + may or may not be the same as for the original assay). + Returntype : ArrayRef[Str] Paths for TSV output + +=cut + + +sub write_manifests { + # get the VCF SNPSet DataObject from each Subscriber + # Slurp into a string and write to given directory + # construct filename from the callset name (will be unique) + # return number of manifests written + # uses the VCF (output) snpset, if it differs from the input snpset + my ($self, $outdir) = @_; + if (! -e $outdir) { + $self->logcroak("Output directory '", $outdir, "' does not exist"); + } elsif (! -d $outdir) { + $self->logcroak("Output argument '", $outdir, "' is not a directory"); + } + my @output_paths; + if (scalar @{$self->subscribers} == 0) { + $self->logwarn("No valid Subscriber objects available; QC plex ", + "manifests cannot be found"); + } + foreach my $subscriber (@{$self->subscribers}) { + my $filename = $subscriber->callset.".tsv"; + my $output_path = catfile($outdir, $filename); + open my $out, ">", $output_path || $self->logcroak("Cannot open '", + $output_path, "'"); + print $out $subscriber->write_snpset_data_object->slurp(); + close $out || $self->logcroak("Cannot close '", $output_path, "'"); + push @output_paths, $output_path; + } + return \@output_paths; +} + +=head2 write_vcf + + Arg [1] : [Str] Directory for VCF output + + Example : write_vcf($out_dir); + Description: Write VCF with QC plex results from each Subscriber object to + the given directory. Filename is created from the + callset name with the .vcf suffix. + Returntype : ArrayRef[Str] Paths for VCF output + +=cut + +sub write_vcf { + # query each Subscriber object and write QC plex results as VCF + my ($self, $outdir) = @_; + if (! -e $outdir) { + $self->logcroak("Output directory '", $outdir, "' does not exist"); + } elsif (! 
-d $outdir) { + $self->logcroak("Output argument '", $outdir, "' is not a directory"); + } + my @vcf_paths; + foreach my $subscriber (@{$self->subscribers}) { + my $filename = $subscriber->callset.".vcf"; + my $output_path = catfile($outdir, $filename); + my $total = $self->_write_vcf_single($subscriber, $output_path); + if ($total > 0) { + push @vcf_paths, $output_path; + $self->info("Wrote $total resultsets to VCF ", $output_path); + } else { + $self->info("No resultsets found, omitting VCF output ", + "for callset '", $subscriber->callset, "'"); + } + } + if (scalar @vcf_paths == 0) { + $self->logwarn("No QC plex data found for VCF output"); + } + return \@vcf_paths; +} + +sub _build_subscribers { + my ($self) = @_; + my @subscribers; + my %callsets; + foreach my $config (@{$self->_read_subscriber_config()}) { + my %args = %{$config}; + my $platform = delete $args{$PLATFORM_KEY}; + my $subscriber; + # Subscriber creation may fail, eg. if plex manifest cannot be located + # Only warn if no valid Subscribers are created + if ($platform eq $FLUIDIGM) { + try { + $subscriber = WTSI::NPG::Genotyping::Fluidigm::Subscriber->new + (%args); + } catch { + $self->info("Unable to create Fluidigm subscriber: ", $_); + } + } elsif ($platform eq $SEQUENOM) { + try { + $subscriber = WTSI::NPG::Genotyping::Sequenom::Subscriber->new + (%args); + } catch { + $self->info("Unable to create Sequenom subscriber: ", $_); + } + } else { + $self->logcroak("Unknown plex type: '", $platform, "'"); + } + if (defined($subscriber)) { + my $callset = $subscriber->callset(); + if ($callsets{$callset}) { + $self->logcroak("Non-unique callset name '", $callset, "'"); + } else { + $callsets{$callset} = 1; + } + push @subscribers, $subscriber; + } + } + my $total = scalar @subscribers; + if ($total == 0) { + $self->logwarn("No valid iRODS subscribers could be created ", + "from given config files; QC plex data will ", + "not be retrieved"); + } else { + $self->info("Successfully created ", $total, 
" iRODS subscriber(s)", + " to query for QC plex data"); + } + return \@subscribers; +} + + +sub _read_subscriber_config { + # read query params from JSON + my ($self) = @_; + my @config; + foreach my $config_path (@{$self->subscriber_config}) { + if (-e $config_path) { + push @config, decode_json(read_file($config_path)); + } else { + $self->logcroak("Subscriber configuration path '", $config_path, + "' does not exist"); + } + } + return \@config; +} + +sub _write_vcf_single { + # write a single VCF file, from a single Subscriber + my ($self, $subscriber, $output_path) = @_; + my ($resultset_hashref, $vcf_metadata) = + $subscriber->get_assay_resultsets_and_vcf_metadata($self->sample_ids); + # unpack hashref from Subscriber.pm into an array of resultsets + my @resultsets; + foreach my $sample (keys %{$resultset_hashref}) { + my @sample_resultsets = @{$resultset_hashref->{$sample}}; + push @resultsets, @sample_resultsets; + } + my $total = scalar @resultsets; + $self->info("Found $total assay resultsets."); + if (scalar @resultsets == 0) { + $self->info("No assay result sets found for QC callset '", + $subscriber->callset, "'"); + } else { + my $vcfData = WTSI::NPG::Genotyping::VCF::AssayResultParser->new( + resultsets => \@resultsets, + contig_lengths => $subscriber->get_chromosome_lengths(), + assay_snpset => $subscriber->read_snpset, + vcf_snpset => $subscriber->write_snpset, + metadata => $vcf_metadata, + )->get_vcf_dataset(); + open my $out, ">", $output_path || + $self->logcroak("Cannot open VCF output: '", + $output_path, "'"); + print $out $vcfData->str()."\n"; + close $out || + $self->logcroak("Cannot close VCF output: '", + $output_path, "'"); + } + return $total; +} + +__PACKAGE__->meta->make_immutable; + +no Moose; + +1; + +__END__ + +=head1 NAME + +WTSI::NPG::Genotyping::VCF::PlexResultFinder + +=head1 DESCRIPTION + +Find QC plex results (eg. Sequenom, Fluidigm) in iRODS and write as VCF. 
+ +=head2 Method + +=over 1 + +=item * + +Input configuration for one or more iRODS queries + +=item * + +Query iRODS with appropriate Subscriber object + +=item * + +Call AssayResultParser on data returned by query + +=item * + +Write as one or more VCF files + +=back + +=head1 AUTHOR + +Iain Bancarz + +=head1 COPYRIGHT AND DISCLAIMER + +Copyright (c) 2016 Genome Research Limited. All Rights Reserved. + +This program is free software: you can redistribute it and/or modify +it under the terms of the Perl Artistic License or the GNU General +Public License as published by the Free Software Foundation, either +version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +=cut diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Version.pm b/src/perl/lib/WTSI/NPG/Genotyping/Version.pm index 867336538..31095b914 100644 --- a/src/perl/lib/WTSI/NPG/Genotyping/Version.pm +++ b/src/perl/lib/WTSI/NPG/Genotyping/Version.pm @@ -12,7 +12,7 @@ use Exporter; our $VERSION = ''; -our $YEARS = '2014, 2015'; +our $YEARS = '2014, 2015, 2016'; sub version_text { my $text = "WTSI Genotyping Pipeline version $VERSION\n". 
diff --git a/src/perl/t/WTSI/NPG/Database/MLWarehouseTest.pm b/src/perl/t/WTSI/NPG/Database/MLWarehouseTest.pm index 66b21d0be..6906e2785 100644 --- a/src/perl/t/WTSI/NPG/Database/MLWarehouseTest.pm +++ b/src/perl/t/WTSI/NPG/Database/MLWarehouseTest.pm @@ -1,10 +1,10 @@ -package WTSI::NPG::Database::WarehouseTest; +package WTSI::NPG::Database::MLWarehouseTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 13; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Database/WarehouseTest.pm b/src/perl/t/WTSI/NPG/Database/WarehouseTest.pm index ac00dcd65..b1a422144 100644 --- a/src/perl/t/WTSI/NPG/Database/WarehouseTest.pm +++ b/src/perl/t/WTSI/NPG/Database/WarehouseTest.pm @@ -6,7 +6,7 @@ package WTSI::NPG::Database::WarehouseTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 439; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/DatabaseTest.pm b/src/perl/t/WTSI/NPG/DatabaseTest.pm index 993b75694..08ca83a93 100644 --- a/src/perl/t/WTSI/NPG/DatabaseTest.pm +++ b/src/perl/t/WTSI/NPG/DatabaseTest.pm @@ -8,7 +8,7 @@ use warnings; use English; use File::Spec; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 3; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Expression/AnalysisPublisherTest.pm b/src/perl/t/WTSI/NPG/Expression/AnalysisPublisherTest.pm index 6a9020e14..3f7a1cd37 100644 --- a/src/perl/t/WTSI/NPG/Expression/AnalysisPublisherTest.pm +++ b/src/perl/t/WTSI/NPG/Expression/AnalysisPublisherTest.pm @@ -7,7 +7,7 @@ use strict; use warnings; use DateTime; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 10; use Test::Exception; @@ -43,11 +43,13 @@ my $study_id = 0; my $irods_tmp_coll; my $pid = $$; +my $test_num = 0; sub make_fixture : Test(setup) { my $irods = WTSI::NPG::iRODS->new; $irods_tmp_coll = - $irods->add_collection("ExpressionAnalysisPublisherTest.$pid"); + 
$irods->add_collection("ExpressionAnalysisPublisherTest.$pid.$test_num"); + $test_num++; $irods->put_collection($sample_data_path, $irods_tmp_coll); diff --git a/src/perl/t/WTSI/NPG/Expression/ChipLoadingManifestTest.pm b/src/perl/t/WTSI/NPG/Expression/ChipLoadingManifestTest.pm index ac7eb5343..5d5b30cf2 100644 --- a/src/perl/t/WTSI/NPG/Expression/ChipLoadingManifestTest.pm +++ b/src/perl/t/WTSI/NPG/Expression/ChipLoadingManifestTest.pm @@ -6,7 +6,7 @@ use strict; use warnings; use DateTime; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 8; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Expression/InfiniumDataObjectTest.pm b/src/perl/t/WTSI/NPG/Expression/InfiniumDataObjectTest.pm index c86ed05bf..79db0b750 100644 --- a/src/perl/t/WTSI/NPG/Expression/InfiniumDataObjectTest.pm +++ b/src/perl/t/WTSI/NPG/Expression/InfiniumDataObjectTest.pm @@ -38,7 +38,7 @@ package WTSI::NPG::Expression::InfiniumDataObjectTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use File::Spec; use Test::More tests => 13; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Expression/ProfileAnnotationTest.pm b/src/perl/t/WTSI/NPG/Expression/ProfileAnnotationTest.pm index 86031e78b..9ee9102da 100644 --- a/src/perl/t/WTSI/NPG/Expression/ProfileAnnotationTest.pm +++ b/src/perl/t/WTSI/NPG/Expression/ProfileAnnotationTest.pm @@ -3,7 +3,7 @@ package WTSI::NPG::Expression::ProfileAnnotationTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use File::Spec; use Test::More tests => 10; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Expression/PublisherTest.pm b/src/perl/t/WTSI/NPG/Expression/PublisherTest.pm index 5f73d8a9e..f773d4e8b 100644 --- a/src/perl/t/WTSI/NPG/Expression/PublisherTest.pm +++ b/src/perl/t/WTSI/NPG/Expression/PublisherTest.pm @@ -55,7 +55,7 @@ use warnings; use Cwd qw(abs_path); use DateTime; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); 
use Test::More tests => 47; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Expression/ResultSetTest.pm b/src/perl/t/WTSI/NPG/Expression/ResultSetTest.pm index 914b50ce7..9e0d8a2c0 100644 --- a/src/perl/t/WTSI/NPG/Expression/ResultSetTest.pm +++ b/src/perl/t/WTSI/NPG/Expression/ResultSetTest.pm @@ -3,7 +3,7 @@ package WTSI::NPG::Expression::ResultSetTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Expression/SampleProbeProfileTest.pm b/src/perl/t/WTSI/NPG/Expression/SampleProbeProfileTest.pm index 21eb9ca50..3a504c677 100644 --- a/src/perl/t/WTSI/NPG/Expression/SampleProbeProfileTest.pm +++ b/src/perl/t/WTSI/NPG/Expression/SampleProbeProfileTest.pm @@ -3,9 +3,9 @@ package WTSI::NPG::Expression::SampleProbeProfileTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use File::Spec; -use Test::More tests => 16; +use Test::More tests => 15; use Test::Exception; Log::Log4perl::init('./etc/log4perl_tests.conf'); @@ -28,7 +28,7 @@ sub require : Test(1) { require_ok('WTSI::NPG::Expression::SampleProbeProfile'); } -sub constructor : Test(4) { +sub constructor : Test(3) { new_ok('WTSI::NPG::Expression::SampleProbeProfile', [file_name => "$data_path/$no_norm_file"]); diff --git a/src/perl/t/WTSI/NPG/Genotyping/CallTest.pm b/src/perl/t/WTSI/NPG/Genotyping/CallTest.pm index c0b8921f7..d2cefa7b9 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/CallTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/CallTest.pm @@ -9,9 +9,9 @@ use warnings; use Log::Log4perl; use List::AllUtils qw(all); -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::Exception; -use Test::More tests => 70; +use Test::More tests => 69; Log::Log4perl::init('./etc/log4perl_tests.conf'); @@ -48,7 +48,7 @@ sub clone : Test(4) { "Qscore preserved by cloning"); } -sub constructor : Test(7) { +sub constructor : Test(6) { my $snpset = 
WTSI::NPG::Genotyping::SNPSet->new("$data_path/$data_file"); new_ok('WTSI::NPG::Genotyping::Call', @@ -354,5 +354,5 @@ sub equivalent : Test(15) { ok(!$no_call->equivalent($no_call), 'No call not equivalent with self'); } -1; +return 1; diff --git a/src/perl/t/WTSI/NPG/Genotyping/Database/InfiniumTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Database/InfiniumTest.pm index 5af6f9b3b..a84d515eb 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Database/InfiniumTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Database/InfiniumTest.pm @@ -6,7 +6,7 @@ package WTSI::NPG::Genotyping::Database::InfiniumTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 13; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Genotyping/Database/PipelineTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Database/PipelineTest.pm index 5266b0c1c..f09e0e784 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Database/PipelineTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Database/PipelineTest.pm @@ -6,8 +6,8 @@ package WTSI::NPG::Genotyping::Database::PipelineTest; use strict; use warnings; -use base qw(Test::Class); -use Test::More tests => 67; +use base qw(WTSI::NPG::Test); +use Test::More tests => 64; use Test::Exception; use File::Temp qw(tempdir); @@ -48,7 +48,7 @@ sub teardown : Test(teardown) { } } -sub require : Test(3) { +sub require : Test(1) { require_ok('WTSI::NPG::Genotyping::Database::Pipeline'); } @@ -83,7 +83,7 @@ sub connect : Test(6) { ok($tmpdb->is_connected, 'Is connected'); } -sub disconnect : Test(4) { +sub disconnect : Test(3) { ok($db->is_connected, 'Is connected'); ok($db->disconnect, 'Can disconnect'); ok(!$db->is_connected, 'Finally, is not connected'); diff --git a/src/perl/t/WTSI/NPG/Genotyping/Database/SNPTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Database/SNPTest.pm index e5e275eec..87709e980 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Database/SNPTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Database/SNPTest.pm @@ -6,7 +6,7 @@ package 
WTSI::NPG::Genotyping::Database::SNPTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 13; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Genotyping/Database/SequenomTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Database/SequenomTest.pm index 16b2e0116..810e9be2b 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Database/SequenomTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Database/SequenomTest.pm @@ -6,7 +6,7 @@ package WTSI::NPG::Genotyping::Database::SequenomTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 13; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayDataObjectTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayDataObjectTest.pm index 9bcd1cd25..2cff64dbc 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayDataObjectTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayDataObjectTest.pm @@ -46,7 +46,7 @@ package WTSI::NPG::Genotyping::Fluidigm::AssayDataObjectTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use File::Spec; use Test::More tests => 11; use Test::Exception; @@ -173,8 +173,14 @@ sub update_secondary_metadata_missing_value : Test(2) { # so this one is done first. The test ensures that an invalid AVU # value only causes that AVU to be skipped - all subsequent ones are # applied. + # + # In this instance, the update for 'dcterms:identifier' fails because of + # the invalid value, so the existing 'dcterms:identifier' AVU remains + # unchanged with a value of '9999999999'. Subsequently, the value of + # 'sample_supplier_name' is successfully updated from 'zzzzzzzzzz' to + # 'aaaaaaaaaa'. 
my $expected_meta = - [# {attribute => 'dcterms:identifier', value => '0123456789'}, + [{attribute => 'dcterms:identifier', value => '9999999999'}, {attribute => 'fluidigm_plate', value => '1381735059'}, {attribute => 'fluidigm_well', value => 'S01'}, {attribute => 'sample', value => 'sample1' }, diff --git a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultSetTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultSetTest.pm index 105f1acae..09564aec1 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultSetTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultSetTest.pm @@ -6,7 +6,7 @@ package WTSI::NPG::Genotyping::Fluidigm::AssayResultSetTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use File::Spec; use Test::More tests => 68; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultTest.pm index 0c37497d2..7927f7e2e 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultTest.pm @@ -6,7 +6,7 @@ package WTSI::NPG::Genotyping::Fluidigm::AssayResultTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use File::Spec; use Test::More tests => 402; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/ExportFileTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/ExportFileTest.pm index 202373ba2..5690cdf95 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/ExportFileTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/ExportFileTest.pm @@ -9,7 +9,7 @@ use warnings; use File::Compare; use File::Temp qw(tempdir); -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 299; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/PublisherTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/PublisherTest.pm index 36a0603af..0c4cd57b8 100644 --- 
a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/PublisherTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/PublisherTest.pm @@ -54,7 +54,7 @@ use strict; use warnings; use DateTime; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 86; use Test::Exception; @@ -99,7 +99,6 @@ sub make_fixture : Test(setup) { sub teardown : Test(teardown) { my $irods = WTSI::NPG::iRODS->new; $irods->remove_collection($irods_tmp_coll); - undef $resultset; } @@ -453,7 +452,8 @@ sub test_metadata { ok($data_object->get_avu('dcterms:modified'), 'Has dcterms:modified'); } else { - ok(!$data_object->get_avu('dcterms:modified'), 'Has no dcterms:modified'); + my @exists = $data_object->find_in_metadata('dcterms:modified'); + ok(scalar(@exists)==0, 'Has no dcterms:modified'); } foreach my $avu (@$expected_metadata) { diff --git a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/ResultSetTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/ResultSetTest.pm index 4d61135fb..34e62b828 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/ResultSetTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/ResultSetTest.pm @@ -6,7 +6,7 @@ package WTSI::NPG::Genotyping::Fluidigm::ResultSetTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 15; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/SubscriberTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/SubscriberTest.pm index be7b31265..8d1dd900b 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/SubscriberTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/SubscriberTest.pm @@ -7,12 +7,14 @@ use strict; use warnings; use DateTime; use File::Path qw/make_path/; +use File::Slurp qw/read_file/; use File::Spec::Functions qw/catfile/; -use File::Temp qw(tempdir); -use List::AllUtils qw(uniq); +use File::Temp qw/tempdir/; +use JSON; +use List::AllUtils qw/uniq/; -use base qw(Test::Class); -use Test::More tests => 42; +use base qw(WTSI::NPG::Test); +use 
Test::More tests => 43; use Test::Exception; Log::Log4perl::init('./etc/log4perl_tests.conf'); @@ -37,6 +39,7 @@ my $non_unique_identifier = 'ABCDEFGHI'; my $reference_name = 'Homo_sapiens (GRCh37)'; my $snpset_name = 'qc'; my $snpset_file = 'qc.tsv'; +my $chromosome_length_file = 'chromosome_lengths_GRCh37.json'; my $tmp; my $irods_tmp_coll; @@ -47,12 +50,17 @@ sub make_fixture : Test(setup) { my $irods = WTSI::NPG::iRODS->new; $irods_tmp_coll = "FluidigmSubscriberTest.$pid"; $irods->add_collection($irods_tmp_coll); - $irods->add_object("$data_path/$snpset_file", "$irods_tmp_coll/$snpset_file"); + my $chromosome_lengths_irods = "$irods_tmp_coll/$chromosome_length_file"; + $irods->add_object("$data_path/$chromosome_length_file", + $chromosome_lengths_irods); + $irods->add_object("$data_path/$snpset_file", + "$irods_tmp_coll/$snpset_file"); my $snpset_obj = WTSI::NPG::iRODS::DataObject->new ($irods,"$irods_tmp_coll/$snpset_file")->absolute; $snpset_obj->add_avu('fluidigm_plex', $snpset_name); $snpset_obj->add_avu('reference_name', $reference_name); + $snpset_obj->add_avu('chromosome_json', $chromosome_lengths_irods); foreach my $i (0..2) { my $file = $assay_resultset_files[$i]; @@ -100,6 +108,19 @@ sub constructor : Test(1) { snpset_name => $snpset_name]); } +sub find_object_paths : Test(1) { + my $irods = WTSI::NPG::iRODS->new; + my @obj_paths = WTSI::NPG::Genotyping::Fluidigm::Subscriber->new + (irods => $irods, + data_path => $irods_tmp_coll, + reference_path => $irods_tmp_coll, + reference_name => $reference_name, + snpset_name => $snpset_name)->find_object_paths( + [uniq @sample_identifiers]); + ok(scalar @obj_paths == 3, + "Found 3 iRODS object paths for sample results"); +} + sub get_assay_resultsets_and_vcf_metadata : Test(6) { my $irods = WTSI::NPG::iRODS->new; my ($resultsets1, $meta1) = WTSI::NPG::Genotyping::Fluidigm::Subscriber->new @@ -112,7 +133,8 @@ sub get_assay_resultsets_and_vcf_metadata : Test(6) { my $expected_meta = { 'plex_type' => [ 
'fluidigm' ], - 'plex_name' => [ 'qc' ] + 'plex_name' => [ 'qc' ], + 'callset_name' => [ 'fluidigm_qc' ], }; is_deeply($meta1, $expected_meta, "VCF metadata matches expected values"); @@ -142,27 +164,6 @@ sub get_assay_resultsets_and_vcf_metadata : Test(6) { ok(defined $resultsets2, "'IN' query of 100 args"); } -sub get_assay_resultset : Test(2) { - my $irods = WTSI::NPG::iRODS->new; - my $resultset = WTSI::NPG::Genotyping::Fluidigm::Subscriber->new - (irods => $irods, - data_path => $irods_tmp_coll, - reference_path => $irods_tmp_coll, - reference_name => $reference_name, - snpset_name => $snpset_name)->get_assay_resultset('XYZ0123456789'); - - ok($resultset, 'Assay resultsets'); - dies_ok { - WTSI::NPG::Genotyping::Fluidigm::Subscriber->new - (irods => $irods, - data_path => $irods_tmp_coll, - reference_path => $irods_tmp_coll, - reference_name => $reference_name, - snpset_name => $snpset_name)->get_assay_resultset - ($non_unique_identifier); - } 'Fails on matching multiple results'; -} - sub get_calls : Test(31) { my $irods = WTSI::NPG::iRODS->new; @@ -277,6 +278,21 @@ sub get_calls : Test(31) { "Calls do not match for snp at position $unmatched_pos"); } +sub get_chromosome_lengths : Test(2) { + my $irods = WTSI::NPG::iRODS->new; + my $chr_lengths = WTSI::NPG::Genotyping::Fluidigm::Subscriber->new + (irods => $irods, + data_path => $irods_tmp_coll, + reference_path => $irods_tmp_coll, + reference_name => $reference_name, + snpset_name => $snpset_name)->get_chromosome_lengths(); + ok($chr_lengths, 'Chromosome lengths found'); + my $chromosome_length_path = "$data_path/$chromosome_length_file"; + my $chr_lengths_expected = decode_json(read_file($chromosome_length_path)); + is_deeply($chr_lengths, $chr_lengths_expected, + "Chromosome lengths match expected values"); +} + sub _get_observed_calls { # get (snp_name, genotype) pair observed for each call my ($irods, $irods_coll, $rname, $sname, $sample_id) = @_; diff --git 
a/src/perl/t/WTSI/NPG/Genotyping/GenderMarkerCallTest.pm b/src/perl/t/WTSI/NPG/Genotyping/GenderMarkerCallTest.pm index 0c13688e2..c6f7c7061 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/GenderMarkerCallTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/GenderMarkerCallTest.pm @@ -1,10 +1,13 @@ + +package WTSI::NPG::Genotyping::GenderMarkerCallTest; + use strict; use warnings; use Log::Log4perl; use List::AllUtils qw(all); -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::Exception; use Test::More tests => 27; diff --git a/src/perl/t/WTSI/NPG/Genotyping/GenderMarkerTest.pm b/src/perl/t/WTSI/NPG/Genotyping/GenderMarkerTest.pm index fb7e6d851..18c2fb689 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/GenderMarkerTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/GenderMarkerTest.pm @@ -6,7 +6,7 @@ package WTSI::NPG::Genotyping::GenderMarkerTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 17; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Genotyping/IlluminusTest.pm b/src/perl/t/WTSI/NPG/Genotyping/IlluminusTest.pm index d0969d6fc..e0f1c2f63 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/IlluminusTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/IlluminusTest.pm @@ -1,7 +1,7 @@ use utf8; -package WTSI::NPG::Genotyping::IlluminaTest; +package WTSI::NPG::Genotyping::IlluminusTest; use strict; use warnings; @@ -9,8 +9,8 @@ use File::Compare; use File::Temp qw(tempdir); use JSON; -use base qw(Test::Class); -use Test::More tests => 12; +use base qw(WTSI::NPG::Test); +use Test::More tests => 11; use Test::Exception; use Log::Log4perl; @@ -128,7 +128,7 @@ sub test_nullify_females : Test(2) { "$out_file is identical to $expected_file"); } -sub test_write_it_header : Test(3) { +sub test_write_it_header : Test(2) { my $expected_file = "$data_path/iln_header.txt"; my $tmpdir = tempdir(CLEANUP => 1); diff --git a/src/perl/t/WTSI/NPG/Genotyping/Infinium/AnalysisPublisherTest.pm 
b/src/perl/t/WTSI/NPG/Genotyping/Infinium/AnalysisPublisherTest.pm index e46961064..475840361 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Infinium/AnalysisPublisherTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Infinium/AnalysisPublisherTest.pm @@ -7,7 +7,7 @@ use strict; use warnings; use DateTime; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 5; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Genotyping/Infinium/InfiniumDataObjectTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Infinium/InfiniumDataObjectTest.pm index fa9b22c2e..40394920e 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Infinium/InfiniumDataObjectTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Infinium/InfiniumDataObjectTest.pm @@ -58,10 +58,10 @@ package WTSI::NPG::Genotyping::Infinium::InfiniumDataObjectTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use File::Spec; use List::AllUtils qw(none); -use Test::More tests => 15; +use Test::More tests => 14; use Test::Exception; use WTSI::NPG::iRODS; @@ -110,7 +110,7 @@ sub require : Test(1) { require_ok('WTSI::NPG::Genotyping::Infinium::InfiniumDataObject'); } -sub metadata : Test(3) { +sub metadata : Test(2) { my $irods = WTSI::NPG::iRODS->new; my $gtc_irods_path = "$irods_tmp_coll/$gtc_file"; @@ -189,8 +189,14 @@ sub update_secondary_metadata_missing_value : Test(2) { # so this one is done first. The test ensures that an invalid AVU # value only causes that AVU to be skipped - all subsequent ones are # applied. + # + # In this instance, the update for 'dcterms:identifier' fails because of + # the invalid value, so the existing 'dcterms:identifier' AVU remains + # unchanged with a value of '9999999999'. Subsequently, the value of + # 'sample_supplier_name' is successfully updated from 'zzzzzzzzzz' to + # 'aaaaaaaaaa'. 
my $expected_meta = - [# {attribute => 'dcterms:identifier', value => '0123456789'}, + [{attribute => 'dcterms:identifier', value => '9999999999'}, {attribute => 'infinium_plate', value => 'plate1'}, {attribute => 'infinium_well', value => 'A10'}, {attribute => 'sample', value => 'sample1' }, diff --git a/src/perl/t/WTSI/NPG/Genotyping/Infinium/PublisherTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Infinium/PublisherTest.pm index d61388f60..29cfac79b 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Infinium/PublisherTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Infinium/PublisherTest.pm @@ -118,8 +118,8 @@ use warnings; use Cwd qw(abs_path); use DateTime; -use base qw(Test::Class); -use Test::More tests => 338; +use base qw(WTSI::NPG::Test); +use Test::More tests => 316; use Test::Exception; Log::Log4perl::init('./etc/log4perl_tests.conf'); @@ -425,7 +425,7 @@ sub publish_methylation : Test(45) { } } -sub publish_overwrite : Test(91) { +sub publish_overwrite : Test(69) { my $publication_time = DateTime->now; my $publisher = WTSI::NPG::Genotyping::Infinium::Publisher->new diff --git a/src/perl/t/WTSI/NPG/Genotyping/Infinium/ResultSetTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Infinium/ResultSetTest.pm index 66442105b..88fd79092 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Infinium/ResultSetTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Infinium/ResultSetTest.pm @@ -6,8 +6,8 @@ package WTSI::NPG::Genotyping::Infinium::ResultSetTest; use strict; use warnings; -use base qw(Test::Class); -use Test::More tests => 12; +use base qw(WTSI::NPG::Test); +use Test::More tests => 11; use Test::Exception; Log::Log4perl::init('./etc/log4perl_tests.conf'); @@ -25,7 +25,7 @@ sub require : Test(1) { require_ok('WTSI::NPG::Genotyping::Infinium::ResultSet'); } -sub constructor : Test(10) { +sub constructor : Test(9) { new_ok('WTSI::NPG::Genotyping::Infinium::ResultSet', [beadchip => '0123456789', diff --git a/src/perl/t/WTSI/NPG/Genotyping/Infinium/SampleQueryTest.pm 
b/src/perl/t/WTSI/NPG/Genotyping/Infinium/SampleQueryTest.pm index c669cd072..812b8b16d 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Infinium/SampleQueryTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Infinium/SampleQueryTest.pm @@ -91,7 +91,7 @@ package WTSI::NPG::Genotyping::Infinium::SampleQueryTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 6; Log::Log4perl::init('./etc/log4perl_tests.conf'); diff --git a/src/perl/t/WTSI/NPG/Genotyping/QC/CollationTest.pm b/src/perl/t/WTSI/NPG/Genotyping/QC/CollationTest.pm new file mode 100644 index 000000000..92bbf20d7 --- /dev/null +++ b/src/perl/t/WTSI/NPG/Genotyping/QC/CollationTest.pm @@ -0,0 +1,137 @@ +use utf8; + +package WTSI::NPG::Genotyping::QC::CollationTest; + +use strict; +use warnings; + +use File::Copy qw/copy/; +use File::Slurp qw/read_file/; +use File::Spec::Functions qw/catfile/; +use File::Temp qw/tempdir/; +use FindBin qw/$Bin/; +use JSON; +use Text::CSV; + +use base qw(WTSI::NPG::Test); +use Test::More tests => 16; +use Test::Exception; + +use WTSI::NPG::Genotyping::QC::Collation qw(collate); +use WTSI::NPG::Genotyping::QC::QCPlotShared qw(readSampleInclusion); + +Log::Log4perl::init('./etc/log4perl_tests.conf'); + +our $log = Log::Log4perl->get_logger(); + +BEGIN { use_ok('WTSI::NPG::Genotyping::QC::Collation'); } + +my $temp_dir; +my $dbName = 'small_test.db'; +my $data_dir = "$Bin/qc_test_data/"; +my $example_dir = catfile($data_dir, 'output_examples'); + +my $configPath = catfile($data_dir, 'config_test.json'); +my $thresholdPath = $configPath; +my $dbPath = catfile($data_dir, 'small_test.db'); +my $iniPath = $ENV{HOME} .
"/.npg/genotyping.ini"; + +my $resultsExpected = catfile($example_dir, 'qc_results.json'); +my $metricsExpected = catfile($example_dir, 'qc_metrics.json'); +my $csvExpected = catfile($example_dir, 'qc_results.csv'); + +my $expectedCsvContents; + +sub make_fixture : Test(setup) { + $temp_dir = tempdir("CollationTest_XXXXXX", CLEANUP => 1); + open my $fh, "<", $csvExpected or $log->logcroak("Cannot open CSV '", + $csvExpected, "'"); + my $csv = Text::CSV->new(); + $expectedCsvContents = $csv->getline_all($fh); + close $fh or $log->logcroak("Cannot close CSV '", $csvExpected, "'"); +} + +sub require : Test(1) { + require_ok('WTSI::NPG::Genotyping::QC::Collation'); +} + +sub collation : Test(6) { + my $jsonResults = catfile($temp_dir, 'qc_results.json'); + my $jsonMetrics = catfile($temp_dir, 'qc_metrics.json'); + my $csvPath = catfile($temp_dir, 'qc_results.csv'); + my $exclude = 0; + my $metricsRef = 0; + my $verbose = 0; + collate($example_dir, $configPath, $thresholdPath, $dbPath, $iniPath, + $jsonResults, $jsonMetrics, $csvPath, + $exclude, $metricsRef, $verbose); + ok(-e $jsonMetrics, "JSON metrics path exists"); + my $got_metrics = decode_json(read_file($jsonMetrics)); + my $expected_metrics = decode_json(read_file($metricsExpected)); + is_deeply($got_metrics, $expected_metrics, + "JSON metrics data equivalent to expected"); + ok(-e $jsonResults, "JSON results path exists"); + my $got_results = decode_json(read_file($jsonResults)); + my $expected_results = decode_json(read_file($resultsExpected)); + is_deeply($got_results, $expected_results, + "JSON results data equivalent to expected"); + ok(-e $csvPath, "CSV results path exists"); + open my $fh, "<", $csvPath or $log->logcroak("Cannot open CSV '", + $csvPath, "'"); + my $csv = Text::CSV->new(); + my $csvContents = $csv->getline_all($fh); + close $fh or $log->logcroak("Cannot close CSV '", $csvPath, "'"); + is_deeply($csvContents, $expectedCsvContents, "CSV contents match"); +} + +sub collation_script : Test(8)
{ + my $jsonResults = catfile($temp_dir, 'qc_results.json'); + my $jsonMetrics = catfile($temp_dir, 'qc_metrics.json'); + my $csvPath = catfile($temp_dir, 'qc_results.csv'); + # apply sample exclusion to temporary copy of DB + my $dbTemp = catfile($temp_dir, 'genotyping.db'); + copy($dbPath, $dbTemp) || $log->logcroak("Failed to copy database from ", + $dbPath, " to ", $dbTemp); + ok(system(join q{ }, "collate_qc_results.pl", + "--input $example_dir", + "--status $jsonResults", + "--dbpath $dbTemp", + "--csv $csvPath", + "--metrics $jsonMetrics", + "--config $configPath", + "--exclude") == 0, 'Ran collation script'); + # check for sample exclusion in database + my $exclPath = catfile($example_dir, "qc_exclusions.json"); + my $expectedInclusion = decode_json(read_file($exclPath)); + my $result = `echo 'select name,include from sample;' | sqlite3 $dbTemp`; + my @lines = split("\n", $result); + my %inclusion; + foreach my $line (@lines) { + my @fields = split('\|', $line); + my $status = pop @fields; + my $name = join("|", @fields); # OK even if name includes | characters + $inclusion{$name} = $status; + } + is_deeply(\%inclusion, $expectedInclusion, + "Sample inclusion status in pipeline DB"); + # check other outputs + ok(-e $jsonMetrics, "JSON metrics path exists"); + my $got_metrics = decode_json(read_file($jsonMetrics)); + my $expected_metrics = decode_json(read_file($metricsExpected)); + is_deeply($got_metrics, $expected_metrics, + "JSON metrics data equivalent to expected"); + ok(-e $jsonResults, "JSON results path exists"); + my $got_results = decode_json(read_file($jsonResults)); + my $expected_results = decode_json(read_file($resultsExpected)); + is_deeply($got_results, $expected_results, + "JSON results data equivalent to expected"); + ok(-e $csvPath, "CSV results path exists"); + open my $fh, "<", $csvPath or $log->logcroak("Cannot open CSV '", + $csvPath, "'"); + my $csv = Text::CSV->new(); + my $csvContents = $csv->getline_all($fh); + close $fh or
$log->logcroak("Cannot close CSV '", $csvPath, "'"); + is_deeply($csvContents, $expectedCsvContents, "CSV contents match"); +} + +1; diff --git a/src/perl/t/WTSI/NPG/Genotyping/QC/IdentityTest.pm b/src/perl/t/WTSI/NPG/Genotyping/QC/IdentityTest.pm index ce4eb77e2..ceb6af680 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/QC/IdentityTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/QC/IdentityTest.pm @@ -1,20 +1,21 @@ use utf8; -package WTSI::NPG::Genotyping::IdentityTest; +package WTSI::NPG::Genotyping::QC::IdentityTest; use strict; use warnings; +use File::Slurp qw(read_file); use File::Temp qw(tempdir); use JSON; use Log::Log4perl; use Log::Log4perl::Level; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 28; use Test::Exception; use WTSI::NPG::Genotyping::QC::Identity; -use WTSI::NPG::Genotyping::QC::QCPlotShared qw/readFileToString defaultJsonConfig/; +use WTSI::NPG::Genotyping::QC::QCPlotShared qw/defaultJsonConfig/; use WTSI::NPG::Genotyping::QC::SnpID qw/convertFromIlluminaExomeSNP convertToIlluminaExomeSNP/; Log::Log4perl::init('./etc/log4perl_tests.conf'); @@ -40,7 +41,7 @@ my $iniPath = $ENV{HOME} . 
"/.npg/genotyping.ini"; sub setup : Test(setup) { $workdir = tempdir("identity_test_XXXXXX", CLEANUP => 1); $jsonOutPath = $workdir.'/'.$jsonName; - $jsonRef = decode_json(readFileToString($dataDir.'/'.$jsonName)); + $jsonRef = decode_json(read_file($dataDir.'/'.$jsonName)); } sub teardown : Test(teardown) { @@ -101,8 +102,8 @@ sub test_insufficient_snps : Test(2) { )->run_identity_check(); ok(-e $jsonOutPath, "JSON output exists for insufficient SNPs"); my $failJson = $dataDir.'/identity_check_fail.json'; - my $failDataRef = decode_json(readFileToString($failJson)); - my $jsonOut = decode_json(readFileToString($jsonOutPath)); + my $failDataRef = decode_json(read_file($failJson)); + my $jsonOut = decode_json(read_file($jsonOutPath)); is_deeply($jsonOut, $failDataRef, "JSON output is equivalent to reference"); } @@ -136,7 +137,7 @@ sub validate_outputs { # check for output files and validate contents of JSON # expects output files for the 'standard' test dataset and parameters ok(-e $jsonOutPath, "JSON output exists"); - my $jsonOut = decode_json(readFileToString($jsonOutPath)); + my $jsonOut = decode_json(read_file($jsonOutPath)); is_deeply($jsonOut, $jsonRef, "JSON output is equivalent to reference"); ok(-e $workdir.'/'.$textName, "Text summary exists"); ok(-e $workdir.'/'.$failPairsName, "Failed pairs comparison exists"); diff --git a/src/perl/t/WTSI/NPG/Genotyping/QC_wip/Check/IdentitySimulatorTest.pm b/src/perl/t/WTSI/NPG/Genotyping/QC_wip/Check/IdentitySimulatorTest.pm new file mode 100644 index 000000000..6e081b3ab --- /dev/null +++ b/src/perl/t/WTSI/NPG/Genotyping/QC_wip/Check/IdentitySimulatorTest.pm @@ -0,0 +1,134 @@ + +package WTSI::NPG::Genotyping::QC_wip::Check::IdentitySimulatorTest; + +use strict; +use warnings; + +use base qw(Test::Class); +use File::Temp qw(tempdir); +use Test::More tests => 18; +use Test::Exception; +use Text::CSV; + +use WTSI::NPG::Genotyping::Call; +use WTSI::NPG::Genotyping::QC_wip::Check::IdentitySimulator; +use 
WTSI::NPG::Genotyping::SNPSet; + +Log::Log4perl::init('./etc/log4perl_tests.conf'); +my $log = Log::Log4perl->get_logger(); + +my $data_path = './t/qc/check/identity'; +my $snpset_file = "$data_path/W30467_snp_set_info_1000Genomes.tsv"; +my $snpset; +my $calls; + +sub setup : Test(setup) { + + $snpset = WTSI::NPG::Genotyping::SNPSet->new($snpset_file); + + # copy-pasted from identity_simulation.pl + # useful to have a fixed set of test calls + my @data = ( + ['rs649058', 'AG'], + ['rs1131498', 'AA'], + ['rs1805087', 'AG'], + ['rs3795677', 'AG'], + ['rs6166', 'AG'], + ['rs1801262', 'AA'], + ['rs2286963', 'GT'], + ['rs6759892', 'GT'], + ['rs7627615', 'AG'], + ['rs11096957', 'AA'], + ['rs2247870', 'CT'], + ['rs4619', 'AG'], + ['rs532841', 'CT'], + ['rs6557634', 'CT'], + ['rs4925', 'AC'], + ['rs156697', 'AA'], + ['rs5215', 'CT'], + ['rs12828016', 'AA'], + ['rs7298565', 'AG'], + ['rs3742207', 'AC'], + ['rs4075254', 'CT'], + ['rs4843075', 'GA'], + ['rs8065080', 'CT'], + ['rs1805034', 'AA'], + ['rs2241714', 'CT'], + ['rs753381', 'AG'] + ); + my @calls = map { + my ($snp, $genotype) = @$_; + WTSI::NPG::Genotyping::Call->new + (snp => $snpset->named_snp($snp), + genotype => $genotype) } @data; + $calls = \@calls; + + my $id_sim = WTSI::NPG::Genotyping::QC_wip::Check::IdentitySimulator->new( + calls => \@calls, + snpset => $snpset + ); +} + +sub require : Test(1) { + require_ok('WTSI::NPG::Genotyping::QC_wip::Check::IdentitySimulator'); +} + + +sub construct : Test(1) { + + my @args = (snpset => $snpset, + calls => $calls); + new_ok('WTSI::NPG::Genotyping::QC_wip::Check::IdentitySimulator' + => \@args); +} + +sub script : Test(11) { + + my $script = "./bin/identity_simulation.pl"; + my $tempdir = tempdir("id_sim_test_XXXXXX", CLEANUP => 1); + my @modes = qw/ecp qcs qcr smp xer/; + foreach my $mode (@modes) { + my $output = "$tempdir/$mode.txt"; + my $cmd = "$script --mode $mode > $output"; + is(system($cmd), 0, "$cmd executed successfully"); + my $results =
_read_tsv($output); + my $expected = _read_tsv($data_path."/simulated_$mode.txt"); + is_deeply($results, $expected, + uc($mode)." results match expected values"); + } + my $cmd = "$script --mode foo > /dev/null 2>&1"; + isnt(system($cmd), 0, "Fails with invalid mode argument"); +} + +sub simulate : Test(5) { + + my $id_sim = WTSI::NPG::Genotyping::QC_wip::Check::IdentitySimulator->new( + snpset => $snpset, + calls => $calls); + my $results; + $results = $id_sim->find_identity_vary_ecp(0, 0.2, 5); + is(scalar @{$results}, 135, "Correct number of ECP results"); + $results = $id_sim->find_identity_vary_qcr(1, 1, 2); + is(scalar @{$results}, 54, "Correct number of QCR results"); + $results = $id_sim->find_identity_vary_qcs(4, 10, 2); + is(scalar @{$results}, 20, "Correct number of QCS results"); + $results = $id_sim->find_identity_vary_smp(0.1, 0.1, 4); + is(scalar @{$results}, 108, "Correct number of SMP results"); + $results = $id_sim->find_identity_vary_xer(0.05, 0.05, 4); + is(scalar @{$results}, 108, "Correct number of XER results"); +} + +sub _read_tsv { + # read a tab-delimited file + my ($path, ) = @_; + my $csv = Text::CSV->new({sep_char => "\t"}); + my @results; + open my $in, "<", $path or $log->logcroak("Cannot open '$path'"); + while (my $row = $csv->getline($in)) { + push @results, $row; + } + close $in or $log->logcroak("Cannot close '$path'"); + return \@results; +} + +1; diff --git a/src/perl/t/WTSI/NPG/Genotyping/QC_wip/Check/IdentityTest.pm b/src/perl/t/WTSI/NPG/Genotyping/QC_wip/Check/IdentityTest.pm index d80da698a..02ecad859 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/QC_wip/Check/IdentityTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/QC_wip/Check/IdentityTest.pm @@ -8,8 +8,8 @@ use File::Slurp qw(read_file); use JSON; use List::AllUtils qw(each_array); -use base qw(Test::Class); -use Test::More tests => 51; +use base qw(WTSI::NPG::Test); +use Test::More tests => 57; use Test::Exception; use plink_binary; @@ -31,6 +31,7 @@ my $expected_json_path =
"$data_path/expected_identity_results.json"; my $expected_all_json_path = "$data_path/combined_identity_expected.json"; my $expected_omit_path = "$data_path/expected_omit_results.json"; my $pass_threshold = 0.9; +my $sample_mismatch_prior = 0.01, my $snp_threshold = 8; # sample names with fake QC data @@ -255,10 +256,9 @@ sub sample_swap_evaluation : Test(14) { } } -sub script : Test(7) { +sub script : Test(13) { # test of command-line script # Could move this into Scripts.pm (which is slow to run, ~10 minutes) - my $identity_script_wip = "./bin/check_identity_bed_wip.pl"; my $tempdir = tempdir("IdentityTest.script.$pid.XXXXXX", CLEANUP => 1); my $jsonPath = "$tempdir/identity.json"; @@ -266,6 +266,8 @@ sub script : Test(7) { my $plexDir = "/nfs/srpipe_references/genotypes"; my $plexFile = "$plexDir/W30467_snp_set_info_1000Genomes.tsv"; my $refPath = "$data_path/identity_script_output.json"; + my $refPathAlternate = + "$data_path/identity_script_output_alternate_prior.json"; my $expectedCsvPath = "$data_path/identity_script_output.csv"; my $sampleJson = "$data_path/fake_sample.json"; @@ -301,15 +303,34 @@ sub script : Test(7) { "--plink $data_path/fake_qc_genotypes", "--json $jsonPath", "--csv $csvPath", - "--plex $plexFile1", - "--plex $plexFile2", + "--plex ".$plexFile1.",".$plexFile2, "--sample_json $sampleJson", - "--vcf $vcf1", - "--vcf $vcf2", + "--vcf ".$vcf1.","."$vcf2", ) == 0, 'Script identity check'); + ok(-e $jsonPath, "JSON output written by script, 2 inputs"); + ok(-e $csvPath, "CSV output written by script, 2 inputs"); $outData = from_json(read_file($jsonPath)); is_deeply($outData, $refData, "Script JSON output matches reference file, 2 inputs"); + + # test with alternate prior as a command line argument + $jsonPath = "$tempdir/identity_3.json"; + $csvPath = "$tempdir/identity_3.csv"; + ok(system(join q{ }, "$identity_script_wip", + "--plink $data_path/fake_qc_genotypes", + "--json $jsonPath", + "--csv $csvPath", + "--plex $plexFile", + 
"--sample_json $sampleJson", + "--vcf $data_path/qc_plex_calls.vcf", + "--prior 0.1" + ) == 0, 'Script identity check'); + ok(-e $jsonPath, "JSON output written by script, alternate prior"); + ok(-e $csvPath, "CSV output written by script, alternate prior"); + my $refDataAlternatePrior = from_json(read_file($refPathAlternate)); + $outData = from_json(read_file($jsonPath)); + is_deeply($outData, $refDataAlternatePrior, + "Script JSON output matches reference file, alternate prior"); } sub _get_swap_sample_identities { @@ -333,7 +354,8 @@ sub _get_swap_sample_identities { snpset => $snpset, production_calls => $production_calls->{$sample_name}, qc_calls => $qc_calls->{$sample_name}, - pass_threshold => $pass_threshold); + pass_threshold => $pass_threshold, + sample_mismatch_prior => $sample_mismatch_prior); my $sample_id = WTSI::NPG::Genotyping::QC_wip::Check::SampleIdentityBayesian-> new(\%args); diff --git a/src/perl/t/WTSI/NPG/Genotyping/QC_wip/Check/SampleIdentityBayesianTest.pm b/src/perl/t/WTSI/NPG/Genotyping/QC_wip/Check/SampleIdentityBayesianTest.pm index 9f531d74b..4502506a6 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/QC_wip/Check/SampleIdentityBayesianTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/QC_wip/Check/SampleIdentityBayesianTest.pm @@ -4,10 +4,9 @@ package WTSI::NPG::Genotyping::QC_wip::Check::SampleIdentityBayesianTest; use strict; use warnings; use File::Slurp qw(read_file); -use File::Temp qw(tempdir); use JSON; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 9; use Test::Exception; @@ -21,6 +20,7 @@ my $data_path = './t/qc/check/identity'; my $snpset_file = "$data_path/W30467_snp_set_info_1000Genomes.tsv"; my $sample_name = 'urn:wtsi:249442_C09_HELIC5102247'; my $pass_threshold = 0.9; +my $sample_mismatch_prior = 0.01; my ($qc_calls, $production_calls, $qc_calls_small, $production_calls_small); our @CALLSET_NAMES = qw/callset_bar callset_foo/; @@ -173,11 +173,13 @@ sub construct : Test(1) { my $snpset = 
WTSI::NPG::Genotyping::SNPSet->new($snpset_file); - my @args = (sample_name => $sample_name, - snpset => $snpset, - production_calls => $production_calls, - qc_calls => $qc_calls, - pass_threshold => $pass_threshold); + my @args = (sample_name => $sample_name, + snpset => $snpset, + production_calls => $production_calls, + qc_calls => $qc_calls, + pass_threshold => $pass_threshold, + sample_mismatch_prior => $sample_mismatch_prior, + ); new_ok('WTSI::NPG::Genotyping::QC_wip::Check::SampleIdentityBayesian' => \@args); @@ -192,7 +194,8 @@ sub output : Test(5) { snpset => $snpset, production_calls => $production_calls, qc_calls => $qc_calls, - pass_threshold => $pass_threshold); + pass_threshold => $pass_threshold, + sample_mismatch_prior => $sample_mismatch_prior); is_deeply($sib->qc_callset_names, \@CALLSET_NAMES, "QC callset names match"); @@ -230,7 +233,8 @@ sub output : Test(5) { snpset => $snpset, production_calls => $production_calls, qc_calls => \@anonymous_qc_calls, - pass_threshold => $pass_threshold); + pass_threshold => $pass_threshold, + sample_mismatch_prior => $sample_mismatch_prior); is_deeply($anon_sib->qc_callset_names, ['_unknown_callset_', 'callset_bar'], "Anonymous calls assigned to 'unknown' callset name"); @@ -252,7 +256,8 @@ sub metric : Test(2) { snpset => $snpset, production_calls => $production_calls, qc_calls => $qc_calls, - pass_threshold => $pass_threshold); + pass_threshold => $pass_threshold, + sample_mismatch_prior => $sample_mismatch_prior); my $sib = WTSI::NPG::Genotyping::QC_wip::Check::SampleIdentityBayesian-> new(\%args); ok(abs($sib->identity - $expected_big) < $delta, @@ -262,7 +267,8 @@ sub metric : Test(2) { snpset => $snpset, production_calls => $production_calls_small, qc_calls => $qc_calls_small, - pass_threshold => $pass_threshold); + pass_threshold => $pass_threshold, + sample_mismatch_prior => $sample_mismatch_prior); my $sib_small = WTSI::NPG::Genotyping::QC_wip::Check::SampleIdentityBayesian-> new(\%args); diff --git 
a/src/perl/t/WTSI/NPG/Genotyping/SNPSetPublisherTest.pm b/src/perl/t/WTSI/NPG/Genotyping/SNPSetPublisherTest.pm index 88315695d..6c843f2bd 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/SNPSetPublisherTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/SNPSetPublisherTest.pm @@ -9,7 +9,7 @@ use warnings; use File::Compare; use File::Temp qw(tempdir); -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use File::Spec; use Test::More tests => 8; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Genotyping/SNPSetTest.pm b/src/perl/t/WTSI/NPG/Genotyping/SNPSetTest.pm index 53ec5b2ca..e91aef887 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/SNPSetTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/SNPSetTest.pm @@ -10,9 +10,9 @@ use File::Compare; use File::Temp qw(tempdir tempfile); use List::AllUtils qw(all); -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use File::Spec; -use Test::More tests => 52; +use Test::More tests => 50; use Test::Exception; Log::Log4perl::init('./etc/log4perl_tests.conf'); @@ -199,7 +199,7 @@ sub snp_names : Test(2) { 'Contains expected SNP names') or diag explain \@snp_names; } -sub snp_name_map : Test(26) { +sub snp_name_map : Test(24) { my $snpset = WTSI::NPG::Genotyping::SNPSet->new (file_name => "$data_path/$data_file"); my $snpset_renamed = WTSI::NPG::Genotyping::SNPSet->new diff --git a/src/perl/t/WTSI/NPG/Genotyping/SNPTest.pm b/src/perl/t/WTSI/NPG/Genotyping/SNPTest.pm index 95b13b478..716e80b44 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/SNPTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/SNPTest.pm @@ -6,7 +6,7 @@ package WTSI::NPG::Genotyping::SNPTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 5; use Test::Exception; use Log::Log4perl; diff --git a/src/perl/t/WTSI/NPG/Genotyping/ScriptsTest.pm b/src/perl/t/WTSI/NPG/Genotyping/ScriptsTest.pm index 2e78da0e7..c59cbe122 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/ScriptsTest.pm +++ 
b/src/perl/t/WTSI/NPG/Genotyping/ScriptsTest.pm @@ -10,13 +10,14 @@ use File::Temp qw(tempdir); use Log::Log4perl; use JSON; -use base qw(Test::Class); -use Test::More tests => 33; +use base qw(WTSI::NPG::Test); +use Test::More tests => 32; use Test::Exception; use WTSI::NPG::iRODS; -Log::Log4perl::init('./etc/log4perl_tests.conf'); +my $logconf = './etc/log4perl_tests.conf'; +Log::Log4perl::init($logconf); our $PUBLISH_SNPSET = './bin/publish_snpset.pl'; our $PUBLISH_FLUIDIGM_GENOTYPES = './bin/publish_fluidigm_genotypes.pl'; @@ -60,6 +61,7 @@ sub test_publish_snpset : Test(1) { ok(system(join q{ }, "$PUBLISH_SNPSET", "--dest $irods_tmp_coll", + "--logconf $logconf", "--reference-name '$reference_name'", "--snpset-name $snpset_name", "--snpset-platform $snpset_platform", @@ -76,10 +78,11 @@ sub test_publish_fluidigm_genotypes : Test(2) { ok(system(join q{ }, "$PUBLISH_SNPSET", "--dest $irods_tmp_coll", + "--logconf $logconf", "--reference-name '$reference_name'", "--snpset-name $snpset_name", "--snpset-platform $snpset_platform", - "--source $snpset_file") == 0, 'Published SNPSet'); + "--source $snpset_file") == 0, 'Published SNPSet for Fluidigm'); # Includes a directory with a missing CSV file to check that the # script exits successfully when ths happens. 
@@ -87,6 +90,7 @@ sub test_publish_fluidigm_genotypes : Test(2) { "--days-ago 0", "--days 1000000", "--dest $irods_tmp_coll", + "--logconf $logconf", "--reference-path $irods_tmp_coll", "--source $raw_data_path", "2>/dev/null") == 0, @@ -103,15 +107,18 @@ sub test_publish_infinium_genotypes : Test(3) { "--days 1", "--project foo", "--dest $irods_tmp_coll", + "--logconf $logconf", "2>/dev/null") != 0, '--project conflicts with --days'); ok(system(join q{ }, "$PUBLISH_INFINIUM_GENOTYPES", "--days-ago 0", "--days 0", + "--logconf $logconf", "2>/dev/null") != 0, 'Requires --dest'); ok(system(join q{ }, "$PUBLISH_INFINIUM_GENOTYPES", "--dest $irods_tmp_coll", + "--logconf $logconf", "- < $raw_data_list") == 0, 'Published Infinium genotypes from a file list'); } @@ -155,6 +162,7 @@ sub test_query_project_samples : Test(2) { ok(system(join q{ }, "$QUERY_PROJECT_SAMPLES", "--project coreex_bbgahs", "--limit 2", + "--logconf $logconf", "--header", "--root $irods_tmp_coll", "--out $outpath") == 0, @@ -182,6 +190,7 @@ sub test_update_infinium_metadata : Test(2) { ok(system(join q{ }, "$PUBLISH_INFINIUM_GENOTYPES", "--dest $irods_tmp_coll", + "--logconf $logconf", "- < $raw_data_list") == 0, 'Published Infinium genotypes from a file list'); @@ -212,6 +221,7 @@ sub test_publish_infinium_analysis : Test(7) { ok(system(join q{ }, "$PUBLISH_INFINIUM_GENOTYPES", "--dest $archive_coll", + "--logconf $logconf", "-", "<", "$raw_data_list") == 0, 'Published Infinium genotypes from a file list'); @@ -221,8 +231,7 @@ sub test_publish_infinium_analysis : Test(7) { ok(system("$READY_PIPE --dbfile $dbfile") == 0); ok(system(join q{ }, "$READY_INFINIUM", - "--dbfile $dbfile", - "--run $run", + "--dbfile $dbfile", "--run $run", "--supplier $supplier", "--project '$project'") == 0, 'Ready infinium'); @@ -245,6 +254,7 @@ sub test_publish_infinium_analysis : Test(7) { "--dbfile $dbfile", "--source $analysis_path", "--dest $analysis_coll", + "--logconf $logconf", "--archive $archive_coll", "--run 
$run") == 0, 'Published analysis'); } @@ -257,7 +267,7 @@ sub test_ready_pipe : Test(2) { ok(-e "$dbfile"); } -sub test_ready_infinium : Test(8) { +sub test_ready_infinium : Test(7) { my $tmpdir = tempdir(CLEANUP => 1); my $dbfile = "$tmpdir/test_ready_infinium.db"; @@ -330,6 +340,7 @@ sub test_publish_expression_analysis : Test(1) { "--sample-source $idat_path", "--analysis-dest $analysis_coll", "--sample-dest $archive_coll", + "--logconf $logconf", "--manifest $manifest_path/hipsci_12samples_2014-02-12.txt", "2>/dev/null") == 0, 'Published expression analysis'); } @@ -347,10 +358,12 @@ sub test_update_expression_metadata : Test(2) { "--sample-source $idat_path", "--analysis-dest $analysis_coll", "--sample-dest $archive_coll", + "--logconf $logconf", "--manifest $manifest_path/hipsci_12samples_2014-02-12.txt", "2>/dev/null") == 0, 'Published expression analysis'); ok(system(join q{ }, "$UPDATE_EXPRESSION_METADATA", + "--logconf $logconf", "--dest $irods_tmp_coll") == 0, 'Updated expression metadata'); } diff --git a/src/perl/t/WTSI/NPG/Genotyping/Sequenom/AssayDataObjectTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Sequenom/AssayDataObjectTest.pm index 64acb87c6..0a2349f23 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Sequenom/AssayDataObjectTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Sequenom/AssayDataObjectTest.pm @@ -92,7 +92,7 @@ package WTSI::NPG::Genotyping::Sequenom::AssayDataObjectTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use File::Spec; use Test::More tests => 17; use Test::Exception; @@ -224,8 +224,14 @@ sub update_secondary_metadata_missing_value : Test(2) { # so this one is done first. The test ensures that an invalid AVU # value only causes that AVU to be skipped - all subsequent ones are # applied. + # + # In this instance, the update for 'dcterms:identifier' fails because of + # the invalid value, so the existing 'dcterms:identifier' AVU remains + # unchanged with a value of '9999999999'. 
Subsequently, the value of + # 'sample_supplier_name' is successfully updated from 'zzzzzzzzzz' to + # 'aaaaaaaaaa'. my $expected_meta = - [# attribute => 'dcterms:identifier', value => '0123456789'}, + [{attribute => 'dcterms:identifier', value => '9999999999'}, {attribute => 'sample', value => 'sample1' }, {attribute => 'sample_accession_number', value => 'A0123456789'}, {attribute => 'sample_cohort', value => 'AAA111222333'}, diff --git a/src/perl/t/WTSI/NPG/Genotyping/Sequenom/AssayResultSetTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Sequenom/AssayResultSetTest.pm index e8e699553..00fb926f7 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Sequenom/AssayResultSetTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Sequenom/AssayResultSetTest.pm @@ -6,7 +6,7 @@ package WTSI::NPG::Genotyping::Sequenom::AssayResultSetTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use File::Spec; use Test::More tests => 11; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Genotyping/Sequenom/AssayResultTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Sequenom/AssayResultTest.pm index 268b80b7e..46440636d 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Sequenom/AssayResultTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Sequenom/AssayResultTest.pm @@ -6,7 +6,7 @@ package WTSI::NPG::Genotyping::Sequenom::AssayResultTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use File::Spec; use Log::Log4perl; use Test::More tests => 11; diff --git a/src/perl/t/WTSI/NPG/Genotyping/Sequenom/PublisherTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Sequenom/PublisherTest.pm index 8c19f139e..a467680f3 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Sequenom/PublisherTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Sequenom/PublisherTest.pm @@ -133,7 +133,7 @@ use strict; use warnings; use DateTime; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 46; use Test::Exception; @@ -314,7 +314,8 @@ sub test_metadata { 
ok($data_object->get_avu('dcterms:modified'), 'Has dcterms:modified'); } else { - ok(!$data_object->get_avu('dcterms:modified'), 'Has no dcterms:modified'); + my @exists = $data_object->find_in_metadata('dcterms:modified'); + ok(scalar(@exists)==0, 'Has no dcterms:modified'); } foreach my $avu (@$expected_metadata) { diff --git a/src/perl/t/WTSI/NPG/Genotyping/Sequenom/SubscriberTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Sequenom/SubscriberTest.pm index 23725be3f..1e1c27c09 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/Sequenom/SubscriberTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/Sequenom/SubscriberTest.pm @@ -6,12 +6,14 @@ use strict; use warnings; use DateTime; use File::Path qw/make_path/; +use File::Slurp qw/read_file/; use File::Spec::Functions qw/catfile/; -use File::Temp qw(tempdir); -use List::AllUtils qw(uniq); +use File::Temp qw/tempdir/; +use JSON; +use List::AllUtils qw/uniq/; -use base qw(Test::Class); -use Test::More tests => 8; +use base qw(WTSI::NPG::Test); +use Test::More tests => 16; use Test::Exception; Log::Log4perl::init('./etc/log4perl_tests.conf'); @@ -25,6 +27,8 @@ use WTSI::NPG::Genotyping::Sequenom::AssayDataObject; use WTSI::NPG::iRODS; use WTSI::NPG::iRODS::DataObject; +use Data::Dumper; # FIXME development + my $data_path = './t/sequenom_subscriber'; my @assay_resultset_files = qw(sequenom_001.csv sequenom_002.csv sequenom_003.csv sequenom_004.csv); @@ -36,6 +40,7 @@ my $non_unique_identifier = 'ABCDEFGHI'; my $reference_name = 'Homo_sapiens (GRCh37)'; my $snpset_name = 'W30467_GRCh37'; my $snpset_file = 'W30467_snp_set_info_GRCh37.tsv'; +my $chromosome_length_file = 'chromosome_lengths_GRCh37.json'; my $tmp; my $irods_tmp_coll; @@ -46,12 +51,17 @@ sub make_fixture : Test(setup) { my $irods = WTSI::NPG::iRODS->new; $irods_tmp_coll = "SequenomSubscriberTest.$pid"; $irods->add_collection($irods_tmp_coll); - $irods->add_object("$data_path/$snpset_file", "$irods_tmp_coll/$snpset_file"); + my $chromosome_lengths_irods = 
"$irods_tmp_coll/$chromosome_length_file"; + $irods->add_object("$data_path/$chromosome_length_file", + $chromosome_lengths_irods); + $irods->add_object("$data_path/$snpset_file", + "$irods_tmp_coll/$snpset_file"); my $snpset_obj = WTSI::NPG::iRODS::DataObject->new ($irods,"$irods_tmp_coll/$snpset_file")->absolute; - $snpset_obj->add_avu('fluidigm_plex', $snpset_name); + $snpset_obj->add_avu('sequenom_plex', $snpset_name); $snpset_obj->add_avu('reference_name', $reference_name); + $snpset_obj->add_avu('chromosome_json', $chromosome_lengths_irods); foreach my $i (0..3) { my $file = $assay_resultset_files[$i]; @@ -100,6 +110,19 @@ sub constructor : Test(1) { snpset_name => $snpset_name]); } +sub find_object_paths : Test(1) { + my $irods = WTSI::NPG::iRODS->new; + my @obj_paths = WTSI::NPG::Genotyping::Sequenom::Subscriber->new + (irods => $irods, + data_path => $irods_tmp_coll, + reference_path => $irods_tmp_coll, + reference_name => $reference_name, + snpset_name => $snpset_name)->find_object_paths( + [uniq @sample_identifiers]); + ok(scalar @obj_paths == 4, + "Found 4 iRODS object paths for sample results"); +} + sub get_assay_resultsets : Test(5) { my $irods = WTSI::NPG::iRODS->new; my $resultsets1 = WTSI::NPG::Genotyping::Sequenom::Subscriber->new @@ -134,3 +157,73 @@ sub get_assay_resultsets : Test(5) { snpset_name => $snpset_name)->get_assay_resultsets ([map { 'X' . $_ } 1 .. 
100]), "'IN' query of 100 args"); } + +sub get_assay_resultsets_and_vcf_metadata : Test(3) { + + my $irods = WTSI::NPG::iRODS->new; + + my $subscriber = WTSI::NPG::Genotyping::Sequenom::Subscriber->new + (irods => $irods, + data_path => $irods_tmp_coll, + reference_path => $irods_tmp_coll, + reference_name => $reference_name, + snpset_name => $snpset_name); + + my ($resultsets_index, $vcf_meta) = + $subscriber->get_assay_resultsets_and_vcf_metadata( + [uniq @sample_identifiers]); + + ok($vcf_meta, "VCF metadata found"); + my $expected_meta = { + 'plex_type' => [ 'sequenom' ], + 'plex_name' => [ 'W30467_GRCh37' ], + 'callset_name' => [ 'sequenom_W30467_GRCh37' ] + }; + is_deeply($vcf_meta, $expected_meta, + "VCF metadata matches expected values"); + ok(scalar keys %{$resultsets_index} == 3, + "Found resultset index for 3 sample identifiers"); +} + +sub get_chromosome_lengths : Test(2) { + my $irods = WTSI::NPG::iRODS->new; + my $chr_lengths = WTSI::NPG::Genotyping::Sequenom::Subscriber->new + (irods => $irods, + data_path => $irods_tmp_coll, + reference_path => $irods_tmp_coll, + reference_name => $reference_name, + snpset_name => $snpset_name)->get_chromosome_lengths(); + ok($chr_lengths, 'Chromosome lengths found'); + my $chromosome_length_path = "$data_path/$chromosome_length_file"; + my $chr_lengths_expected = decode_json(read_file($chromosome_length_path)); + is_deeply($chr_lengths, $chr_lengths_expected, + "Chromosome lengths match expected values"); +} + +sub get_vcf_metadata : Test(2) { + my $irods = WTSI::NPG::iRODS->new; + + my $subscriber = WTSI::NPG::Genotyping::Sequenom::Subscriber->new + (irods => $irods, + data_path => $irods_tmp_coll, + reference_path => $irods_tmp_coll, + reference_name => $reference_name, + snpset_name => $snpset_name); + + my @obj_paths = $subscriber->find_object_paths( + [uniq @sample_identifiers]); + my @data_objects = map { + WTSI::NPG::Genotyping::Sequenom::AssayDataObject->new + ($irods, $_); + } @obj_paths; + my $vcf_meta = 
$subscriber->vcf_metadata_from_irods(\@data_objects); + ok($vcf_meta, "VCF metadata found"); + my $expected_meta = { + 'plex_type' => [ 'sequenom' ], + 'plex_name' => [ 'W30467_GRCh37' ], + 'callset_name' => [ 'sequenom_W30467_GRCh37' ] + }; + is_deeply($vcf_meta, $expected_meta, + "VCF metadata matches expected values"); +} + diff --git a/src/perl/t/WTSI/NPG/Genotyping/UpdatePlinkAnnotationTest.pm b/src/perl/t/WTSI/NPG/Genotyping/UpdatePlinkAnnotationTest.pm index 196cf9207..24259c410 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/UpdatePlinkAnnotationTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/UpdatePlinkAnnotationTest.pm @@ -1,7 +1,7 @@ use utf8; -package WTSI::NPG::Genotyping::UpdatePlinkAnnotation; +package WTSI::NPG::Genotyping::UpdatePlinkAnnotationTest; use strict; use warnings; @@ -9,7 +9,7 @@ use File::Temp qw(tempdir tempfile); use JSON; use Log::Log4perl; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 9; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Genotyping/VCF/ReadyPlexCallsTest.pm b/src/perl/t/WTSI/NPG/Genotyping/VCF/ReadyPlexCallsTest.pm deleted file mode 100644 index d551e692b..000000000 --- a/src/perl/t/WTSI/NPG/Genotyping/VCF/ReadyPlexCallsTest.pm +++ /dev/null @@ -1,266 +0,0 @@ -use utf8; - -package WTSI::NPG::Genotyping::VCF::ReadyPlexCallsTest; - -use strict; -use warnings; - -use base qw(Test::Class); -use Test::More tests => 6; -use Test::Exception; -use File::Path qw/make_path/; -use File::Slurp qw/read_file/; -use File::Spec::Functions qw/catfile/; -use File::Temp qw/tempdir/; -use JSON; -use Log::Log4perl; -use WTSI::NPG::iRODS; - -our $LOG_TEST_CONF = './etc/log4perl_tests.conf'; - -Log::Log4perl::init($LOG_TEST_CONF); - -# test for ready_qc_calls.pl -# TODO Later merge this into ScriptsTest.pm, but keep separate for now for quicker testing in development (running ScriptsTest.pm takes ~11 minutes!) 
- -# Requirements: -# - Pipeline database for sample names (or option to read from file) -# - Appropriate (dummy?) Fluidigm/Sequenom files in iRODS -# - Run script and validate VCF output - - -our $READY_QC_CALLS = './bin/ready_qc_calls.pl'; - -my $irods; -my $irods_tmp_coll; -my $pid = $$; -my $data_path = './t/vcf'; -my $tmp; - -# fluidigm test data -my $f_expected_vcf = $data_path."/fluidigm.vcf"; -my $f_reference_name = "Homo_sapiens (1000Genomes)"; -my $f_snpset_id = 'qc'; -my $f_snpset_filename = 'qc_fluidigm_snp_info_GRCh37.tsv'; -my @f_input_files = qw(fluidigm_001.csv fluidigm_002.csv - fluidigm_003.csv fluidigm_004.csv); -my @f_sample_ids = qw(sample_001 sample_002 sample_003 sample_004); -my $f_sample_json = $data_path."/fluidigm_samples.json"; -my $f_params_name = "params_fluidigm.json"; - -# sequenom test data -my $s_expected_vcf = $data_path."/sequenom.vcf"; -my $s_reference_name = "Homo_sapiens (1000Genomes)"; -my $s_snpset_id = 'W30467'; -my $s_snpset_filename = 'W30467_snp_set_info_GRCh37.tsv'; -my $s_snpset_filename_1 = 'W30467_snp_set_info_GRCh37_1.tsv'; -my @s_sample_ids = qw(sample_001 sample_002 sample_003 sample_004); -my $s_sample_json = $data_path."/sequenom_samples.json"; -my $s_params_name = "params_sequenom.json"; -my $s_params_name_1 = "params_sequenom_1.json"; - -my $log = Log::Log4perl->get_logger(); - -my $tfc = 0; # text fixture count - -sub make_fixture : Test(setup) { - $tmp = tempdir("ready_plex_test_XXXXXX", CLEANUP => 1); - $log->info("Created temporary directory $tmp"); - $irods = WTSI::NPG::iRODS->new; - $irods_tmp_coll = $irods->add_collection("ReadyPlexCallsTest.$pid.$tfc"); - $tfc++; - - # set up dummy fasta reference - $ENV{NPG_REPOSITORY_ROOT} = $tmp; - my $fastadir = catfile($tmp, 'references', 'Homo_sapiens', - 'GRCh37_53', 'all', 'fasta'); - make_path($fastadir); - my $reference_file_path = catfile($fastadir, - 'Homo_sapiens.GRCh37.dna.all.fa'); - open my $fh, '>>', $reference_file_path || $log->logcroak( - "Cannot 
open reference file path '", $reference_file_path, "'"); - close $fh || $log->logcroak( - "Cannot close reference file path '", $reference_file_path, "'"); -} - -sub setup_fluidigm { - # add some dummy fluidigm CSV files to the temporary collection - # add sample and snpset names to metadata - for (my $i=0;$i<@f_input_files;$i++) { - my $input = $f_input_files[$i]; - my $ipath = $irods_tmp_coll."/".$input; - $irods->add_object($data_path."/".$input, $ipath); - $irods->add_object_avu($ipath,'dcterms:identifier',$f_sample_ids[$i]); - $irods->add_object_avu($ipath, 'fluidigm_plex', $f_snpset_id); - } - # add chromosome_json to temp irods - my $chromosome_json_filename = "chromosome_lengths_GRCh37.json"; - my $cjson = $data_path."/".$chromosome_json_filename; - my $cjson_irods = $irods_tmp_coll."/".$chromosome_json_filename; - $irods->add_object($cjson, $cjson_irods); - my $snpset_path = $irods_tmp_coll."/".$f_snpset_filename; - $irods->add_object($data_path."/".$f_snpset_filename, $snpset_path); - $irods->add_object_avu($snpset_path, 'chromosome_json', $cjson_irods); - $irods->add_object_avu($snpset_path, 'fluidigm_plex', $f_snpset_id); - $irods->add_object_avu($snpset_path, 'reference_name', $f_reference_name); - # write JSON config file with test params - my %params = ( - "irods_data_path" => $irods_tmp_coll, - "platform" => "fluidigm", - "reference_name" => $f_reference_name, - "reference_path" => $irods_tmp_coll, - "snpset_name" => $f_snpset_id, - ); - my $params_path_fluidigm = $tmp."/".$f_params_name; - open my $out, ">", $params_path_fluidigm || - $log->logcroak("Cannot open test parameter path '", - $params_path_fluidigm, "'"); - print $out to_json(\%params); - close $out || - $log->logcroak("Cannot close test parameter path '", - $params_path_fluidigm, "'"); -} - -sub setup_sequenom_alternate { - my @s_input_files = qw(sequenom_alternate_snp_001.csv - sequenom_alternate_snp_002.csv - sequenom_alternate_snp_003.csv - sequenom_alternate_snp_004.csv); - 
setup_sequenom(\@s_input_files); -} - -sub setup_sequenom_default { - my @s_input_files = qw(sequenom_001.csv - sequenom_002.csv - sequenom_003.csv - sequenom_004.csv); - setup_sequenom(\@s_input_files); -} - -sub setup_sequenom { - # add some dummy sequenom CSV files to the temporary collection - # add sample and snpset names to metadata - my @s_input_files = @{$_[0]}; - my $snpset_v1 = "1.0"; - my $snpset_v2 = "2.0"; - # upload regular and alternate-snp input files to iRODS - for (my $i=0;$i<@s_input_files;$i++) { - my $input = $s_input_files[$i]; - my $ipath = $irods_tmp_coll."/".$input; - $irods->add_object($data_path."/".$input, $ipath); - $irods->add_object_avu($ipath,'dcterms:identifier',$s_sample_ids[$i]); - $irods->add_object_avu($ipath, 'sequenom_plex', $s_snpset_id); - } - # add chromosome_json to temp irods - my $chromosome_json_filename = "chromosome_lengths_GRCh37.json"; - my $cjson = $data_path."/".$chromosome_json_filename; - my $cjson_irods = $irods_tmp_coll."/".$chromosome_json_filename; - $irods->add_object($cjson, $cjson_irods); - # add snpset (version "1.0") - my $snpset_1 = $irods_tmp_coll."/".$s_snpset_filename_1; - $irods->add_object($data_path."/".$s_snpset_filename_1, $snpset_1); - $irods->add_object_avu($snpset_1, 'chromosome_json', $cjson_irods); - $irods->add_object_avu($snpset_1, 'sequenom_plex', $s_snpset_id); - $irods->add_object_avu($snpset_1, 'reference_name', $s_reference_name); - $irods->add_object_avu($snpset_1,'snpset_version', $snpset_v1); - # add snpset (version "2.0") - my $snpset_path = $irods_tmp_coll."/".$s_snpset_filename; - $irods->add_object($data_path."/".$s_snpset_filename, $snpset_path); - $irods->add_object_avu($snpset_path, 'chromosome_json', $cjson_irods); - $irods->add_object_avu($snpset_path, 'sequenom_plex', $s_snpset_id); - $irods->add_object_avu($snpset_path, 'reference_name', $s_reference_name); - $irods->add_object_avu($snpset_path, 'snpset_version', $snpset_v2); - # write JSON config file with test params 
- my %params = ( - irods_data_path => $irods_tmp_coll, - platform => "sequenom", - reference_name => $s_reference_name, - reference_path => $irods_tmp_coll, - snpset_name => $s_snpset_id, - read_snpset_version => $snpset_v2, - write_snpset_version => $snpset_v2, - ); - my $config_path = $tmp."/".$s_params_name; - my $out; - open $out, ">", $config_path || - $log->logcroak("Cannot open config file '", $config_path, "'"); - print $out to_json(\%params); - close $out || - $log->logcroak("Cannot close config file '", $config_path, "'"); - # write another JSON config file with alternate input snpset - $params{'read_snpset_version'} = $snpset_v1; - my $config_path_1 = $tmp."/".$s_params_name_1; - open $out, ">", $config_path_1 || - $log->logcroak("Cannot open config file '", $config_path_1, "'"); - print $out to_json(\%params); - close $out || - $log->logcroak("Cannot close config file '", $config_path_1, "'"); -} - -sub teardown : Test(teardown) { - $irods->remove_collection($irods_tmp_coll); -} - -sub test_ready_calls_fluidigm : Test(2) { - setup_fluidigm(); - - my $vcf_out = "$tmp/test_fluidigm.vcf"; - my $params_path_fluidigm = $tmp."/".$f_params_name; - my $cmd = join q{ }, "$READY_QC_CALLS", - "--config $params_path_fluidigm", - "--samples $f_sample_json", - "--logconf $LOG_TEST_CONF", - "--out $vcf_out"; - ok(system($cmd) == 0, 'Wrote Fluidigm calls to VCF'); - my @got_lines = read_file($vcf_out); - @got_lines = grep !/^[#]{2}(fileDate|reference)=/, @got_lines; - my @expected_lines = read_file($f_expected_vcf); - @expected_lines = grep !/^[#]{2}(fileDate|reference)=/, @expected_lines; - is_deeply(\@got_lines, \@expected_lines, - "Fluidigm VCF output matches expected values"); - -} - -sub test_ready_calls_sequenom : Test(2) { - setup_sequenom_default(); - - my $vcf_out = "$tmp/test_sequenom.vcf"; - my $params_path_sequenom = $tmp."/".$s_params_name; - my $cmd = join q{ }, "$READY_QC_CALLS", - "--config $params_path_sequenom", - "--samples $s_sample_json", - 
"--logconf $LOG_TEST_CONF", - "--out $vcf_out"; - ok(system($cmd) == 0, 'Wrote Sequenom calls to VCF'); - my @got_lines = read_file($vcf_out); - @got_lines = grep !/^[#]{2}(fileDate|reference)=/, @got_lines; - my @expected_lines = read_file($s_expected_vcf); - @expected_lines = grep !/^[#]{2}(fileDate|reference)=/, @expected_lines; - is_deeply(\@got_lines, \@expected_lines, - "Sequenom VCF output matches expected values"); - -} - -sub test_ready_calls_sequenom_alternate_snp : Test(2) { - # tests handling of renamed SNP in different manifest versions - setup_sequenom_alternate(); - - my $vcf_out = "$tmp/test_sequenom.vcf"; - my $params_path_sequenom_1 = $tmp."/".$s_params_name_1; - my $cmd = join q{ }, "$READY_QC_CALLS", - "--config $params_path_sequenom_1", - "--samples $s_sample_json", - "--logconf $LOG_TEST_CONF", - "--out $vcf_out"; - ok(system($cmd) == 0, 'Wrote Sequenom calls to VCF'); - my @got_lines = read_file($vcf_out); - @got_lines = grep !/^[#]{2}(fileDate|reference)=/, @got_lines; - my @expected_lines = read_file($s_expected_vcf); - @expected_lines = grep !/^[#]{2}(fileDate|reference)=/, @expected_lines; - is_deeply(\@got_lines, \@expected_lines, - "Sequenom VCF output matches expected values"); - -} - -return 1; - diff --git a/src/perl/t/WTSI/NPG/Genotyping/VCF/ReadyWorkflowTest.pm b/src/perl/t/WTSI/NPG/Genotyping/VCF/ReadyWorkflowTest.pm new file mode 100644 index 000000000..4eb56bc4d --- /dev/null +++ b/src/perl/t/WTSI/NPG/Genotyping/VCF/ReadyWorkflowTest.pm @@ -0,0 +1,679 @@ +use utf8; + +package WTSI::NPG::Genotyping::VCF::ReadyWorkflowTest; + +use strict; +use warnings; + +use base qw(WTSI::NPG::Test); +use Cwd qw/abs_path/; +use Test::More tests => 66; +use Test::Exception; +use File::Basename qw(fileparse); +use File::Path qw/make_path/; +use File::Slurp qw/read_file/; +use File::Spec::Functions qw/catfile/; +use File::Temp qw/tempdir/; +use JSON; +use Log::Log4perl; +use WTSI::NPG::iRODS; +use YAML qw/LoadFile/; + +use 
WTSI::NPG::Genotyping::VCF::PlexResultFinder; + +our $LOG_TEST_CONF = './etc/log4perl_tests.conf'; + +Log::Log4perl::init($LOG_TEST_CONF); + +# test for ready_qc_calls.pl and ready_workflow.pm +# TODO Later merge this into ScriptsTest.pm, but keep separate for now for quicker testing in development (running ScriptsTest.pm takes ~11 minutes!) + +our $READY_QC_CALLS = './bin/ready_qc_calls.pl'; +our $READY_WORKFLOW = './bin/ready_workflow.pl'; + +my $irods; +my $irods_tmp_coll; +my $pid = $$; +my $data_path = abs_path('./t/vcf'); +my $tmp; + +my $db_file_name = "4_samples.db"; +my $dbfile = catfile($data_path, $db_file_name); + +$ENV{'GENOTYPE_TEST_DATA'} = '/nfs/gapi/data/genotype/pipeline_test/'; + +my $manifest = catfile($ENV{'GENOTYPE_TEST_DATA'}, + "Human670-QuadCustom_v1_A.bpm.csv"); +my $egt = catfile($ENV{'GENOTYPE_TEST_DATA'}, + "Human670-QuadCustom_v1_A.egt"); + +# fluidigm test data +my $f_expected_vcf = $data_path."/fluidigm.vcf"; +my $f_reference_name = "Homo_sapiens (1000Genomes)"; +my $f_snpset_id = 'qc'; +my $f_snpset_filename = 'qc_fluidigm_snp_info_GRCh37.tsv'; +my @f_input_files = qw(fluidigm_001.csv fluidigm_002.csv + fluidigm_003.csv fluidigm_004.csv); +my $f_sample_json = $data_path."/fluidigm_samples.json"; +my $f_params_name = "params_fluidigm.json"; + +# sequenom test data +my $s_expected_vcf = $data_path."/sequenom.vcf"; +my $s_reference_name = "Homo_sapiens (1000Genomes)"; +my $s_snpset_id = 'W30467'; +my $s_snpset_filename = 'W30467_snp_set_info_GRCh37.tsv'; +my $s_snpset_filename_1 = 'W30467_snp_set_info_GRCh37_1.tsv'; +my $s_sample_json = $data_path."/sequenom_samples.json"; +my $s_params_name = "params_sequenom.json"; +my $s_params_name_1 = "params_sequenom_1.json"; + +my @sample_ids = qw(urn:wtsi:plate0001_A01_sample000001 + urn:wtsi:plate0001_B01_sample000002 + urn:wtsi:plate0001_C01_sample000003 + urn:wtsi:plate0001_D01_sample000004); +my $chromosome_json_filename = "chromosome_lengths_GRCh37.json"; +my $cjson_irods; + +my $log = 
Log::Log4perl->get_logger(); + +my $tfc = 0; # text fixture count + + +sub require : Test(1) { + require_ok('WTSI::NPG::Genotyping::VCF::PlexResultFinder'); +} + +sub construct : Test(1) { + + my $fluidigm_params = setup_fluidigm(); + new_ok('WTSI::NPG::Genotyping::VCF::PlexResultFinder', + [irods => $irods, + sample_ids => ['sample_1', 'sample_2'], + subscriber_config => [$fluidigm_params, ], + ]); + +} + +sub make_fixture : Test(setup) { + $tmp = tempdir("ready_plex_test_XXXXXX", CLEANUP => 1); + $log->info("Created temporary directory $tmp"); + $irods = WTSI::NPG::iRODS->new; + $irods_tmp_coll = $irods->add_collection("ReadyPlexCallsTest.$pid.$tfc"); + $tfc++; + $cjson_irods = $irods_tmp_coll."/".$chromosome_json_filename; + + # set up dummy fasta reference + $ENV{NPG_REPOSITORY_ROOT} = $tmp; + my $fastadir = catfile($tmp, 'references', 'Homo_sapiens', + 'GRCh37_53', 'all', 'fasta'); + make_path($fastadir); + my $reference_file_path = catfile($fastadir, + 'Homo_sapiens.GRCh37.dna.all.fa'); + open my $fh, '>>', $reference_file_path || $log->logcroak( + "Cannot open reference file path '", $reference_file_path, "'"); + close $fh || $log->logcroak( + "Cannot close reference file path '", $reference_file_path, "'"); +} + +sub setup_fluidigm { + # add some dummy fluidigm CSV files to the temporary collection + # add sample and snpset names to metadata + # optionally, use a different reference name + my ($reference_name, ) = @_; + $reference_name ||= $f_reference_name; + for (my $i=0;$i<@f_input_files;$i++) { + my $input = $f_input_files[$i]; + my $ipath = $irods_tmp_coll."/".$input; + $irods->add_object($data_path."/".$input, $ipath); + $irods->add_object_avu($ipath,'dcterms:identifier', $sample_ids[$i]); + $irods->add_object_avu($ipath, 'fluidigm_plex', $f_snpset_id); + } + my $snpset_path = $irods_tmp_coll."/".$f_snpset_filename; + $irods->add_object($data_path."/".$f_snpset_filename, $snpset_path); + $irods->add_object_avu($snpset_path, 'chromosome_json', 
$cjson_irods); + $irods->add_object_avu($snpset_path, 'fluidigm_plex', $f_snpset_id); + $irods->add_object_avu($snpset_path, 'reference_name', $f_reference_name); + # write JSON config file with test params + my %params = ( + "data_path" => $irods_tmp_coll, + "platform" => "fluidigm", + "reference_name" => $reference_name, + "reference_path" => $irods_tmp_coll, + "snpset_name" => $f_snpset_id, + "callset" => "fluidigm_".$f_snpset_id, + ); + my $params_path_fluidigm = $tmp."/".$f_params_name; + open my $out, ">", $params_path_fluidigm || + $log->logcroak("Cannot open test parameter path '", + $params_path_fluidigm, "'"); + print $out to_json(\%params); + close $out || + $log->logcroak("Cannot close test parameter path '", + $params_path_fluidigm, "'"); + return $params_path_fluidigm; +} + +sub setup_sequenom_alternate { + my @s_input_files = qw(sequenom_alternate_snp_001.csv + sequenom_alternate_snp_002.csv + sequenom_alternate_snp_003.csv + sequenom_alternate_snp_004.csv); + my ($default_config, $alternate_config) = setup_sequenom(\@s_input_files); + return $alternate_config; +} + +sub setup_sequenom_default { + my @s_input_files = qw(sequenom_001.csv + sequenom_002.csv + sequenom_003.csv + sequenom_004.csv); + my ($default_config, $alternate_config) = setup_sequenom(\@s_input_files); + return $default_config; +} + +sub setup_sequenom { + # add some dummy sequenom CSV files to the temporary collection + # add sample and snpset names to metadata + my @s_input_files = @{$_[0]}; + my $snpset_v1 = "1.0"; + my $snpset_v2 = "2.0"; + # upload regular and alternate-snp input files to iRODS + for (my $i=0;$i<@s_input_files;$i++) { + my $input = $s_input_files[$i]; + my $ipath = $irods_tmp_coll."/".$input; + $irods->add_object($data_path."/".$input, $ipath); + $irods->add_object_avu($ipath,'dcterms:identifier', $sample_ids[$i]); + $irods->add_object_avu($ipath, 'sequenom_plex', $s_snpset_id); + } + # add snpset (version "1.0") + my $snpset_1 = 
$irods_tmp_coll."/".$s_snpset_filename_1; + $irods->add_object($data_path."/".$s_snpset_filename_1, $snpset_1); + $irods->add_object_avu($snpset_1, 'chromosome_json', $cjson_irods); + $irods->add_object_avu($snpset_1, 'sequenom_plex', $s_snpset_id); + $irods->add_object_avu($snpset_1, 'reference_name', $s_reference_name); + $irods->add_object_avu($snpset_1,'snpset_version', $snpset_v1); + # add snpset (version "2.0") + my $snpset_path = $irods_tmp_coll."/".$s_snpset_filename; + $irods->add_object($data_path."/".$s_snpset_filename, $snpset_path); + $irods->add_object_avu($snpset_path, 'chromosome_json', $cjson_irods); + $irods->add_object_avu($snpset_path, 'sequenom_plex', $s_snpset_id); + $irods->add_object_avu($snpset_path, 'reference_name', $s_reference_name); + $irods->add_object_avu($snpset_path, 'snpset_version', $snpset_v2); + # write JSON config file with test params + my %params = ( + data_path => $irods_tmp_coll, + platform => "sequenom", + reference_name => $s_reference_name, + reference_path => $irods_tmp_coll, + snpset_name => $s_snpset_id, + read_snpset_version => $snpset_v2, + write_snpset_version => $snpset_v2, + callset => "sequenom_".$s_snpset_id, + ); + my $config_path = $tmp."/".$s_params_name; + my $out; + open $out, ">", $config_path || + $log->logcroak("Cannot open config file '", $config_path, "'"); + print $out to_json(\%params); + close $out || + $log->logcroak("Cannot close config file '", $config_path, "'"); + # write another JSON config file with alternate input snpset + $params{'read_snpset_version'} = $snpset_v1; + my $config_path_1 = $tmp."/".$s_params_name_1; + open $out, ">", $config_path_1 || + $log->logcroak("Cannot open config file '", $config_path_1, "'"); + print $out to_json(\%params); + close $out || + $log->logcroak("Cannot close config file '", $config_path_1, "'"); + return ($config_path, $config_path_1); +} + +sub setup_chromosome_json { + # upload chromosome json file to temporary irods collection + # can only upload 
once per collection + # must upload before running setup_fluidigm or setup_sequenom + my $cjson = $data_path."/".$chromosome_json_filename; + $irods->add_object($cjson, $cjson_irods); +} + +sub teardown : Test(teardown) { + $irods->remove_collection($irods_tmp_coll); +} + +sub test_ready_calls_fluidigm : Test(2) { + setup_chromosome_json(); + my $fluidigm_params = setup_fluidigm(); + my $vcf_out = "$tmp/fluidigm_qc.vcf"; + my $cmd = join q{ }, "$READY_QC_CALLS", + "--config $fluidigm_params", + "--dbfile $dbfile", + "--logconf $LOG_TEST_CONF", + "--verbose", + "--out $tmp"; + ok(system($cmd) == 0, 'Wrote Fluidigm calls to VCF'); + my @got_lines = read_file($vcf_out); + @got_lines = grep !/^[#]{2}(fileDate|reference)=/, @got_lines; + my @expected_lines = read_file($f_expected_vcf); + @expected_lines = grep !/^[#]{2}(fileDate|reference)=/, @expected_lines; + is_deeply(\@got_lines, \@expected_lines, + "Fluidigm VCF output matches expected values"); + +} + +sub test_ready_calls_fluidigm_bad_reference : Test(2) { + # test + setup_chromosome_json(); + my $fluidigm_params = setup_fluidigm(); + my $vcf_out = "$tmp/fluidigm_qc.vcf"; + my $cmd = join q{ }, "$READY_QC_CALLS", + "--config $fluidigm_params", + "--dbfile $dbfile", + "--logconf $LOG_TEST_CONF", + "--verbose", + "--out $tmp"; + ok(system($cmd) == 0, 'Wrote Fluidigm calls to VCF'); + my @got_lines = read_file($vcf_out); + @got_lines = grep !/^[#]{2}(fileDate|reference)=/, @got_lines; + my @expected_lines = read_file($f_expected_vcf); + @expected_lines = grep !/^[#]{2}(fileDate|reference)=/, @expected_lines; + is_deeply(\@got_lines, \@expected_lines, + "Fluidigm VCF output matches expected values"); + +} + +sub test_ready_calls_sequenom : Test(2) { + setup_chromosome_json(); + my $sequenom_params = setup_sequenom_default(); + my $vcf_out = "$tmp/sequenom_W30467.vcf"; + my $cmd = join q{ }, "$READY_QC_CALLS", + "--config $sequenom_params", + "--dbfile $dbfile", + "--logconf $LOG_TEST_CONF", + "--out $tmp"; + 
ok(system($cmd) == 0, 'Wrote Sequenom calls to VCF'); + my @got_lines = read_file($vcf_out); + @got_lines = grep !/^[#]{2}(fileDate|reference)=/, @got_lines; + my @expected_lines = read_file($s_expected_vcf); + @expected_lines = grep !/^[#]{2}(fileDate|reference)=/, @expected_lines; + is_deeply(\@got_lines, \@expected_lines, + "Sequenom VCF output matches expected values"); + +} + +sub test_ready_calls_sequenom_alternate_snp : Test(2) { + # tests handling of renamed SNP in different manifest versions + setup_chromosome_json(); + my $sequenom_params = setup_sequenom_alternate(); + my $vcf_out = "$tmp/sequenom_W30467.vcf"; + my $cmd = join q{ }, "$READY_QC_CALLS", + "--config $sequenom_params", + "--dbfile $dbfile", + "--logconf $LOG_TEST_CONF", + "--out $tmp"; + ok(system($cmd) == 0, 'Wrote Sequenom calls to VCF'); + my @got_lines = read_file($vcf_out); + @got_lines = grep !/^[#]{2}(fileDate|reference)=/, @got_lines; + my @expected_lines = read_file($s_expected_vcf); + @expected_lines = grep !/^[#]{2}(fileDate|reference)=/, @expected_lines; + is_deeply(\@got_lines, \@expected_lines, + "Sequenom VCF output matches expected values"); + +} + +sub test_ready_calls_both : Test(3) { + # test ready calls script with *both* sequenom and fluidigm specified + setup_chromosome_json(); + my $fluidigm_params = setup_fluidigm(); + my $sequenom_params = setup_sequenom_default(); + my $fluidigm_out = catfile($tmp, "fluidigm_qc.vcf"); + my $sequenom_out = catfile($tmp, "sequenom_W30467.vcf"); + my $cmd = join q{ }, "$READY_QC_CALLS", + "--config $fluidigm_params,$sequenom_params", + "--dbfile $dbfile", + "--logconf $LOG_TEST_CONF", + "--out $tmp"; + ok(system($cmd) == 0, 'Wrote Sequenom and Fluidigm calls to VCF'); + my @got_f = read_file($fluidigm_out); + @got_f = grep !/^[#]{2}(fileDate|reference)=/, @got_f; + my @expected_f = read_file($f_expected_vcf); + @expected_f = grep !/^[#]{2}(fileDate|reference)=/, @expected_f; + is_deeply(\@got_f, \@expected_f, + "Fluidigm VCF output 
matches expected values"); + my @got_s = read_file($sequenom_out); + @got_s = grep !/^[#]{2}(fileDate|reference)=/, @got_s; + my @expected_s = read_file($s_expected_vcf); + @expected_s = grep !/^[#]{2}(fileDate|reference)=/, @expected_s; + is_deeply(\@got_s, \@expected_s, + "Sequenom VCF output matches expected values"); + +} + +sub test_result_finder : Test(9) { + setup_chromosome_json(); + my $fluidigm_config = setup_fluidigm(); + my $sequenom_config = setup_sequenom_default(); + my $finder = WTSI::NPG::Genotyping::VCF::PlexResultFinder->new( + irods => $irods, + sample_ids => \@sample_ids, + subscriber_config => [$fluidigm_config, $sequenom_config], + ); + # test the write_vcf function + my $paths = $finder->write_vcf($tmp); + my $f_out_vcf = "$tmp/fluidigm_qc.vcf"; + my $s_out_vcf = "$tmp/sequenom_W30467.vcf"; + my $out_paths = [$f_out_vcf, $s_out_vcf]; + is_deeply($paths, $out_paths, "Fluidigm & Sequenom outputs returned"); + ok(-e $f_out_vcf, "Fluidigm output found"); + ok(-e $s_out_vcf, "Sequenom output found"); + my @got_f = read_file($f_out_vcf); + @got_f = grep !/^[#]{2}(fileDate|reference)=/, @got_f; + my @expected_f = read_file($f_expected_vcf); + @expected_f = grep !/^[#]{2}(fileDate|reference)=/, @expected_f; + is_deeply(\@got_f, \@expected_f, + "Fluidigm VCF output matches expected values"); + my @got_s = read_file($s_out_vcf); + @got_s = grep !/^[#]{2}(fileDate|reference)=/, @got_s; + my @expected_s = read_file($s_expected_vcf); + @expected_s = grep !/^[#]{2}(fileDate|reference)=/, @expected_s; + is_deeply(\@got_s, \@expected_s, + "Sequenom VCF output matches expected values"); + # test the write_manifests function + my $manifests = $finder->write_manifests($tmp); + my $f_manifest = "$tmp/fluidigm_qc.tsv"; + my $s_manifest = "$tmp/sequenom_W30467.tsv"; + ok(-e $f_manifest, "Fluidigm manifest found"); + ok(-e $s_manifest, "Sequenom manifest found"); + my @got_f_manifest = read_file($f_manifest); + my @expected_f_manifest = 
read_file(catfile($data_path, + $f_snpset_filename)); + is_deeply(\@got_f_manifest, \@expected_f_manifest, + 'Fluidigm manifest contents OK'); + my @got_s_manifest = read_file($s_manifest); + my @expected_s_manifest = read_file(catfile($data_path, + $s_snpset_filename)); + is_deeply(\@got_s_manifest, \@expected_s_manifest, + 'Sequenom manifest contents OK'); +} + +sub test_workflow_script_illuminus: Test(16) { + setup_chromosome_json(); + my $f_config = setup_fluidigm(); + my $s_config = setup_sequenom_default(); + my $workdir = abs_path(catfile($tmp, "genotype_workdir_illuminus")); + my $config_path = catfile($workdir, "config.yml"); + my $working_db = catfile($workdir, $db_file_name); + my $cmd = join q{ }, "$READY_WORKFLOW", + "--logconf $LOG_TEST_CONF", + "--dbfile $dbfile", + "--manifest $manifest", + "--run run1", + "--verbose", + "--plex_config $f_config", + "--plex_config $s_config", + "--workdir $workdir", + "--workflow illuminus"; + is(0, system($cmd), "illuminus setup exit status is zero"); + # check presence of required files and subfolders for workflow + ok(-e $workdir, "Workflow directory found"); + ok(-e $config_path, "config.yml found"); + ok(-e $working_db, "genotyping SQLite database found"); + foreach my $name (qw/in pass fail/) { + my $subdir = catfile($workdir, $name); + ok(-e $subdir && -d $subdir, "Subdirectory '$name' found"); + } + my $params_path = catfile($workdir, "in", "genotype_illuminus.yml"); + ok(-e $params_path, "genotype_illuminus.yml found"); + my $vcf_path_fluidigm = catfile($workdir, 'vcf', 'fluidigm_qc.vcf'); + my $vcf_path_sequenom = catfile($workdir, 'vcf', 'sequenom_W30467.vcf'); + ok(-e $vcf_path_fluidigm, "Fluidigm VCF file found for Illuminus"); + + my $got_fluidigm = _read_without_filedate($vcf_path_fluidigm); + my $expected_fluidigm_path = catfile($data_path, 'fluidigm.vcf'); + my $expected_fluidigm = _read_without_filedate($expected_fluidigm_path); + is_deeply($got_fluidigm, $expected_fluidigm, + "Fluidigm VCF matches 
expected values"); + ok(-e $vcf_path_sequenom, "Sequenom VCF file found for Illuminus"); + my $got_sequenom = _read_without_filedate($vcf_path_sequenom); + my $expected_sequenom_path = catfile($data_path, 'sequenom.vcf'); + my $expected_sequenom = _read_without_filedate($expected_sequenom_path); + is_deeply($got_sequenom, $expected_sequenom, + "Sequenom VCF matches expected values"); + # check contents of YML files + my $config = LoadFile($config_path); + ok($config, "Config data structure loaded from YML"); + my $expected_config = { + 'msg_port' => '11300', + 'max_processes' => '250', + 'root_dir' => $workdir, + 'log_level' => 'DEBUG', + 'async' => 'lsf', + 'msg_host' => 'farm3-head2', + 'log' => catfile($workdir, 'percolate.log') + }; + is_deeply($config, $expected_config, + "YML Illuminus config matches expected values"); + + my $params = LoadFile($params_path); + ok($params, "Workflow parameter data structure loaded from YML"); + my $manifest_name = fileparse($manifest); + my $fluidigm_manifest_name = 'fluidigm_qc.tsv'; + my $sequenom_manifest_name = 'sequenom_W30467.tsv'; + my $expected_params = { + 'workflow' => 'Genotyping::Workflows::GenotypeIlluminus', + 'library' => 'genotyping', + 'arguments' => [ + $working_db, + 'run1', + $workdir, + { + 'memory' => '2048', + 'manifest' => catfile($workdir, $manifest_name), + 'chunk_size' => '4000', + 'plex_manifest' => [ + catfile($workdir, 'plex_manifests', + $fluidigm_manifest_name), + catfile($workdir, 'plex_manifests', + $sequenom_manifest_name), + ], + 'vcf' => [ + $vcf_path_fluidigm, + $vcf_path_sequenom, + ], + 'gender_method' => 'Supplied' + } + ] + }; + is_deeply($params, $expected_params, + "YML Illuminus workflow params match expected values"); +} + +sub test_workflow_script_illuminus_bad_plex_reference: Test(12) { + # use a bad reference name + # Subscriber object creation fails + # VCF and plex manifest outputs are empty + setup_chromosome_json(); + my $f_config = setup_fluidigm('jabberwocky'); + my 
$workdir = abs_path(catfile($tmp, + "genotype_workdir_illuminus_no_plex")); + my $config_path = catfile($workdir, "config.yml"); + my $working_db = catfile($workdir, $db_file_name); + my $cmd = join q{ }, "$READY_WORKFLOW", + "--logconf $LOG_TEST_CONF", + "--dbfile $dbfile", + "--manifest $manifest", + "--run run1", + "--verbose", + "--plex_config $f_config", + "--workdir $workdir", + "--workflow illuminus", + "2> /dev/null"; # suppress chatter to stderr + is(0, system($cmd), "illuminus setup exit status is zero"); + # check presence of required files and subfolders for workflow + ok(-e $workdir, "Workflow directory found"); + ok(-e $config_path, "config.yml found"); + ok(-e $working_db, "genotyping SQLite database found"); + foreach my $name (qw/in pass fail/) { + my $subdir = catfile($workdir, $name); + ok(-e $subdir && -d $subdir, "Subdirectory '$name' found"); + } + my $params_path = catfile($workdir, "in", "genotype_illuminus.yml"); + ok(-e $params_path, "genotype_illuminus.yml found"); + # check contents of YML files + my $config = LoadFile($config_path); + ok($config, "Config data structure loaded from YML"); + my $expected_config = { + 'msg_port' => '11300', + 'max_processes' => '250', + 'root_dir' => $workdir, + 'log_level' => 'DEBUG', + 'async' => 'lsf', + 'msg_host' => 'farm3-head2', + 'log' => catfile($workdir, 'percolate.log') + }; + is_deeply($config, $expected_config, + "YML Illuminus config matches expected values"); + + my $params = LoadFile($params_path); + ok($params, "Workflow parameter data structure loaded from YML"); + my $manifest_name = fileparse($manifest); + my $expected_params = { + 'workflow' => 'Genotyping::Workflows::GenotypeIlluminus', + 'library' => 'genotyping', + 'arguments' => [ + $working_db, + 'run1', + $workdir, + { + 'memory' => '2048', + 'manifest' => catfile($workdir, $manifest_name), + 'chunk_size' => '4000', + 'plex_manifest' => [], + 'vcf' => [], + 'gender_method' => 'Supplied' + } + ] + }; + is_deeply($params, 
$expected_params, + "YML Illuminus workflow params match expected values"); + +} + +sub test_workflow_script_zcall: Test(16) { + setup_chromosome_json(); + my $f_config = setup_fluidigm(); + my $s_config = setup_sequenom_default(); + my $workdir = abs_path(catfile($tmp, "genotype_workdir_zcall")); + my $working_db = catfile($workdir, $db_file_name); + my $params_path = catfile($workdir, "in", "genotype_zcall.yml"); + my $cmd = join q{ }, "$READY_WORKFLOW", + "--logconf $LOG_TEST_CONF", + "--dbfile $dbfile", + "--manifest $manifest", + "--run run1", + "--verbose", + "--plex_config $f_config", + "--plex_config $s_config", + "--egt $egt", + "--zstart 6", + "--ztotal 3", + "--workdir $workdir", + "--workflow zcall"; + is(0, system($cmd), "zcall setup exit status is zero"); + ok(-e $workdir, "Workflow directory found"); + my $config_path = catfile($workdir, "config.yml"); + ok(-e $config_path, "config.yml found"); + ok(-e $working_db, "genotyping SQLite DB found"); + foreach my $name (qw/in pass fail/) { + my $subdir = catfile($workdir, $name); + ok(-e $subdir && -d $subdir, "Subdirectory '$name' found"); + } + ok(-e $params_path, "genotype_zcall.yml found"); + my $vcf_path_fluidigm = catfile($workdir, 'vcf', 'fluidigm_qc.vcf'); + my $vcf_path_sequenom = catfile($workdir, 'vcf', 'sequenom_W30467.vcf'); + ok(-e $vcf_path_fluidigm, "Fluidigm VCF file found for zCall"); + my $got_fluidigm = _read_without_filedate($vcf_path_fluidigm); + my $expected_fluidigm_path = catfile($data_path, 'fluidigm.vcf'); + my $expected_fluidigm = _read_without_filedate($expected_fluidigm_path); + is_deeply($got_fluidigm, $expected_fluidigm, + "Fluidigm VCF matches expected values"); + ok(-e $vcf_path_sequenom, "Sequenom VCF file found for zCall"); + my $got_sequenom = _read_without_filedate($vcf_path_sequenom); + my $expected_sequenom_path = catfile($data_path, 'sequenom.vcf'); + my $expected_sequenom = _read_without_filedate($expected_sequenom_path); + is_deeply($got_sequenom, 
$expected_sequenom, + "Sequenom VCF matches expected values"); + # check contents of YML files + my $config = LoadFile($config_path); + ok($config, "Config data structure loaded from YML"); + my $expected_config = { + 'msg_port' => '11300', + 'max_processes' => '250', + 'root_dir' => $workdir, + 'log_level' => 'DEBUG', + 'async' => 'lsf', + 'msg_host' => 'farm3-head2', + 'log' => catfile($workdir, 'percolate.log') + }; + is_deeply($config, $expected_config, + "YML zCall config matches expected values"); + my $params = LoadFile($params_path); + ok($params, "Workflow parameter data structure loaded from YML"); + my $manifest_name = fileparse($manifest); + my $fluidigm_manifest_name = 'fluidigm_qc.tsv'; + my $sequenom_manifest_name = 'sequenom_W30467.tsv'; + my $egt_name = fileparse($egt); + my $expected_params = { + 'workflow' => 'Genotyping::Workflows::GenotypeZCall', + 'library' => 'genotyping', + 'arguments' => [ + $working_db, + 'run1', + $workdir, + { + 'zstart' => '6', + 'chunk_size' => '40', + 'egt' => catfile($workdir, $egt_name), + 'vcf' => [ + $vcf_path_fluidigm, + $vcf_path_sequenom, + ], + 'memory' => '2048', + 'ztotal' => '3', + 'manifest' => catfile($workdir, $manifest_name), + 'plex_manifest' => [ + catfile($workdir, 'plex_manifests', + $fluidigm_manifest_name), + catfile($workdir, 'plex_manifests', + $sequenom_manifest_name), + ] + } + ] + }; + is_deeply($params, $expected_params, + "YML zCall workflow params match expected values"); +} + + +sub _read_without_filedate { + # read a VCF file, omitting the ##fileDate and ##reference lines + # Duplicated in VCFTest.pm + my ($inPath) = @_; + my $lines = read_file($inPath); + return _remove_filedate_reference($lines); +} + +sub _remove_filedate_reference { + # remove the fileDate and reference from a string containing VCF + # return an ArrayRef[Str] containing data + my ($vcf_str) = @_; + my @lines_in = split m/\n/msx, $vcf_str; + my @lines_out; + foreach my $line (@lines_in) { + if ( $line =~ 
/^[#]{2}(fileDate|reference)/msx ) { next; } + else { push(@lines_out, $line); } + } + return \@lines_out; +} + +return 1; + diff --git a/src/perl/t/WTSI/NPG/Genotyping/VCF/ReferenceFinderTest.pm b/src/perl/t/WTSI/NPG/Genotyping/VCF/ReferenceFinderTest.pm index fafa0c4e4..f84d9436f 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/VCF/ReferenceFinderTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/VCF/ReferenceFinderTest.pm @@ -3,7 +3,7 @@ package WTSI::NPG::Genotyping::VCF::ReferenceFinderTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 5; diff --git a/src/perl/t/WTSI/NPG/Genotyping/VCF/VCFTest.pm b/src/perl/t/WTSI/NPG/Genotyping/VCF/VCFTest.pm index 58d981c6f..baf51a492 100644 --- a/src/perl/t/WTSI/NPG/Genotyping/VCF/VCFTest.pm +++ b/src/perl/t/WTSI/NPG/Genotyping/VCF/VCFTest.pm @@ -1,9 +1,9 @@ -package WTSI::NPG::Genotyping::VCFTest; +package WTSI::NPG::Genotyping::VCF::VCFTest; use strict; use warnings; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Cwd qw(abs_path); use File::Path qw/make_path/; use File::Slurp qw /read_file/; @@ -118,7 +118,7 @@ sub data_row_parser_test : Test(5) { } -sub fluidigm_file_test : Test(212) { +sub fluidigm_file_test : Test(204) { my @inputs; foreach my $name (@fluidigm_csv) { push(@inputs, abs_path($data_path."/".$name)); @@ -128,7 +128,7 @@ sub fluidigm_file_test : Test(212) { reference => [ $reference_vcf_meta ], plex_type => [ $FLUIDIGM_TYPE ], plex_name => [ 'qc' ], - callset_name => [ 'fluidigm' ], + callset_name => [ 'fluidigm_qc' ], ); my @resultsets; foreach my $input (@inputs) { @@ -155,7 +155,7 @@ sub fluidigm_file_test : Test(212) { is(scalar @calls, 24, "Correct number of calls for $sample"); foreach my $call (@calls) { isa_ok($call, 'WTSI::NPG::Genotyping::Call'); - ok($call->callset_name eq 'fluidigm', "Callset name OK"); + ok($call->callset_name eq 'fluidigm_qc', "Callset name OK"); } } } @@ -176,7 +176,7 @@ sub fluidigm_irods_test : Test(7) { 
reference => [ $reference_vcf_meta ], plex_type => [ $FLUIDIGM_TYPE ], plex_name => [ 'qc' ], - callset_name => [ 'fluidigm' ] + callset_name => [ 'fluidigm_qc' ] ); # hash of arrayrefs for compatibility with VCF header my $parser = WTSI::NPG::Genotyping::VCF::AssayResultParser->new (resultsets => \@resultsets, @@ -272,7 +272,7 @@ sub sequenom_file_test : Test(7) { reference => [ $reference_vcf_meta ], plex_type => [ $SEQUENOM_TYPE ], plex_name => [ 'W30467' ], - callset_name => [ 'sequenom' ] + callset_name => [ 'sequenom_W30467' ] ); my $parser = WTSI::NPG::Genotyping::VCF::AssayResultParser->new (resultsets => \@resultsets, @@ -303,7 +303,7 @@ sub sequenom_irods_test : Test(7) { reference => [ $reference_vcf_meta ], plex_type => [ $SEQUENOM_TYPE ], plex_name => [ 'W30467' ], - callset_name => [ 'sequenom' ] + callset_name => [ 'sequenom_W30467' ] ); my $parser = WTSI::NPG::Genotyping::VCF::AssayResultParser->new (resultsets => \@resultsets, @@ -334,8 +334,9 @@ sub script_conversion_test : Test(3) { my $vcfOutput = "$tmp/vcf.txt"; my $snpset_ipath = $irods_tmp_coll.'/'.$sequenom_snpset_name; my $cmd = "$script --input - --vcf $vcfOutput --quiet ". - "--snpset $snpset_ipath --irods --plex_type $SEQUENOM_TYPE ". - "--callset $SEQUENOM_TYPE --repository $tmp < $sequenomList"; + "--snpset $snpset_ipath --irods --plex_type ".$SEQUENOM_TYPE." ". + "--callset ".$SEQUENOM_TYPE."_W30467 ". 
+ "--repository $tmp < $sequenomList"; is(system($cmd), 0, "$cmd exits successfully"); ok(-e $vcfOutput, "VCF output written"); # read VCF output (omitting date) and compare to reference file @@ -506,6 +507,7 @@ sub vcf_dataset_test: Test(4) { sub _read_without_filedate { # read a VCF file, omitting the ##fileDate and ##reference lines + # duplicated in ReadyWorkflowTest.pm my ($inPath) = @_; my $lines = read_file($inPath); return _remove_filedate_reference($lines); diff --git a/src/perl/t/WTSI/NPG/Genotyping/YMLTest.pm b/src/perl/t/WTSI/NPG/Genotyping/YMLTest.pm deleted file mode 100644 index f3ad63f24..000000000 --- a/src/perl/t/WTSI/NPG/Genotyping/YMLTest.pm +++ /dev/null @@ -1,59 +0,0 @@ - -use utf8; - -package WTSI::NPG::Genotyping::YMLTest; - -use strict; -use warnings; -use File::Temp qw/tempdir/; -use YAML qw/LoadFile/; - -use base qw(Test::Class); -use Test::More tests => 8; -use Test::Exception; - -use Log::Log4perl; - -Log::Log4perl::init('./etc/log4perl_tests.conf'); - -my $dataDir = "/nfs/gapi/data/genotype/pipeline_test/"; -my $manifest = $dataDir."Human670-QuadCustom_v1_A.bpm.csv"; -my $plex_manifest = $dataDir."W30467_snp_set_info_GRCh37.tsv"; -my $egt = $dataDir."Human670-QuadCustom_v1_A.egt"; -my $config = "config.yml"; -my @workflows = qw/null illuminus zcall/; - -sub test_command_line : Test(8) { - # test exit status and outputs of command line script - my $temp = tempdir("generate_yml_test_XXXXXX", CLEANUP => 1); - my $wd = "t/genotyping_yml"; # will not write any files here, but directory should exist to avoid warnings - my $db = "genotyping_DUMMY.db"; # empty file exists in $wd - my $ref_dir = './t/genotyping_yml/'; # directory with master files - my $cmd_root = "genotyping_yml.pl --outdir $temp --run run1 ". - "--workdir $wd -dbfile $db"; - foreach my $workflow (@workflows) { - my $cmd; - if ($workflow eq 'null') { $cmd = $cmd_root; } - else { $cmd = $cmd_root." --workflow $workflow --manifest $manifest". 
- " --plex_manifest $plex_manifest"; } - if ($workflow eq 'zcall') { $cmd .= " --egt $egt"; } - is(0,system($cmd),"genotyping_yml.pl exit status, ". - $workflow." workflow"); - # validate config.yml and workflow file - my $configPath = $temp.'/'.$config; - my $configMaster = $ref_dir.$config; - is_deeply(LoadFile($configPath), LoadFile($configMaster), - "Config YML data structure equivalent to master"); - - # validate the workflow .yml (if any) - if ($workflow ne 'null') { - my $output = $temp."/genotype_".$workflow.".yml"; - my $master = $ref_dir."genotype_".$workflow.".yml"; - is_deeply(LoadFile($output), LoadFile($master), - "YML data structure equivalent to master, ". - $workflow." workflow"); - } - } -} - -1; diff --git a/src/perl/t/WTSI/NPG/PublisherTest.pm b/src/perl/t/WTSI/NPG/PublisherTest.pm index 509e37132..eb7dd7951 100644 --- a/src/perl/t/WTSI/NPG/PublisherTest.pm +++ b/src/perl/t/WTSI/NPG/PublisherTest.pm @@ -6,7 +6,7 @@ use strict; use warnings; use DateTime; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 10; use Test::Exception; @@ -90,5 +90,19 @@ sub publish_file : Test(8) { ok($updated_lorem_obj->validate_checksum_metadata, 'New md5 metadata matches file'); - ok(!$lorem_obj->is_present, 'Update moved data object'); + my $lorem_obj_presence = $lorem_obj->is_present; + ok(!$lorem_obj_presence, 'Update moved data object'); + + # calling $lorem_obj->is_present raises warnings (because the object is + # not present). These warnings are written to the test log. Calling + # is_present *outside* the test assertion (as above) behaves as + # expected. + # + # If the call is made *within* the test assertion, for example: + # ok(!$lorem_obj->is_present, 'Update moved data object'); + # then the warnings are also printed to the terminal. The reason + # for this is unclear, as the test log should be configured to direct + # them to the logfile. 
For the time being, the is_present call has been + # left outside the assertion to suppress unwanted output in the terminal + # window. } diff --git a/src/perl/t/WTSI/NPG/SimplePublisherTest.pm b/src/perl/t/WTSI/NPG/SimplePublisherTest.pm index d902f1a28..32c28fa1b 100644 --- a/src/perl/t/WTSI/NPG/SimplePublisherTest.pm +++ b/src/perl/t/WTSI/NPG/SimplePublisherTest.pm @@ -6,7 +6,7 @@ use strict; use warnings; use DateTime; -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 7; use Test::Exception; diff --git a/src/perl/t/WTSI/NPG/Test.pm b/src/perl/t/WTSI/NPG/Test.pm new file mode 100644 index 000000000..64707809c --- /dev/null +++ b/src/perl/t/WTSI/NPG/Test.pm @@ -0,0 +1,41 @@ +package WTSI::NPG::Test; + +use strict; +use warnings; + +use base qw(Test::Class); +use Test::More; + +# Run full tests (requiring a test iRODS server) only if TEST_AUTHOR +# is true. If full tests are run, require that irodsEnvFile be set. +sub runtests { + my ($self) = @_; + + my $env_file = $ENV{'WTSI_NPG_iRODS_Test_irodsEnvFile'} || q{}; + if (not $env_file) { + if ($ENV{TEST_AUTHOR}) { + die 'Environment variable WTSI_NPG_iRODS_Test_irodsEnvFile was not set'; + } + else { + $self->SKIP_CLASS('TEST_AUTHOR environment variable is false'); + } + } + + my %env_copy = %ENV; + # Ensure that the iRODS connection details are a nonsense value if + # they are not set explicitly via WTSI_NPG_iRODS_Test_irodsEnvFile + $env_copy{'irodsEnvFile'} = $env_file || 'DUMMY_VALUE'; + + { + local %ENV = %env_copy; + return $self->SUPER::runtests; + } +} + +# If any test methods fail to complete, count all their remaining +# tests as failures. 
+sub fail_if_returned_early { + return 1; +} + +1; diff --git a/src/perl/t/WTSI/NPG/UtilitiesTest.pm b/src/perl/t/WTSI/NPG/UtilitiesTest.pm index 4c6f6f4ac..8e2b6222d 100644 --- a/src/perl/t/WTSI/NPG/UtilitiesTest.pm +++ b/src/perl/t/WTSI/NPG/UtilitiesTest.pm @@ -7,7 +7,7 @@ use strict; use warnings; use File::Temp qw(tempfile); -use base qw(Test::Class); +use base qw(WTSI::NPG::Test); use Test::More tests => 810; use Test::Exception; diff --git a/src/perl/t/call.t b/src/perl/t/call.t index 0e6a4af50..f71710fcb 100644 --- a/src/perl/t/call.t +++ b/src/perl/t/call.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::CallTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::CallTest->runtests; diff --git a/src/perl/t/collation.t b/src/perl/t/collation.t index d012a49a1..1a4dbec71 100644 --- a/src/perl/t/collation.t +++ b/src/perl/t/collation.t @@ -1,103 +1,9 @@ -# Author: Iain Bancarz, ib5@sanger.ac.uk -# February 2014 + +use utf8; use strict; use warnings; -use Carp; -use Digest::MD5; -use File::Temp qw/tempdir/; -use FindBin qw($Bin); -use Test::More tests => 15; -use JSON; - -use WTSI::NPG::Genotyping::QC::Collation qw(collate); -use WTSI::NPG::Genotyping::QC::QCPlotShared qw(readFileToString readSampleInclusion); - -my $tempdir = tempdir("/tmp/qc_report_tXXXXXX", CLEANUP => 1); - -my ($dir, $inputDir, $configPath, $dbPath, $iniPath, $jsonResults, - $jsonMetrics, $resultsMaster, $metricsMaster, $verbose, - $csvPath, $exclude, $jsonMaster, $thresholdPath, $dbName, $md5, $fh); - -$dbName = 'small_test.db'; -$dir = "$Bin/qc_test_data/"; -$inputDir = $dir.'output_examples/'; -$configPath = $dir.'config_test.json'; -$thresholdPath = $configPath; -$dbPath = $dir.$dbName; -$iniPath = $ENV{HOME} . 
"/.npg/genotyping.ini"; -$jsonResults = $tempdir.'/qc_results.json'; -$jsonMetrics = $tempdir.'/qc_metrics.json'; -$csvPath = $tempdir.'/qc_results.csv'; -$exclude = 0; -$verbose = 0; -$resultsMaster = $inputDir.'/qc_results.json'; -$metricsMaster = $inputDir.'/qc_metrics.json'; - -collate($inputDir, $configPath, $thresholdPath, $dbPath, $iniPath, - $jsonResults, $jsonMetrics, $csvPath, $exclude, 0, $verbose); -checkOutputs($jsonMetrics, $metricsMaster, $jsonResults, $resultsMaster, $csvPath); -system("rm -Rf $tempdir/*"); # remove output from previous tests -print "Removed output from previous tests; now testing main script.\n"; -my $dbTemp = $tempdir.'/'.$dbName; # apply sample exclusion to temporary DB -system("cp $dbPath $dbTemp"); -my $cmd = "collate_qc_results.pl --input $inputDir --status $jsonResults --dbpath $dbTemp --csv $csvPath --metrics $jsonMetrics --config $configPath --exclude"; -is(0, system($cmd), 'Command-line script exit status OK'); -checkOutputs($jsonMetrics, $metricsMaster, $jsonResults, $resultsMaster, $csvPath); - -# check for sample exclusion in database -my $exclPath = $inputDir."/qc_exclusions.json"; -my $expected = decode_json(readFileToString($exclPath)); -my $excluded = readSampleInclusion($dbTemp); -is_deeply($excluded, $expected, "Sample inclusion status in pipeline DB"); - -# verify database checksum -$md5 = Digest::MD5->new; -open $fh, "<", $dbTemp || croak "Cannot open temporary DB $dbTemp"; -binmode($fh); -while (<$fh>) { $md5->add($_); } -close $fh || croak "Cannot close temporary DB $dbTemp"; -is($md5->hexdigest, '1088e683a09f281a62c4100ea7ff40ae', - "MD5 checksum of DB after sample exclusion"); - -sub checkOutputs { - # validate the expected collation output files - # runs a total of 6 tests - my ($jsonMetrics, $metricsMaster, $jsonResults, $resultsMaster, $csvPath) = @_; - ok(-e $jsonMetrics, "JSON metrics path exists"); - my $output = decode_json(readFileToString($jsonMetrics)); - my $master = 
decode_json(readFileToString($metricsMaster)); - is_deeply($output, $master, "JSON metrics data equivalent to master copy"); - ok(-e $jsonResults, "JSON results path exists"); - $output = decode_json(readFileToString($jsonResults)); - $master = decode_json(readFileToString($resultsMaster)); - is_deeply($output, $master, "JSON results data equivalent to master copy"); - ok(-e $csvPath, "CSV results path exists"); - ok(checkCsv($csvPath, 101, 33), "Correct row/column totals in .csv file"); -} +use WTSI::NPG::Genotyping::QC::CollationTest; -sub checkCsv { - my ($inPath, $expectedRows, $expectedCols) = @_; - my $rows = 0; - my $ok = 1; - open my $in, "<", $inPath || croak "Cannot open input $inPath"; - while (<$in>) { - $rows++; - chomp; - my @fields = split(/,/); - my $cols = scalar(@fields); - if ($cols!=$expectedCols) { - print STDERR "Expected $expectedCols .csv columns, ". - "found $cols at line $rows\n"; - $ok = 0; - last; - } - } - close $in || croak "Cannot close input $inPath"; - if ($ok==1 && $rows!=$expectedRows) { - print STDERR "Expected $expectedRows .csv rows, found $rows\n"; - $ok = 0; - } - return $ok; -} +WTSI::NPG::Genotyping::QC::CollationTest->runtests; diff --git a/src/perl/t/database_infinium.t b/src/perl/t/database_infinium.t index 3ca1b35c0..960d01873 100644 --- a/src/perl/t/database_infinium.t +++ b/src/perl/t/database_infinium.t @@ -3,6 +3,7 @@ use utf8; use strict; use warnings; + use WTSI::NPG::Genotyping::Database::InfiniumTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Database::InfiniumTest->runtests; diff --git a/src/perl/t/database_ml_warehouse.t b/src/perl/t/database_ml_warehouse.t index 60d5bf0e5..f74c661d0 100644 --- a/src/perl/t/database_ml_warehouse.t +++ b/src/perl/t/database_ml_warehouse.t @@ -1,6 +1,9 @@ +use utf8; + use strict; use warnings; + use WTSI::NPG::Database::MLWarehouseTest; -Test::Class->runtests; +WTSI::NPG::Database::MLWarehouseTest->runtests; diff --git a/src/perl/t/database_pipeline.t 
b/src/perl/t/database_pipeline.t index 8852e5312..4234bd8a2 100644 --- a/src/perl/t/database_pipeline.t +++ b/src/perl/t/database_pipeline.t @@ -3,6 +3,7 @@ use utf8; use strict; use warnings; + use WTSI::NPG::Genotyping::Database::PipelineTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Database::PipelineTest->runtests; diff --git a/src/perl/t/database_sequenom.t b/src/perl/t/database_sequenom.t index f1399ed56..f01c80e43 100644 --- a/src/perl/t/database_sequenom.t +++ b/src/perl/t/database_sequenom.t @@ -3,6 +3,7 @@ use utf8; use strict; use warnings; + use WTSI::NPG::Genotyping::Database::SequenomTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Database::SequenomTest->runtests; diff --git a/src/perl/t/database_snp.t b/src/perl/t/database_snp.t index fcf588d28..b916ff669 100644 --- a/src/perl/t/database_snp.t +++ b/src/perl/t/database_snp.t @@ -3,6 +3,7 @@ use utf8; use strict; use warnings; + use WTSI::NPG::Genotyping::Database::SNPTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Database::SNPTest->runtests; diff --git a/src/perl/t/database_warehouse.t b/src/perl/t/database_warehouse.t index 946f60452..2351acfb6 100644 --- a/src/perl/t/database_warehouse.t +++ b/src/perl/t/database_warehouse.t @@ -3,6 +3,7 @@ use utf8; use strict; use warnings; + use WTSI::NPG::Database::WarehouseTest; -Test::Class->runtests; +WTSI::NPG::Database::WarehouseTest->runtests; diff --git a/src/perl/t/expression_analysis_publisher.t b/src/perl/t/expression_analysis_publisher.t index 100774297..7ce61cd4a 100644 --- a/src/perl/t/expression_analysis_publisher.t +++ b/src/perl/t/expression_analysis_publisher.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Expression::AnalysisPublisherTest; -Test::Class->runtests; +WTSI::NPG::Expression::AnalysisPublisherTest->runtests; diff --git a/src/perl/t/expression_chip_loading_manifest.t b/src/perl/t/expression_chip_loading_manifest.t index 9511660e8..b169b9bd6 100644 --- a/src/perl/t/expression_chip_loading_manifest.t +++ 
b/src/perl/t/expression_chip_loading_manifest.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Expression::ChipLoadingManifestTest; -Test::Class->runtests; \ No newline at end of file +WTSI::NPG::Expression::ChipLoadingManifestTest->runtests; diff --git a/src/perl/t/expression_data_object.t b/src/perl/t/expression_data_object.t index 370dbbe9a..d33d5c4e6 100644 --- a/src/perl/t/expression_data_object.t +++ b/src/perl/t/expression_data_object.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Expression::InfiniumDataObjectTest; -Test::Class->runtests; +WTSI::NPG::Expression::InfiniumDataObjectTest->runtests; diff --git a/src/perl/t/expression_profile_annotation.t b/src/perl/t/expression_profile_annotation.t index b54bd4ae0..4c3f32e67 100644 --- a/src/perl/t/expression_profile_annotation.t +++ b/src/perl/t/expression_profile_annotation.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Expression::ProfileAnnotationTest; -Test::Class->runtests; +WTSI::NPG::Expression::ProfileAnnotationTest->runtests; diff --git a/src/perl/t/expression_publisher.t b/src/perl/t/expression_publisher.t index 640667027..d64aac258 100644 --- a/src/perl/t/expression_publisher.t +++ b/src/perl/t/expression_publisher.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Expression::PublisherTest; -Test::Class->runtests; +WTSI::NPG::Expression::PublisherTest->runtests; diff --git a/src/perl/t/expression_resultset.t b/src/perl/t/expression_resultset.t index dc565db76..53315d0a9 100644 --- a/src/perl/t/expression_resultset.t +++ b/src/perl/t/expression_resultset.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Expression::ResultSetTest; -Test::Class->runtests; +WTSI::NPG::Expression::ResultSetTest->runtests; diff --git a/src/perl/t/expression_sample_probe_profile.t b/src/perl/t/expression_sample_probe_profile.t index 83c50f118..bbb4d2a2d 100644 --- a/src/perl/t/expression_sample_probe_profile.t +++ b/src/perl/t/expression_sample_probe_profile.t @@ -6,4 +6,4 @@ use warnings; use 
WTSI::NPG::Expression::SampleProbeProfileTest; -Test::Class->runtests; +WTSI::NPG::Expression::SampleProbeProfileTest->runtests; diff --git a/src/perl/t/fluidigm_assay_data_object.t b/src/perl/t/fluidigm_assay_data_object.t index 39d860d4a..f0594ae37 100644 --- a/src/perl/t/fluidigm_assay_data_object.t +++ b/src/perl/t/fluidigm_assay_data_object.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Fluidigm::AssayDataObjectTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Fluidigm::AssayDataObjectTest->runtests; diff --git a/src/perl/t/fluidigm_assay_result.t b/src/perl/t/fluidigm_assay_result.t index 542991253..89824a07e 100644 --- a/src/perl/t/fluidigm_assay_result.t +++ b/src/perl/t/fluidigm_assay_result.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Fluidigm::AssayResultTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Fluidigm::AssayResultTest->runtests; diff --git a/src/perl/t/fluidigm_assay_resultset.t b/src/perl/t/fluidigm_assay_resultset.t index a4674644b..201e9642d 100644 --- a/src/perl/t/fluidigm_assay_resultset.t +++ b/src/perl/t/fluidigm_assay_resultset.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Fluidigm::AssayResultSetTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Fluidigm::AssayResultSetTest->runtests; diff --git a/src/perl/t/fluidigm_export_file.t b/src/perl/t/fluidigm_export_file.t index a32f1fbb0..ebea349d9 100644 --- a/src/perl/t/fluidigm_export_file.t +++ b/src/perl/t/fluidigm_export_file.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Fluidigm::ExportFileTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Fluidigm::ExportFileTest->runtests; diff --git a/src/perl/t/fluidigm_publisher.t b/src/perl/t/fluidigm_publisher.t index 48a2e2019..602ef4523 100644 --- a/src/perl/t/fluidigm_publisher.t +++ b/src/perl/t/fluidigm_publisher.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Fluidigm::PublisherTest; -Test::Class->runtests; 
+WTSI::NPG::Genotyping::Fluidigm::PublisherTest->runtests; diff --git a/src/perl/t/fluidigm_resultset.t b/src/perl/t/fluidigm_resultset.t index 2ad6ac29c..4d8110929 100644 --- a/src/perl/t/fluidigm_resultset.t +++ b/src/perl/t/fluidigm_resultset.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Fluidigm::ResultSetTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Fluidigm::ResultSetTest->runtests; diff --git a/src/perl/t/fluidigm_subscriber.t b/src/perl/t/fluidigm_subscriber.t index 05fbd6c62..adc79e860 100644 --- a/src/perl/t/fluidigm_subscriber.t +++ b/src/perl/t/fluidigm_subscriber.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Fluidigm::SubscriberTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Fluidigm::SubscriberTest->runtests; diff --git a/src/perl/t/fluidigm_subscriber/chromosome_lengths_GRCh37.json b/src/perl/t/fluidigm_subscriber/chromosome_lengths_GRCh37.json new file mode 100644 index 000000000..8d6e46bda --- /dev/null +++ b/src/perl/t/fluidigm_subscriber/chromosome_lengths_GRCh37.json @@ -0,0 +1 @@ +{"11":135006516,"21":48129895,"7":159138663,"Y":59373566,"17":81195210,"2":243199373,"22":51304566,"1":249250621,"18":78077248,"16":90354753,"13":115169878,"6":171115067,"X":155270560,"3":198022430,"9":141213431,"12":133851895,"20":63025520,"14":107349540,"15":102531392,"8":146364022,"4":191154276,"19":59128983,"10":135534747,"5":180915260} \ No newline at end of file diff --git a/src/perl/t/gender_marker.t b/src/perl/t/gender_marker.t index d938e5bdf..950b45f80 100644 --- a/src/perl/t/gender_marker.t +++ b/src/perl/t/gender_marker.t @@ -6,5 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::GenderMarkerTest; -Test::Class->runtests; - +WTSI::NPG::Genotyping::GenderMarkerTest->runtests; diff --git a/src/perl/t/gender_marker_call.t b/src/perl/t/gender_marker_call.t index bdfeaa154..3faa00738 100644 --- a/src/perl/t/gender_marker_call.t +++ b/src/perl/t/gender_marker_call.t @@ -6,4 +6,4 @@ use warnings; use 
WTSI::NPG::Genotyping::GenderMarkerCallTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::GenderMarkerCallTest->runtests; diff --git a/src/perl/t/genotyping_yml.t b/src/perl/t/genotyping_yml.t deleted file mode 100644 index 1def904bb..000000000 --- a/src/perl/t/genotyping_yml.t +++ /dev/null @@ -1,11 +0,0 @@ - -# Tests generate_yml.pl - -use utf8; -use strict; -use warnings; - -use WTSI::NPG::Genotyping::YMLTest; - -Test::Class->runtests; - diff --git a/src/perl/t/genotyping_yml/config.yml b/src/perl/t/genotyping_yml/config.yml deleted file mode 100644 index 3e546c4b0..000000000 --- a/src/perl/t/genotyping_yml/config.yml +++ /dev/null @@ -1,8 +0,0 @@ ---- -async: lsf -log: t/genotyping_yml/percolate.log -log_level: DEBUG -max_processes: 250 -msg_host: farm3-head2 -msg_port: 11300 -root_dir: t/genotyping_yml/ diff --git a/src/perl/t/genotyping_yml/genotype_illuminus.yml b/src/perl/t/genotyping_yml/genotype_illuminus.yml deleted file mode 100644 index f1cc9b9b8..000000000 --- a/src/perl/t/genotyping_yml/genotype_illuminus.yml +++ /dev/null @@ -1,12 +0,0 @@ ---- -arguments: - - t/genotyping_yml/genotyping_DUMMY.db - - run1 - - t/genotyping_yml/ - - chunk_size: 4000 - gender_method: Supplied - manifest: /nfs/gapi/data/genotype/pipeline_test/Human670-QuadCustom_v1_A.bpm.csv - plex_manifest: /nfs/gapi/data/genotype/pipeline_test/W30467_snp_set_info_GRCh37.tsv - memory: 2048 -library: genotyping -workflow: Genotyping::Workflows::GenotypeIlluminus diff --git a/src/perl/t/genotyping_yml/genotype_zcall.yml b/src/perl/t/genotyping_yml/genotype_zcall.yml deleted file mode 100644 index 7b9407a3b..000000000 --- a/src/perl/t/genotyping_yml/genotype_zcall.yml +++ /dev/null @@ -1,14 +0,0 @@ ---- -arguments: - - t/genotyping_yml/genotyping_DUMMY.db - - run1 - - t/genotyping_yml/ - - chunk_size: 40 - egt: /nfs/gapi/data/genotype/pipeline_test/Human670-QuadCustom_v1_A.egt - manifest: /nfs/gapi/data/genotype/pipeline_test/Human670-QuadCustom_v1_A.bpm.csv - plex_manifest: 
/nfs/gapi/data/genotype/pipeline_test/W30467_snp_set_info_GRCh37.tsv - memory: 2048 - zstart: 7 - ztotal: 1 -library: genotyping -workflow: Genotyping::Workflows::GenotypeZCall diff --git a/src/perl/t/genotyping_yml/genotyping_DUMMY.db b/src/perl/t/genotyping_yml/genotyping_DUMMY.db deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/perl/t/identity.t b/src/perl/t/identity.t index 33b376dc2..21ae34a06 100644 --- a/src/perl/t/identity.t +++ b/src/perl/t/identity.t @@ -1,14 +1,9 @@ -# Tests generate_yml.pl - use utf8; + use strict; use warnings; use WTSI::NPG::Genotyping::QC::IdentityTest; -# Created a cut-down PLINK dataset (20 SNPs, 5 samples) -# see gapi/genotype_identity_test.git on http://git.internal.sanger.ac.uk -# data contains some "real" samples and calls, so not made public on github - -Test::Class->runtests; +WTSI::NPG::Genotyping::QC::IdentityTest->runtests; diff --git a/src/perl/t/identity_check_sample_wip_bayesian.t b/src/perl/t/identity_check_sample_wip_bayesian.t index 1bfdb4be2..09cad84e0 100644 --- a/src/perl/t/identity_check_sample_wip_bayesian.t +++ b/src/perl/t/identity_check_sample_wip_bayesian.t @@ -1,9 +1,9 @@ +use utf8; + use strict; use warnings; use WTSI::NPG::Genotyping::QC_wip::Check::SampleIdentityBayesianTest; -# Test of Bayesian identity check - -Test::Class->runtests; \ No newline at end of file +WTSI::NPG::Genotyping::QC_wip::Check::SampleIdentityBayesianTest->runtests; diff --git a/src/perl/t/identity_check_wip.t b/src/perl/t/identity_check_wip.t index c0037e656..11c3f0159 100644 --- a/src/perl/t/identity_check_wip.t +++ b/src/perl/t/identity_check_wip.t @@ -1,11 +1,9 @@ +use utf8; + use strict; use warnings; use WTSI::NPG::Genotyping::QC_wip::Check::IdentityTest; -# Created a cut-down PLINK dataset (20 SNPs, 5 samples) -# see gapi/genotype_identity_test.git on http://git.internal.sanger.ac.uk -# data contains some "real" samples and calls, so not made public on github - -Test::Class->runtests; 
+WTSI::NPG::Genotyping::QC_wip::Check::IdentityTest->runtests; diff --git a/src/perl/t/identity_simulation_wip.t b/src/perl/t/identity_simulation_wip.t new file mode 100644 index 000000000..cc55c362b --- /dev/null +++ b/src/perl/t/identity_simulation_wip.t @@ -0,0 +1,7 @@ + +use strict; +use warnings; + +use WTSI::NPG::Genotyping::QC_wip::Check::IdentitySimulatorTest; + +Test::Class->runtests; \ No newline at end of file diff --git a/src/perl/t/illuminus.t b/src/perl/t/illuminus.t index 006ec27fa..2f2677ea0 100644 --- a/src/perl/t/illuminus.t +++ b/src/perl/t/illuminus.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::IlluminusTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::IlluminusTest->runtests; diff --git a/src/perl/t/infinium_analysis_publisher.t b/src/perl/t/infinium_analysis_publisher.t index 4604fe006..b6100b6d1 100644 --- a/src/perl/t/infinium_analysis_publisher.t +++ b/src/perl/t/infinium_analysis_publisher.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Infinium::AnalysisPublisherTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Infinium::AnalysisPublisherTest->runtests; diff --git a/src/perl/t/infinium_data_object.t b/src/perl/t/infinium_data_object.t index 43772149b..94dee87ed 100644 --- a/src/perl/t/infinium_data_object.t +++ b/src/perl/t/infinium_data_object.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Infinium::InfiniumDataObjectTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Infinium::InfiniumDataObjectTest->runtests; diff --git a/src/perl/t/infinium_publisher.t b/src/perl/t/infinium_publisher.t index 453fad997..db2738ae6 100644 --- a/src/perl/t/infinium_publisher.t +++ b/src/perl/t/infinium_publisher.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Infinium::PublisherTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Infinium::PublisherTest->runtests; diff --git a/src/perl/t/infinium_resultset.t b/src/perl/t/infinium_resultset.t index 019b727a6..0e361ee0d 100644 --- 
a/src/perl/t/infinium_resultset.t +++ b/src/perl/t/infinium_resultset.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Infinium::ResultSetTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Infinium::ResultSetTest->runtests; diff --git a/src/perl/t/publisher.t b/src/perl/t/publisher.t index 6b46ddd6f..77fcd97b8 100644 --- a/src/perl/t/publisher.t +++ b/src/perl/t/publisher.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::PublisherTest; -Test::Class->runtests; +WTSI::NPG::PublisherTest->runtests; diff --git a/src/perl/t/qc/check/identity/combined_identity_expected.json b/src/perl/t/qc/check/identity/combined_identity_expected.json index f1070cb17..ae552e801 100644 --- a/src/perl/t/qc/check/identity/combined_identity_expected.json +++ b/src/perl/t/qc/check/identity/combined_identity_expected.json @@ -1 +1 @@ -{"params":{"pass_threshold":0.85,"expected_error_rate":0.01,"equivalent_calls_probability":{"rs6557634":0.40625,"rs6759892":0.40625,"GS35220":0.40625,"rs753381":0.40625,"rs7627615":0.40625,"rs6166":0.40625,"rs1805034":0.40625,"rs8065080":0.40625,"rs4075254":0.40625,"rs4925":0.40625,"rs1805087":0.40625,"rs1131498":0.40625,"rs532841":0.40625,"rs2286963":0.40625,"rs2247870":0.40625,"rs649058":0.40625,"rs2241714":0.40625,"rs3742207":0.40625,"GS34251":0.40625,"rs3795677":0.40625,"GS35205":0.40625,"rs7298565":0.40625,"rs1801262":0.40625,"GS35219":0.40625,"rs5215":0.40625,"rs4619":0.40625,"rs4843075":0.40625,"rs12828016":0.40625,"rs156697":0.40625,"rs11096957":0.40625},"consensus_ecp":0.40625,"sample_mismatch_prior":0.01,"swap_threshold":0.5},"summary":{"missing":3,"assayed_pass_rate":"0.6667","total":6,"failed":1},"identity":[{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249441_F11_HELIC5102138","genotypes":{"rs6557634":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","callset_bar"]],"production":["GT","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","callset_bar"
]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["GA","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","callset_bar"]],"production":["AC","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","callset_bar"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["CT","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","callset_bar"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"0.0000","identity":"0.0000","missing":0,"sample_name":"urn:wtsi:249442_C09_HELIC5102247","genotypes":{"rs6557634":{"qc":[["TT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["TT","1","callset_bar"]],"production":["GT","1","_unknown_callset_"]},"rs753381":{"qc":[["GG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["GG","1","callse
t_bar"]],"production":["AG","1","_unknown_callset_"]},"rs6166":{"qc":[["GG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs8065080":{"qc":[["TT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4075254":{"qc":[["TT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4925":{"qc":[["CC","1","callset_bar"]],"production":["AC","1","_unknown_callset_"]},"rs1805087":{"qc":[["GG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs532841":{"qc":[["TT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["TT","1","callset_bar"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["TT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["GG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["TT","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["CC","1","callset_bar"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["GG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["GG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["TT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["GG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["GG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]}},"failed":1},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249461_G12_HELIC5215300","genotypes":{"rs6557634":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","callset_bar"]],"production":["GT","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["GA","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs6166":{"qc":[["GA","1","cal
lset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs4925":{"qc":[["AA","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[["AA","1","callset_foo"]],"production":["NN","0","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs2247870":{"qc":[["TT","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs649058":{"qc":[["GA","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["GA","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs3795677":{"qc":[["TT","1","callset_foo"]],"production":["NN","0","_unknown_callset_"]},"rs7298565":{"qc":[["GA","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs4843075":{"qc":[["GA","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]}},"failed":0},{"concordance":"0.0000","identity":"0.0000","missing":1,"sample_name":"urn:wtsi:000000_A00_DUMMY-SAMPLE","genotypes":{"rs6557634":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs6759892":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs753381":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs7627615":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs6166":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs8065080":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs4075254":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs4925":{"qc":
[],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs532841":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs2286963":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs2247870":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs649058":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs2241714":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs3742207":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs3795677":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs7298565":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs5215":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs4619":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs4843075":{"qc":[],"production":["NN","0","_unknown_callset_"]}},"failed":null},{"concordance":"0.0000","identity":"0.0000","missing":1,"sample_name":"urn:wtsi:249469_H06_HELIC5274668","genotypes":{"rs6557634":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[],"production":["GT","1","_unknown_callset_"]},"rs753381":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs6166":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs8065080":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs4075254":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs4925":{"qc":[],"production":["AC","1","_unknown_callset_"]},"rs1805087":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs532841":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[],"production":["AG","1","_unknown
_callset_"]},"rs7298565":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[],"production":["AG","1","_unknown_callset_"]}},"failed":null},{"concordance":"0.0000","identity":"0.0000","missing":1,"sample_name":"urn:wtsi:249470_F02_HELIC5274730","genotypes":{"rs6557634":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[],"production":["GT","1","_unknown_callset_"]},"rs753381":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs6166":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs8065080":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs4075254":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs4925":{"qc":[],"production":["AC","1","_unknown_callset_"]},"rs1805087":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs532841":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[],"production":["AG","1","_unknown_callset_"]}},"failed":null}],"swap":{"comparison":[],"sample_warnings":{},"total_samples_checked":1,"prior":0,"total_sample_warnings":0}} \ No newline at end of file 
+{"params":{"pass_threshold":0.99,"expected_error_rate":0.01,"equivalent_calls_probability":{"rs6557634":0.40625,"rs6759892":0.40625,"GS35220":0.40625,"rs753381":0.40625,"rs7627615":0.40625,"rs6166":0.40625,"rs1805034":0.40625,"rs8065080":0.40625,"rs4075254":0.40625,"rs4925":0.40625,"rs1805087":0.40625,"rs1131498":0.40625,"rs532841":0.40625,"rs2286963":0.40625,"rs2247870":0.40625,"rs649058":0.40625,"rs2241714":0.40625,"rs3742207":0.40625,"GS34251":0.40625,"rs3795677":0.40625,"GS35205":0.40625,"rs7298565":0.40625,"rs1801262":0.40625,"GS35219":0.40625,"rs5215":0.40625,"rs4619":0.40625,"rs4843075":0.40625,"rs12828016":0.40625,"rs156697":0.40625,"rs11096957":0.40625},"consensus_ecp":0.40625,"sample_mismatch_prior":0.01,"swap_threshold":0.5},"summary":{"missing":3,"assayed_pass_rate":"0.6667","total":6,"failed":1},"identity":[{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249441_F11_HELIC5102138","genotypes":{"rs6557634":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","callset_bar"]],"production":["GT","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["GA","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","callset_bar"]],"production":["AC","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","callset_bar"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","callset_bar"]],"production":["CT","1"
,"_unknown_callset_"]},"rs649058":{"qc":[["AG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["CT","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","callset_bar"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"0.0000","identity":"0.0000","missing":0,"sample_name":"urn:wtsi:249442_C09_HELIC5102247","genotypes":{"rs6557634":{"qc":[["TT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["TT","1","callset_bar"]],"production":["GT","1","_unknown_callset_"]},"rs753381":{"qc":[["GG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["GG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs6166":{"qc":[["GG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs8065080":{"qc":[["TT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4075254":{"qc":[["TT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4925":{"qc":[["CC","1","callset_bar"]],"production":["AC","1","_unknown_callset_"]},"rs1805087":{"qc":[["GG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs532841":{"qc":[["TT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["TT","1","callset_bar"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["TT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["GG","1","callset_foo"]],"production":["AG
","1","_unknown_callset_"]},"rs2241714":{"qc":[["TT","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["CC","1","callset_bar"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["GG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["GG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["TT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["GG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["GG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]}},"failed":1},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249461_G12_HELIC5215300","genotypes":{"rs6557634":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","callset_bar"]],"production":["GT","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["GA","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs6166":{"qc":[["GA","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs4925":{"qc":[["AA","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[["AA","1","callset_foo"]],"production":["NN","0","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs2247870":{"qc":[["TT","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs649058":{"qc":[["GA","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["GA","1","callset_bar"]],"production
":["NN","0","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs3795677":{"qc":[["TT","1","callset_foo"]],"production":["NN","0","_unknown_callset_"]},"rs7298565":{"qc":[["GA","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","callset_bar"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs4843075":{"qc":[["GA","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]}},"failed":0},{"concordance":"0.0000","identity":"0.0000","missing":1,"sample_name":"urn:wtsi:000000_A00_DUMMY-SAMPLE","genotypes":{"rs6557634":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs6759892":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs753381":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs7627615":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs6166":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs8065080":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs4075254":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs4925":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs532841":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs2286963":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs2247870":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs649058":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs2241714":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs3742207":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs3795677":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs7298565":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs5215":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs4619":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs4843075":{"qc":[],"production":["NN","0","_unknown_callset_
"]}},"failed":null},{"concordance":"0.0000","identity":"0.0000","missing":1,"sample_name":"urn:wtsi:249469_H06_HELIC5274668","genotypes":{"rs6557634":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[],"production":["GT","1","_unknown_callset_"]},"rs753381":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs6166":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs8065080":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs4075254":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs4925":{"qc":[],"production":["AC","1","_unknown_callset_"]},"rs1805087":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs532841":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[],"production":["AG","1","_unknown_callset_"]}},"failed":null},{"concordance":"0.0000","identity":"0.0000","missing":1,"sample_name":"urn:wtsi:249470_F02_HELIC5274730","genotypes":{"rs6557634":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[],"production":["GT","1","_unknown_callset_"]},"rs753381":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs6166":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs8065080":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs4075254":{"qc":[],"productio
n":["CT","1","_unknown_callset_"]},"rs4925":{"qc":[],"production":["AC","1","_unknown_callset_"]},"rs1805087":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs532841":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[],"production":["AG","1","_unknown_callset_"]}},"failed":null}],"swap":{"comparison":[],"sample_warnings":{},"total_samples_checked":1,"prior":0,"total_sample_warnings":0}} \ No newline at end of file diff --git a/src/perl/t/qc/check/identity/expected_omit_results.json b/src/perl/t/qc/check/identity/expected_omit_results.json index 7a38aae08..ad728eedb 100644 --- a/src/perl/t/qc/check/identity/expected_omit_results.json +++ b/src/perl/t/qc/check/identity/expected_omit_results.json @@ -1 +1 @@ 
-{"params":{"pass_threshold":0.85,"expected_error_rate":0.01,"equivalent_calls_probability":{"rs6557634_BROKEN":0.40625,"rs4925_BROKEN":0.40625,"rs6166_BROKEN":0.40625,"rs649058_BROKEN":0.40625,"rs7627615_BROKEN":0.40625,"GS35220_BROKEN":0.40625,"rs4843075_BROKEN":0.40625,"rs1805034_BROKEN":0.40625,"rs1805087":0.40625,"rs4075254_BROKEN":0.40625,"rs4619_BROKEN":0.40625,"rs2247870_BROKEN":0.40625,"rs5215_BROKEN":0.40625,"rs6759892_BROKEN":0.40625,"rs753381_BROKEN":0.40625,"rs7298565_BROKEN":0.40625,"rs12828016_BROKEN":0.40625,"rs2241714":0.40625,"rs11096957_BROKEN":0.40625,"rs532841_BROKEN":0.40625,"rs8065080_BROKEN":0.40625,"rs3742207_BROKEN":0.40625,"rs3795677_BROKEN":0.40625,"rs1801262_BROKEN":0.40625,"rs2286963_BROKEN":0.40625,"rs156697_BROKEN":0.40625,"GS35219_BROKEN":0.40625,"GS35205_BROKEN":0.40625,"rs1131498_BROKEN":0.40625,"GS34251_BROKEN":0.40625},"consensus_ecp":0.40625,"sample_mismatch_prior":0.01,"swap_threshold":0.5},"summary":{"missing":3,"assayed_pass_rate":"0.3333","total":6,"failed":2},"identity":[{"concordance":"1.0000","identity":"0.9983","missing":0,"sample_name":"urn:wtsi:249441_F11_HELIC5102138","genotypes":{"rs2241714":{"qc":[["CT","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"0.0000","identity":"0.0273","missing":0,"sample_name":"urn:wtsi:249442_C09_HELIC5102247","genotypes":{"rs2241714":{"qc":[["TT","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs1805087":{"qc":[["GG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]}},"failed":1},{"concordance":"0.0000","identity":"0.0000","missing":0,"sample_name":"urn:wtsi:249461_G12_HELIC5215300","genotypes":{"rs2241714":{"qc":[["GA","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[["AA","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]}},"failed":1},{"concordance":"0.0000","identity":"0.
0000","missing":1,"sample_name":"urn:wtsi:000000_A00_DUMMY-SAMPLE","genotypes":{"rs2241714":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[],"production":["NN","0","_unknown_callset_"]}},"failed":null},{"concordance":"0.0000","identity":"0.0000","missing":1,"sample_name":"urn:wtsi:249469_H06_HELIC5274668","genotypes":{"rs2241714":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs1805087":{"qc":[],"production":["AG","1","_unknown_callset_"]}},"failed":null},{"concordance":"0.0000","identity":"0.0000","missing":1,"sample_name":"urn:wtsi:249470_F02_HELIC5274730","genotypes":{"rs2241714":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs1805087":{"qc":[],"production":["AG","1","_unknown_callset_"]}},"failed":null}],"swap":{"comparison":[["urn:wtsi:249461_G12_HELIC5215300","urn:wtsi:249442_C09_HELIC5102247","0.0394",0]],"sample_warnings":{},"total_samples_checked":2,"prior":0.5,"total_sample_warnings":0}} \ No newline at end of file +{"params":{"pass_threshold":0.99,"expected_error_rate":0.01,"equivalent_calls_probability":{"rs6557634_BROKEN":0.40625,"rs4925_BROKEN":0.40625,"rs6166_BROKEN":0.40625,"rs649058_BROKEN":0.40625,"rs7627615_BROKEN":0.40625,"GS35220_BROKEN":0.40625,"rs4843075_BROKEN":0.40625,"rs1805034_BROKEN":0.40625,"rs1805087":0.40625,"rs4075254_BROKEN":0.40625,"rs4619_BROKEN":0.40625,"rs2247870_BROKEN":0.40625,"rs5215_BROKEN":0.40625,"rs6759892_BROKEN":0.40625,"rs753381_BROKEN":0.40625,"rs7298565_BROKEN":0.40625,"rs12828016_BROKEN":0.40625,"rs2241714":0.40625,"rs11096957_BROKEN":0.40625,"rs532841_BROKEN":0.40625,"rs8065080_BROKEN":0.40625,"rs3742207_BROKEN":0.40625,"rs3795677_BROKEN":0.40625,"rs1801262_BROKEN":0.40625,"rs2286963_BROKEN":0.40625,"rs156697_BROKEN":0.40625,"GS35219_BROKEN":0.40625,"GS35205_BROKEN":0.40625,"rs1131498_BROKEN":0.40625,"GS34251_BROKEN":0.40625},"consensus_ecp":0.40625,"sample_mismatch_prior":0.01,"swap_threshold":0.5},"summary":{"missing":3,"assayed_pass_rate":"0.3333","total":6,"failed":2},"
identity":[{"concordance":"1.0000","identity":"0.9983","missing":0,"sample_name":"urn:wtsi:249441_F11_HELIC5102138","genotypes":{"rs2241714":{"qc":[["CT","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"0.0000","identity":"0.0273","missing":0,"sample_name":"urn:wtsi:249442_C09_HELIC5102247","genotypes":{"rs2241714":{"qc":[["TT","1","callset_bar"]],"production":["AG","1","_unknown_callset_"]},"rs1805087":{"qc":[["GG","1","callset_foo"]],"production":["AG","1","_unknown_callset_"]}},"failed":1},{"concordance":"0.0000","identity":"0.0000","missing":0,"sample_name":"urn:wtsi:249461_G12_HELIC5215300","genotypes":{"rs2241714":{"qc":[["GA","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[["AA","1","callset_bar"]],"production":["NN","0","_unknown_callset_"]}},"failed":1},{"concordance":"0.0000","identity":"0.0000","missing":1,"sample_name":"urn:wtsi:000000_A00_DUMMY-SAMPLE","genotypes":{"rs2241714":{"qc":[],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[],"production":["NN","0","_unknown_callset_"]}},"failed":null},{"concordance":"0.0000","identity":"0.0000","missing":1,"sample_name":"urn:wtsi:249469_H06_HELIC5274668","genotypes":{"rs2241714":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs1805087":{"qc":[],"production":["AG","1","_unknown_callset_"]}},"failed":null},{"concordance":"0.0000","identity":"0.0000","missing":1,"sample_name":"urn:wtsi:249470_F02_HELIC5274730","genotypes":{"rs2241714":{"qc":[],"production":["AG","1","_unknown_callset_"]},"rs1805087":{"qc":[],"production":["AG","1","_unknown_callset_"]}},"failed":null}],"swap":{"comparison":[["urn:wtsi:249461_G12_HELIC5215300","urn:wtsi:249442_C09_HELIC5102247","0.0394",0]],"sample_warnings":{},"total_samples_checked":2,"prior":0.5,"total_sample_warnings":0}} \ No newline at end of file diff --git 
a/src/perl/t/qc/check/identity/identity_script_output.json b/src/perl/t/qc/check/identity/identity_script_output.json index 0f5d47727..14169c7b2 100644 --- a/src/perl/t/qc/check/identity/identity_script_output.json +++ b/src/perl/t/qc/check/identity/identity_script_output.json @@ -1 +1 @@ -{"params":{"pass_threshold":0.85,"expected_error_rate":0.01,"equivalent_calls_probability":{"rs6557634":0.40625,"GS35220":0.40625,"rs6759892":0.40625,"rs6166":0.40625,"rs7627615":0.40625,"rs753381":0.40625,"rs1805034":0.40625,"rs4075254":0.40625,"rs1805087":0.40625,"rs4925":0.40625,"rs8065080":0.40625,"rs1131498":0.40625,"rs532841":0.40625,"rs2286963":0.40625,"rs2247870":0.40625,"rs649058":0.40625,"rs2241714":0.40625,"rs3742207":0.40625,"GS34251":0.40625,"rs3795677":0.40625,"GS35205":0.40625,"rs7298565":0.40625,"rs1801262":0.40625,"GS35219":0.40625,"rs5215":0.40625,"rs4619":0.40625,"rs4843075":0.40625,"rs12828016":0.40625,"rs156697":0.40625,"rs11096957":0.40625},"consensus_ecp":0.40625,"sample_mismatch_prior":0.01,"swap_threshold":0.5},"summary":{"missing":0,"assayed_pass_rate":"0.8333","total":6,"failed":1},"identity":[{"concordance":"0.0000","identity":"0.0000","missing":0,"sample_name":"urn:wtsi:000000_A00_DUMMY-SAMPLE","genotypes":{"rs6557634":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs6759892":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs6166":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7627615":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs753381":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4075254":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4925":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]
},"rs8065080":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs532841":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2286963":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2247870":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs649058":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2241714":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3742207":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3795677":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7298565":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs5215":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4619":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4843075":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]}},"failed":1},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249441_F11_HELIC5102138","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"produc
tion":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249442_C09_HELIC5102247","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["A
C","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249461_G12_HELIC5215300","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unk
nown_callset_"]},"rs4925":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2247870":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3742207":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3795677":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4843075":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249469_H06_HELIC5274668","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_call
set_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"0.8000","identity":"0.9249","missing":0,"sample_name":"urn:wtsi:249470_F02_HELIC5274730","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805
087":{"qc":[["GG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["TG","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0}],"swap":{"comparison":[],"sample_warnings":{},"total_samples_checked":1,"prior":0,"total_sample_warnings":0}} \ No newline at end of file 
+{"params":{"pass_threshold":0.99,"expected_error_rate":0.01,"equivalent_calls_probability":{"rs6557634":0.40625,"GS35220":0.40625,"rs6759892":0.40625,"rs6166":0.40625,"rs7627615":0.40625,"rs753381":0.40625,"rs1805034":0.40625,"rs4075254":0.40625,"rs1805087":0.40625,"rs4925":0.40625,"rs8065080":0.40625,"rs1131498":0.40625,"rs532841":0.40625,"rs2286963":0.40625,"rs2247870":0.40625,"rs649058":0.40625,"rs2241714":0.40625,"rs3742207":0.40625,"GS34251":0.40625,"rs3795677":0.40625,"GS35205":0.40625,"rs7298565":0.40625,"rs1801262":0.40625,"GS35219":0.40625,"rs5215":0.40625,"rs4619":0.40625,"rs4843075":0.40625,"rs12828016":0.40625,"rs156697":0.40625,"rs11096957":0.40625},"consensus_ecp":0.40625,"sample_mismatch_prior":0.01,"swap_threshold":0.5},"summary":{"missing":0,"assayed_pass_rate":"0.6667","total":6,"failed":2},"identity":[{"concordance":"0.0000","identity":"0.0000","missing":0,"sample_name":"urn:wtsi:000000_A00_DUMMY-SAMPLE","genotypes":{"rs6557634":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs6759892":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs6166":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7627615":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs753381":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4075254":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4925":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs8065080":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs532841":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2286963":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"
rs2247870":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs649058":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2241714":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3742207":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3795677":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7298565":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs5215":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4619":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4843075":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]}},"failed":1},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249441_F11_HELIC5102138","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","_unknown_callset_"]],"productio
n":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249442_C09_HELIC5102247","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT",
"1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249461_G12_HELIC5215300","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4925":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_c
allset_"]},"rs2286963":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2247870":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3742207":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3795677":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4843075":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249469_H06_HELIC5274668","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]
],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"0.8000","identity":"0.9249","missing":0,"sample_name":"urn:wtsi:249470_F02_HELIC5274730","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["GG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{
"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["TG","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":1}],"swap":{"comparison":[["urn:wtsi:249470_F02_HELIC5274730","urn:wtsi:000000_A00_DUMMY-SAMPLE","0.0000",0]],"sample_warnings":{},"total_samples_checked":2,"prior":0.5,"total_sample_warnings":0}} \ No newline at end of file diff --git a/src/perl/t/qc/check/identity/identity_script_output_2.json b/src/perl/t/qc/check/identity/identity_script_output_2.json index 0f5d47727..763556ab1 100644 --- a/src/perl/t/qc/check/identity/identity_script_output_2.json +++ b/src/perl/t/qc/check/identity/identity_script_output_2.json @@ -1 +1 @@ 
-{"params":{"pass_threshold":0.85,"expected_error_rate":0.01,"equivalent_calls_probability":{"rs6557634":0.40625,"GS35220":0.40625,"rs6759892":0.40625,"rs6166":0.40625,"rs7627615":0.40625,"rs753381":0.40625,"rs1805034":0.40625,"rs4075254":0.40625,"rs1805087":0.40625,"rs4925":0.40625,"rs8065080":0.40625,"rs1131498":0.40625,"rs532841":0.40625,"rs2286963":0.40625,"rs2247870":0.40625,"rs649058":0.40625,"rs2241714":0.40625,"rs3742207":0.40625,"GS34251":0.40625,"rs3795677":0.40625,"GS35205":0.40625,"rs7298565":0.40625,"rs1801262":0.40625,"GS35219":0.40625,"rs5215":0.40625,"rs4619":0.40625,"rs4843075":0.40625,"rs12828016":0.40625,"rs156697":0.40625,"rs11096957":0.40625},"consensus_ecp":0.40625,"sample_mismatch_prior":0.01,"swap_threshold":0.5},"summary":{"missing":0,"assayed_pass_rate":"0.8333","total":6,"failed":1},"identity":[{"concordance":"0.0000","identity":"0.0000","missing":0,"sample_name":"urn:wtsi:000000_A00_DUMMY-SAMPLE","genotypes":{"rs6557634":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs6759892":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs6166":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7627615":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs753381":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4075254":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4925":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs8065080":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs532841":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2286963":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"
rs2247870":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs649058":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2241714":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3742207":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3795677":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7298565":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs5215":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4619":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4843075":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]}},"failed":1},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249441_F11_HELIC5102138","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","_unknown_callset_"]],"productio
n":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249442_C09_HELIC5102247","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT",
"1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249461_G12_HELIC5215300","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4925":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_c
allset_"]},"rs2286963":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2247870":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3742207":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3795677":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4843075":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249469_H06_HELIC5274668","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]
],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"0.8000","identity":"0.9249","missing":0,"sample_name":"urn:wtsi:249470_F02_HELIC5274730","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["GG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{
"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["TG","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0}],"swap":{"comparison":[],"sample_warnings":{},"total_samples_checked":1,"prior":0,"total_sample_warnings":0}} \ No newline at end of file 
+{"params":{"pass_threshold":0.99,"expected_error_rate":0.01,"equivalent_calls_probability":{"rs6557634":0.40625,"GS35220":0.40625,"rs6759892":0.40625,"rs6166":0.40625,"rs7627615":0.40625,"rs753381":0.40625,"rs1805034":0.40625,"rs4075254":0.40625,"rs1805087":0.40625,"rs4925":0.40625,"rs8065080":0.40625,"rs1131498":0.40625,"rs532841":0.40625,"rs2286963":0.40625,"rs2247870":0.40625,"rs649058":0.40625,"rs2241714":0.40625,"rs3742207":0.40625,"GS34251":0.40625,"rs3795677":0.40625,"GS35205":0.40625,"rs7298565":0.40625,"rs1801262":0.40625,"GS35219":0.40625,"rs5215":0.40625,"rs4619":0.40625,"rs4843075":0.40625,"rs12828016":0.40625,"rs156697":0.40625,"rs11096957":0.40625},"consensus_ecp":0.40625,"sample_mismatch_prior":0.01,"swap_threshold":0.5},"summary":{"missing":0,"assayed_pass_rate":"0.8333","total":6,"failed":1},"identity":[{"concordance":"0.0000","identity":"0.0000","missing":0,"sample_name":"urn:wtsi:000000_A00_DUMMY-SAMPLE","genotypes":{"rs6557634":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs6759892":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs6166":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7627615":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs753381":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4075254":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4925":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs8065080":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs532841":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2286963":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"
rs2247870":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs649058":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2241714":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3742207":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3795677":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7298565":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs5215":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4619":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4843075":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]}},"failed":1},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249441_F11_HELIC5102138","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","_unknown_callset_"]],"productio
n":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249442_C09_HELIC5102247","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT",
"1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249461_G12_HELIC5215300","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4925":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_c
allset_"]},"rs2286963":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2247870":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3742207":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3795677":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4843075":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249469_H06_HELIC5274668","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]
],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"0.8000","identity":"0.9249","missing":0,"sample_name":"urn:wtsi:249470_F02_HELIC5274730","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["GG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{
"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["TG","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0}],"swap":{"comparison":[],"sample_warnings":{},"total_samples_checked":1,"prior":0,"total_sample_warnings":0}} \ No newline at end of file diff --git a/src/perl/t/qc/check/identity/identity_script_output_alternate_prior.json b/src/perl/t/qc/check/identity/identity_script_output_alternate_prior.json new file mode 100644 index 000000000..1c74c17a7 --- /dev/null +++ b/src/perl/t/qc/check/identity/identity_script_output_alternate_prior.json @@ -0,0 +1 @@ 
+{"params":{"pass_threshold":0.9,"expected_error_rate":0.01,"equivalent_calls_probability":{"rs6557634":0.40625,"GS35220":0.40625,"rs6759892":0.40625,"rs6166":0.40625,"rs7627615":0.40625,"rs753381":0.40625,"rs1805034":0.40625,"rs4075254":0.40625,"rs1805087":0.40625,"rs4925":0.40625,"rs8065080":0.40625,"rs1131498":0.40625,"rs532841":0.40625,"rs2286963":0.40625,"rs2247870":0.40625,"rs649058":0.40625,"rs2241714":0.40625,"rs3742207":0.40625,"GS34251":0.40625,"rs3795677":0.40625,"GS35205":0.40625,"rs7298565":0.40625,"rs1801262":0.40625,"GS35219":0.40625,"rs5215":0.40625,"rs4619":0.40625,"rs4843075":0.40625,"rs12828016":0.40625,"rs156697":0.40625,"rs11096957":0.40625},"consensus_ecp":0.40625,"sample_mismatch_prior":"0.1","swap_threshold":0.5},"summary":{"missing":0,"assayed_pass_rate":"0.6667","total":6,"failed":2},"identity":[{"concordance":"0.0000","identity":"0.0000","missing":0,"sample_name":"urn:wtsi:000000_A00_DUMMY-SAMPLE","genotypes":{"rs6557634":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs6759892":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs6166":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7627615":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs753381":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4075254":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4925":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs8065080":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs532841":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2286963":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"
rs2247870":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs649058":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2241714":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3742207":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3795677":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7298565":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs5215":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4619":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4843075":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]}},"failed":1},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249441_F11_HELIC5102138","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","_unknown_callset_"]],"productio
n":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249442_C09_HELIC5102247","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT",
"1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249461_G12_HELIC5215300","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs1805087":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4925":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_c
allset_"]},"rs2286963":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs2247870":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3742207":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs3795677":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]},"rs4843075":{"qc":[["NN","0","_unknown_callset_"]],"production":["NN","0","_unknown_callset_"]}},"failed":0},{"concordance":"1.0000","identity":"1.0000","missing":0,"sample_name":"urn:wtsi:249469_H06_HELIC5274668","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{"qc":[["CT","1","_unknown_callset_"]
],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":0},{"concordance":"0.8000","identity":"0.5283","missing":0,"sample_name":"urn:wtsi:249470_F02_HELIC5274730","genotypes":{"rs6557634":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs6759892":{"qc":[["GT","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs6166":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7627615":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs753381":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4075254":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs1805087":{"qc":[["GG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4925":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs8065080":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs532841":{
"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs2286963":{"qc":[["TG","1","_unknown_callset_"]],"production":["GT","1","_unknown_callset_"]},"rs2247870":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs649058":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs2241714":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs3742207":{"qc":[["AC","1","_unknown_callset_"]],"production":["AC","1","_unknown_callset_"]},"rs3795677":{"qc":[["AA","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs7298565":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs5215":{"qc":[["CT","1","_unknown_callset_"]],"production":["CT","1","_unknown_callset_"]},"rs4619":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]},"rs4843075":{"qc":[["AG","1","_unknown_callset_"]],"production":["AG","1","_unknown_callset_"]}},"failed":1}],"swap":{"comparison":[["urn:wtsi:249470_F02_HELIC5274730","urn:wtsi:000000_A00_DUMMY-SAMPLE","0.0000",0]],"sample_warnings":{},"total_samples_checked":2,"prior":0.5,"total_sample_warnings":0}} \ No newline at end of file diff --git a/src/perl/t/qc/check/identity/simulated_ecp.txt b/src/perl/t/qc/check/identity/simulated_ecp.txt new file mode 100644 index 000000000..7f5637461 --- /dev/null +++ b/src/perl/t/qc/check/identity/simulated_ecp.txt @@ -0,0 +1,541 @@ +ecp concord id +0.000 0.000 0.00000000 +0.000 0.038 1.00000000 +0.000 0.077 1.00000000 +0.000 0.115 1.00000000 +0.000 0.154 1.00000000 +0.000 0.192 1.00000000 +0.000 0.231 1.00000000 +0.000 0.269 1.00000000 +0.000 0.308 1.00000000 +0.000 0.346 1.00000000 +0.000 0.385 1.00000000 +0.000 0.423 1.00000000 +0.000 0.462 1.00000000 +0.000 0.500 1.00000000 +0.000 0.538 1.00000000 +0.000 0.577 1.00000000 +0.000 0.615 1.00000000 +0.000 0.654 1.00000000 +0.000 0.692 1.00000000 +0.000 0.731 
1.00000000 +0.000 0.769 1.00000000 +0.000 0.808 1.00000000 +0.000 0.846 1.00000000 +0.000 0.885 1.00000000 +0.000 0.923 1.00000000 +0.000 0.962 1.00000000 +0.000 1.000 1.00000000 +0.050 0.000 0.00000000 +0.050 0.038 0.00000000 +0.050 0.077 0.00000000 +0.050 0.115 0.00000000 +0.050 0.154 0.00000000 +0.050 0.192 0.00000000 +0.050 0.231 0.00000000 +0.050 0.269 0.00000000 +0.050 0.308 0.00000000 +0.050 0.346 0.00000000 +0.050 0.385 0.00000000 +0.050 0.423 0.00000000 +0.050 0.462 0.00000000 +0.050 0.500 0.00000014 +0.050 0.538 0.00026071 +0.050 0.577 0.32909151 +0.050 0.615 0.99891735 +0.050 0.654 0.99999942 +0.050 0.692 1.00000000 +0.050 0.731 1.00000000 +0.050 0.769 1.00000000 +0.050 0.808 1.00000000 +0.050 0.846 1.00000000 +0.050 0.885 1.00000000 +0.050 0.923 1.00000000 +0.050 0.962 1.00000000 +0.050 1.000 1.00000000 +0.100 0.000 0.00000000 +0.100 0.038 0.00000000 +0.100 0.077 0.00000000 +0.100 0.115 0.00000000 +0.100 0.154 0.00000000 +0.100 0.192 0.00000000 +0.100 0.231 0.00000000 +0.100 0.269 0.00000000 +0.100 0.308 0.00000000 +0.100 0.346 0.00000000 +0.100 0.385 0.00000000 +0.100 0.423 0.00000000 +0.100 0.462 0.00000000 +0.100 0.500 0.00000000 +0.100 0.538 0.00000003 +0.100 0.577 0.00002713 +0.100 0.615 0.02360472 +0.100 0.654 0.95563494 +0.100 0.692 0.99994790 +0.100 0.731 0.99999994 +0.100 0.769 1.00000000 +0.100 0.808 1.00000000 +0.100 0.846 1.00000000 +0.100 0.885 1.00000000 +0.100 0.923 1.00000000 +0.100 0.962 1.00000000 +0.100 1.000 1.00000000 +0.150 0.000 0.00000000 +0.150 0.038 0.00000000 +0.150 0.077 0.00000000 +0.150 0.115 0.00000000 +0.150 0.154 0.00000000 +0.150 0.192 0.00000000 +0.150 0.231 0.00000000 +0.150 0.269 0.00000000 +0.150 0.308 0.00000000 +0.150 0.346 0.00000000 +0.150 0.385 0.00000000 +0.150 0.423 0.00000000 +0.150 0.462 0.00000000 +0.150 0.500 0.00000000 +0.150 0.538 0.00000000 +0.150 0.577 0.00000012 +0.150 0.615 0.00006518 +0.150 0.654 0.03527878 +0.150 0.692 0.95352112 +0.150 0.731 0.99991312 +0.150 0.769 0.99999985 +0.150 0.808 
1.00000000 +0.150 0.846 1.00000000 +0.150 0.885 1.00000000 +0.150 0.923 1.00000000 +0.150 0.962 1.00000000 +0.150 1.000 1.00000000 +0.200 0.000 0.00000000 +0.200 0.038 0.00000000 +0.200 0.077 0.00000000 +0.200 0.115 0.00000000 +0.200 0.154 0.00000000 +0.200 0.192 0.00000000 +0.200 0.231 0.00000000 +0.200 0.269 0.00000000 +0.200 0.308 0.00000000 +0.200 0.346 0.00000000 +0.200 0.385 0.00000000 +0.200 0.423 0.00000000 +0.200 0.462 0.00000000 +0.200 0.500 0.00000000 +0.200 0.538 0.00000000 +0.200 0.577 0.00000000 +0.200 0.615 0.00000120 +0.200 0.654 0.00047414 +0.200 0.692 0.15814215 +0.200 0.731 0.98673533 +0.200 0.769 0.99996605 +0.200 0.808 0.99999991 +0.200 0.846 1.00000000 +0.200 0.885 1.00000000 +0.200 0.923 1.00000000 +0.200 0.962 1.00000000 +0.200 1.000 1.00000000 +0.250 0.000 0.00000000 +0.250 0.038 0.00000000 +0.250 0.077 0.00000000 +0.250 0.115 0.00000000 +0.250 0.154 0.00000000 +0.250 0.192 0.00000000 +0.250 0.231 0.00000000 +0.250 0.269 0.00000000 +0.250 0.308 0.00000000 +0.250 0.346 0.00000000 +0.250 0.385 0.00000000 +0.250 0.423 0.00000000 +0.250 0.462 0.00000000 +0.250 0.500 0.00000000 +0.250 0.538 0.00000000 +0.250 0.577 0.00000000 +0.250 0.615 0.00000006 +0.250 0.654 0.00001909 +0.250 0.692 0.00563901 +0.250 0.731 0.62746094 +0.250 0.769 0.99800492 +0.250 0.808 0.99999327 +0.250 0.846 0.99999998 +0.250 0.885 1.00000000 +0.250 0.923 1.00000000 +0.250 0.962 1.00000000 +0.250 1.000 1.00000000 +0.300 0.000 0.00000000 +0.300 0.038 0.00000000 +0.300 0.077 0.00000000 +0.300 0.115 0.00000000 +0.300 0.154 0.00000000 +0.300 0.192 0.00000000 +0.300 0.231 0.00000000 +0.300 0.269 0.00000000 +0.300 0.308 0.00000000 +0.300 0.346 0.00000000 +0.300 0.385 0.00000000 +0.300 0.423 0.00000000 +0.300 0.462 0.00000000 +0.300 0.500 0.00000000 +0.300 0.538 0.00000000 +0.300 0.577 0.00000000 +0.300 0.615 0.00000001 +0.300 0.654 0.00000160 +0.300 0.692 0.00036978 +0.300 0.731 0.07872339 +0.300 0.769 0.95178173 +0.300 0.808 0.99978074 +0.300 0.846 0.99999905 +0.300 0.885 
1.00000000 +0.300 0.923 1.00000000 +0.300 0.962 1.00000000 +0.300 1.000 1.00000000 +0.350 0.000 0.00000000 +0.350 0.038 0.00000000 +0.350 0.077 0.00000000 +0.350 0.115 0.00000000 +0.350 0.154 0.00000000 +0.350 0.192 0.00000000 +0.350 0.231 0.00000000 +0.350 0.269 0.00000000 +0.350 0.308 0.00000000 +0.350 0.346 0.00000000 +0.350 0.385 0.00000000 +0.350 0.423 0.00000000 +0.350 0.462 0.00000000 +0.350 0.500 0.00000000 +0.350 0.538 0.00000000 +0.350 0.577 0.00000000 +0.350 0.615 0.00000000 +0.350 0.654 0.00000023 +0.350 0.692 0.00004174 +0.350 0.731 0.00761548 +0.350 0.769 0.58521816 +0.350 0.808 0.99615983 +0.350 0.846 0.99997903 +0.350 0.885 0.99999989 +0.350 0.923 1.00000000 +0.350 0.962 1.00000000 +0.350 1.000 1.00000000 +0.400 0.000 0.00000000 +0.400 0.038 0.00000000 +0.400 0.077 0.00000000 +0.400 0.115 0.00000000 +0.400 0.154 0.00000000 +0.400 0.192 0.00000000 +0.400 0.231 0.00000000 +0.400 0.269 0.00000000 +0.400 0.308 0.00000000 +0.400 0.346 0.00000000 +0.400 0.385 0.00000000 +0.400 0.423 0.00000000 +0.400 0.462 0.00000000 +0.400 0.500 0.00000000 +0.400 0.538 0.00000000 +0.400 0.577 0.00000000 +0.400 0.615 0.00000000 +0.400 0.654 0.00000005 +0.400 0.692 0.00000716 +0.400 0.731 0.00106180 +0.400 0.769 0.13632679 +0.400 0.808 0.95908352 +0.400 0.846 0.99971280 +0.400 0.885 0.99999807 +0.400 0.923 0.99999999 +0.400 0.962 1.00000000 +0.400 1.000 1.00000000 +0.450 0.000 0.00000000 +0.450 0.038 0.00000000 +0.450 0.077 0.00000000 +0.450 0.115 0.00000000 +0.450 0.154 0.00000000 +0.450 0.192 0.00000000 +0.450 0.231 0.00000000 +0.450 0.269 0.00000000 +0.450 0.308 0.00000000 +0.450 0.346 0.00000000 +0.450 0.385 0.00000000 +0.450 0.423 0.00000000 +0.450 0.462 0.00000000 +0.450 0.500 0.00000000 +0.450 0.538 0.00000000 +0.450 0.577 0.00000000 +0.450 0.615 0.00000000 +0.450 0.654 0.00000001 +0.450 0.692 0.00000172 +0.450 0.731 0.00020847 +0.450 0.769 0.02460886 +0.450 0.808 0.75325688 +0.450 0.846 0.99730013 +0.450 0.885 0.99997763 +0.450 0.923 0.99999982 +0.450 0.962 
1.00000000 +0.450 1.000 1.00000000 +0.500 0.000 0.00000000 +0.500 0.038 0.00000000 +0.500 0.077 0.00000000 +0.500 0.115 0.00000000 +0.500 0.154 0.00000000 +0.500 0.192 0.00000000 +0.500 0.231 0.00000000 +0.500 0.269 0.00000000 +0.500 0.308 0.00000000 +0.500 0.346 0.00000000 +0.500 0.385 0.00000000 +0.500 0.423 0.00000000 +0.500 0.462 0.00000000 +0.500 0.500 0.00000000 +0.500 0.538 0.00000000 +0.500 0.577 0.00000000 +0.500 0.615 0.00000000 +0.500 0.654 0.00000001 +0.500 0.692 0.00000055 +0.500 0.731 0.00005489 +0.500 0.769 0.00540462 +0.500 0.808 0.34979022 +0.500 0.846 0.98156973 +0.500 0.885 0.99981038 +0.500 0.923 0.99999808 +0.500 0.962 0.99999998 +0.500 1.000 1.00000000 +0.550 0.000 0.00000000 +0.550 0.038 0.00000000 +0.550 0.077 0.00000000 +0.550 0.115 0.00000000 +0.550 0.154 0.00000000 +0.550 0.192 0.00000000 +0.550 0.231 0.00000000 +0.550 0.269 0.00000000 +0.550 0.308 0.00000000 +0.550 0.346 0.00000000 +0.550 0.385 0.00000000 +0.550 0.423 0.00000000 +0.550 0.462 0.00000000 +0.550 0.500 0.00000000 +0.550 0.538 0.00000000 +0.550 0.577 0.00000000 +0.550 0.615 0.00000000 +0.550 0.654 0.00000000 +0.550 0.692 0.00000023 +0.550 0.731 0.00001876 +0.550 0.769 0.00151758 +0.550 0.808 0.10961569 +0.550 0.846 0.90885853 +0.550 0.885 0.99876349 +0.550 0.923 0.99998472 +0.550 0.962 0.99999981 +0.550 1.000 1.00000000 +0.600 0.000 0.00000000 +0.600 0.038 0.00000000 +0.600 0.077 0.00000000 +0.600 0.115 0.00000000 +0.600 0.154 0.00000000 +0.600 0.192 0.00000000 +0.600 0.231 0.00000000 +0.600 0.269 0.00000000 +0.600 0.308 0.00000000 +0.600 0.346 0.00000000 +0.600 0.385 0.00000000 +0.600 0.423 0.00000000 +0.600 0.462 0.00000000 +0.600 0.500 0.00000000 +0.600 0.538 0.00000000 +0.600 0.577 0.00000000 +0.600 0.615 0.00000000 +0.600 0.654 0.00000000 +0.600 0.692 0.00000012 +0.600 0.731 0.00000819 +0.600 0.769 0.00054041 +0.600 0.808 0.03445640 +0.600 0.846 0.70196202 +0.600 0.885 0.99360811 +0.600 0.923 0.99990254 +0.600 0.962 0.99999852 +0.600 1.000 0.99999998 +0.650 0.000 
0.00000000 +0.650 0.038 0.00000000 +0.650 0.077 0.00000000 +0.650 0.115 0.00000000 +0.650 0.154 0.00000000 +0.650 0.192 0.00000000 +0.650 0.231 0.00000000 +0.650 0.269 0.00000000 +0.650 0.308 0.00000000 +0.650 0.346 0.00000000 +0.650 0.385 0.00000000 +0.650 0.423 0.00000000 +0.650 0.462 0.00000000 +0.650 0.500 0.00000000 +0.650 0.538 0.00000000 +0.650 0.577 0.00000000 +0.650 0.615 0.00000000 +0.650 0.654 0.00000000 +0.650 0.692 0.00000009 +0.650 0.731 0.00000456 +0.650 0.769 0.00024297 +0.650 0.808 0.01278978 +0.650 0.846 0.40850342 +0.650 0.885 0.97355597 +0.650 0.923 0.99949072 +0.650 0.962 0.99999044 +0.650 1.000 0.99999982 +0.700 0.000 0.00000000 +0.700 0.038 0.00000000 +0.700 0.077 0.00000000 +0.700 0.115 0.00000000 +0.700 0.154 0.00000000 +0.700 0.192 0.00000000 +0.700 0.231 0.00000000 +0.700 0.269 0.00000000 +0.700 0.308 0.00000000 +0.700 0.346 0.00000000 +0.700 0.385 0.00000000 +0.700 0.423 0.00000000 +0.700 0.462 0.00000000 +0.700 0.500 0.00000000 +0.700 0.538 0.00000000 +0.700 0.577 0.00000000 +0.700 0.615 0.00000000 +0.700 0.654 0.00000000 +0.700 0.692 0.00000008 +0.700 0.731 0.00000328 +0.700 0.769 0.00013918 +0.700 0.808 0.00587154 +0.700 0.846 0.20037884 +0.700 0.885 0.91403225 +0.700 0.923 0.99778816 +0.700 0.962 0.99994776 +0.700 1.000 0.99999877 +0.750 0.000 0.00000000 +0.750 0.038 0.00000000 +0.750 0.077 0.00000000 +0.750 0.115 0.00000000 +0.750 0.154 0.00000000 +0.750 0.192 0.00000000 +0.750 0.231 0.00000000 +0.750 0.269 0.00000000 +0.750 0.308 0.00000000 +0.750 0.346 0.00000000 +0.750 0.385 0.00000000 +0.750 0.423 0.00000000 +0.750 0.462 0.00000000 +0.750 0.500 0.00000000 +0.750 0.538 0.00000000 +0.750 0.577 0.00000000 +0.750 0.615 0.00000000 +0.750 0.654 0.00000000 +0.750 0.692 0.00000010 +0.750 0.731 0.00000317 +0.750 0.769 0.00010458 +0.750 0.808 0.00343947 +0.750 0.846 0.10224868 +0.750 0.885 0.78985014 +0.750 0.923 0.99200197 +0.750 0.962 0.99975574 +0.750 1.000 0.99999260 +0.800 0.000 0.00000000 +0.800 0.038 0.00000000 +0.800 0.077 
0.00000000 +0.800 0.115 0.00000000 +0.800 0.154 0.00000000 +0.800 0.192 0.00000000 +0.800 0.231 0.00000000 +0.800 0.269 0.00000000 +0.800 0.308 0.00000000 +0.800 0.346 0.00000000 +0.800 0.385 0.00000000 +0.800 0.423 0.00000000 +0.800 0.462 0.00000000 +0.800 0.500 0.00000000 +0.800 0.538 0.00000000 +0.800 0.577 0.00000000 +0.800 0.615 0.00000000 +0.800 0.654 0.00000001 +0.800 0.692 0.00000018 +0.800 0.731 0.00000443 +0.800 0.769 0.00010973 +0.800 0.808 0.00270867 +0.800 0.846 0.06298761 +0.800 0.885 0.62458773 +0.800 0.923 0.97629069 +0.800 0.962 0.99901975 +0.800 1.000 0.99996036 +0.850 0.000 0.00000000 +0.850 0.038 0.00000000 +0.850 0.077 0.00000000 +0.850 0.115 0.00000000 +0.850 0.154 0.00000000 +0.850 0.192 0.00000000 +0.850 0.231 0.00000000 +0.850 0.269 0.00000000 +0.850 0.308 0.00000000 +0.850 0.346 0.00000000 +0.850 0.385 0.00000000 +0.850 0.423 0.00000000 +0.850 0.462 0.00000000 +0.850 0.500 0.00000000 +0.850 0.538 0.00000000 +0.850 0.577 0.00000000 +0.850 0.615 0.00000000 +0.850 0.654 0.00000003 +0.850 0.692 0.00000060 +0.850 0.731 0.00001050 +0.850 0.769 0.00018337 +0.850 0.808 0.00319397 +0.850 0.846 0.05301181 +0.850 0.885 0.49443695 +0.850 0.923 0.94470894 +0.850 0.962 0.99666115 +0.850 1.000 0.99980828 +0.900 0.000 0.00000000 +0.900 0.038 0.00000000 +0.900 0.077 0.00000000 +0.900 0.115 0.00000000 +0.900 0.154 0.00000000 +0.900 0.192 0.00000000 +0.900 0.231 0.00000000 +0.900 0.269 0.00000000 +0.900 0.308 0.00000000 +0.900 0.346 0.00000000 +0.900 0.385 0.00000000 +0.900 0.423 0.00000000 +0.900 0.462 0.00000000 +0.900 0.500 0.00000000 +0.900 0.538 0.00000000 +0.900 0.577 0.00000000 +0.900 0.615 0.00000005 +0.900 0.654 0.00000050 +0.900 0.692 0.00000550 +0.900 0.731 0.00006054 +0.900 0.769 0.00066558 +0.900 0.808 0.00727296 +0.900 0.846 0.07457853 +0.900 0.885 0.46991108 +0.900 0.923 0.90698743 +0.900 0.962 0.99076328 +0.900 1.000 0.99915319 +0.950 0.000 0.00000000 +0.950 0.038 0.00000000 +0.950 0.077 0.00000000 +0.950 0.115 0.00000000 +0.950 0.154 
0.00000000 +0.950 0.192 0.00000000 +0.950 0.231 0.00000000 +0.950 0.269 0.00000000 +0.950 0.308 0.00000000 +0.950 0.346 0.00000000 +0.950 0.385 0.00000000 +0.950 0.423 0.00000001 +0.950 0.462 0.00000003 +0.950 0.500 0.00000014 +0.950 0.538 0.00000072 +0.950 0.577 0.00000376 +0.950 0.615 0.00001961 +0.950 0.654 0.00010218 +0.950 0.692 0.00053217 +0.950 0.731 0.00276670 +0.950 0.769 0.01424996 +0.950 0.808 0.07004700 +0.950 0.846 0.28185339 +0.950 0.885 0.67159197 +0.950 0.923 0.91420350 +0.950 0.962 0.98230737 +0.950 1.000 0.99655519 diff --git a/src/perl/t/qc/check/identity/simulated_qcr.txt b/src/perl/t/qc/check/identity/simulated_qcr.txt new file mode 100644 index 000000000..d05581437 --- /dev/null +++ b/src/perl/t/qc/check/identity/simulated_qcr.txt @@ -0,0 +1,109 @@ +qcr concord id +1 0.000 0.00000000 +1 0.038 0.00000000 +1 0.077 0.00000000 +1 0.115 0.00000000 +1 0.154 0.00000000 +1 0.192 0.00000000 +1 0.231 0.00000000 +1 0.269 0.00000000 +1 0.308 0.00000000 +1 0.346 0.00000000 +1 0.385 0.00000000 +1 0.423 0.00000000 +1 0.462 0.00000000 +1 0.500 0.00000000 +1 0.538 0.00000000 +1 0.577 0.00000000 +1 0.615 0.00000000 +1 0.654 0.00000004 +1 0.692 0.00000589 +1 0.731 0.00085121 +1 0.769 0.10974040 +1 0.808 0.94690992 +1 0.846 0.99961266 +1 0.885 0.99999732 +1 0.923 0.99999998 +1 0.962 1.00000000 +1 1.000 1.00000000 +2 0.000 0.00000000 +2 0.038 0.00000000 +2 0.077 0.00000000 +2 0.115 0.00000000 +2 0.154 0.00000000 +2 0.192 0.00000000 +2 0.231 0.00000000 +2 0.269 0.00000000 +2 0.308 0.00000000 +2 0.346 0.00000000 +2 0.385 0.00000000 +2 0.423 0.00000000 +2 0.462 0.00000000 +2 0.500 0.00000000 +2 0.538 0.00000000 +2 0.577 0.00000000 +2 0.615 0.00000000 +2 0.654 0.00000000 +2 0.692 0.00000000 +2 0.731 0.00000001 +2 0.769 0.00015346 +2 0.808 0.76265808 +2 0.846 0.99998514 +2 0.885 1.00000000 +2 0.923 1.00000000 +2 0.962 1.00000000 +2 1.000 1.00000000 +3 0.000 0.00000000 +3 0.038 0.00000000 +3 0.077 0.00000000 +3 0.115 0.00000000 +3 0.154 0.00000000 +3 0.192 0.00000000 +3 
0.231 0.00000000 +3 0.269 0.00000000 +3 0.308 0.00000000 +3 0.346 0.00000000 +3 0.385 0.00000000 +3 0.423 0.00000000 +3 0.462 0.00000000 +3 0.500 0.00000000 +3 0.538 0.00000000 +3 0.577 0.00000000 +3 0.615 0.00000000 +3 0.654 0.00000000 +3 0.692 0.00000000 +3 0.731 0.00000000 +3 0.769 0.00000019 +3 0.808 0.36665408 +3 0.846 0.99999943 +3 0.885 1.00000000 +3 0.923 1.00000000 +3 0.962 1.00000000 +3 1.000 1.00000000 +4 0.000 0.00000000 +4 0.038 0.00000000 +4 0.077 0.00000000 +4 0.115 0.00000000 +4 0.154 0.00000000 +4 0.192 0.00000000 +4 0.231 0.00000000 +4 0.269 0.00000000 +4 0.308 0.00000000 +4 0.346 0.00000000 +4 0.385 0.00000000 +4 0.423 0.00000000 +4 0.462 0.00000000 +4 0.500 0.00000000 +4 0.538 0.00000000 +4 0.577 0.00000000 +4 0.615 0.00000000 +4 0.654 0.00000000 +4 0.692 0.00000000 +4 0.731 0.00000000 +4 0.769 0.00000000 +4 0.808 0.09444727 +4 0.846 0.99999998 +4 0.885 1.00000000 +4 0.923 1.00000000 +4 0.962 1.00000000 +4 1.000 1.00000000 diff --git a/src/perl/t/qc/check/identity/simulated_qcs.txt b/src/perl/t/qc/check/identity/simulated_qcs.txt new file mode 100644 index 000000000..1139ecd59 --- /dev/null +++ b/src/perl/t/qc/check/identity/simulated_qcs.txt @@ -0,0 +1,316 @@ +qcs concord id +4 0.000 0.00000797 +4 0.250 0.00115124 +4 0.500 0.14293127 +4 0.750 0.96020690 +4 1.000 0.99971367 +5 0.000 0.00000013 +5 0.200 0.00001941 +5 0.400 0.00280085 +5 0.600 0.28896460 +5 0.800 0.98327841 +5 1.000 0.99988248 +6 0.000 0.00000000 +6 0.167 0.00000033 +6 0.333 0.00004730 +6 0.500 0.00679810 +6 0.667 0.49757956 +6 0.833 0.99306991 +6 1.000 0.99995177 +7 0.000 0.00000000 +7 0.143 0.00000001 +7 0.286 0.00000080 +7 0.429 0.00011526 +7 0.571 0.01640618 +7 0.714 0.70704067 +7 0.857 0.99714454 +7 1.000 0.99998021 +8 0.000 0.00000000 +8 0.125 0.00000000 +8 0.250 0.00000001 +8 0.375 0.00000194 +8 0.500 0.00028084 +8 0.625 0.03905978 +8 0.750 0.85468021 +8 0.875 0.99882628 +8 1.000 0.99999188 +9 0.000 0.00000000 +9 0.111 0.00000000 +9 0.222 0.00000000 +9 0.333 0.00000003 +9 
0.444 0.00000473 +9 0.556 0.00068412 +9 0.667 0.09012721 +9 0.778 0.93477889 +9 0.889 0.99951803 +9 1.000 0.99999667 +10 0.000 0.00000000 +10 0.100 0.00000000 +10 0.200 0.00000000 +10 0.300 0.00000000 +10 0.400 0.00000008 +10 0.500 0.00001153 +10 0.600 0.00166551 +10 0.700 0.19445059 +10 0.800 0.97216586 +10 0.900 0.99980216 +10 1.000 0.99999863 +11 0.000 0.00000000 +11 0.091 0.00000000 +11 0.182 0.00000000 +11 0.273 0.00000000 +11 0.364 0.00000000 +11 0.455 0.00000019 +11 0.545 0.00002810 +11 0.636 0.00404903 +11 0.727 0.37037457 +11 0.818 0.98838757 +11 0.909 0.99991881 +11 1.000 0.99999944 +12 0.000 0.00000000 +12 0.083 0.00000000 +12 0.167 0.00000000 +12 0.250 0.00000000 +12 0.333 0.00000000 +12 0.417 0.00000000 +12 0.500 0.00000047 +12 0.583 0.00006847 +12 0.667 0.00981011 +12 0.750 0.58907092 +12 0.833 0.99520195 +12 0.917 0.99996668 +12 1.000 0.99999977 +13 0.000 0.00000000 +13 0.077 0.00000000 +13 0.154 0.00000000 +13 0.231 0.00000000 +13 0.308 0.00000000 +13 0.385 0.00000000 +13 0.462 0.00000001 +13 0.538 0.00000115 +13 0.615 0.00016683 +13 0.692 0.02357417 +13 0.769 0.77744907 +13 0.846 0.99802552 +13 0.923 0.99998633 +13 1.000 0.99999991 +14 0.000 0.00000000 +14 0.071 0.00000000 +14 0.143 0.00000000 +14 0.214 0.00000000 +14 0.286 0.00000000 +14 0.357 0.00000000 +14 0.429 0.00000000 +14 0.500 0.00000002 +14 0.571 0.00000281 +14 0.643 0.00040646 +14 0.714 0.05556617 +14 0.786 0.89488106 +14 0.857 0.99918882 +14 0.929 0.99999439 +14 1.000 0.99999996 +15 0.000 0.00000000 +15 0.067 0.00000000 +15 0.133 0.00000000 +15 0.200 0.00000000 +15 0.267 0.00000000 +15 0.333 0.00000000 +15 0.400 0.00000000 +15 0.467 0.00000000 +15 0.533 0.00000005 +15 0.600 0.00000685 +15 0.667 0.00098993 +15 0.733 0.12539815 +15 0.800 0.95401370 +15 0.867 0.99966697 +15 0.933 0.99999770 +15 1.000 0.99999998 +16 0.000 0.00000000 +16 0.062 0.00000000 +16 0.125 0.00000000 +16 0.188 0.00000000 +16 0.250 0.00000000 +16 0.312 0.00000000 +16 0.375 0.00000000 +16 0.438 0.00000000 +16 0.500 
0.00000000 +16 0.562 0.00000012 +16 0.625 0.00001669 +16 0.688 0.00240896 +16 0.750 0.25892974 +16 0.812 0.98060341 +16 0.875 0.99986331 +16 0.938 0.99999906 +16 1.000 0.99999999 +17 0.000 0.00000000 +17 0.059 0.00000000 +17 0.118 0.00000000 +17 0.176 0.00000000 +17 0.235 0.00000000 +17 0.294 0.00000000 +17 0.353 0.00000000 +17 0.412 0.00000000 +17 0.471 0.00000000 +17 0.529 0.00000000 +17 0.588 0.00000028 +17 0.647 0.00004067 +17 0.706 0.00585020 +17 0.765 0.45988578 +17 0.824 0.99194845 +17 0.882 0.99994391 +17 0.941 0.99999961 +17 1.000 1.00000000 +18 0.000 0.00000000 +18 0.056 0.00000000 +18 0.111 0.00000000 +18 0.167 0.00000000 +18 0.222 0.00000000 +18 0.278 0.00000000 +18 0.333 0.00000000 +18 0.389 0.00000000 +18 0.444 0.00000000 +18 0.500 0.00000000 +18 0.556 0.00000000 +18 0.611 0.00000068 +18 0.667 0.00009910 +18 0.722 0.01413764 +18 0.778 0.67479073 +18 0.833 0.99668026 +18 0.889 0.99997698 +18 0.944 0.99999984 +18 1.000 1.00000000 +19 0.000 0.00000000 +19 0.053 0.00000000 +19 0.105 0.00000000 +19 0.158 0.00000000 +19 0.211 0.00000000 +19 0.263 0.00000000 +19 0.316 0.00000000 +19 0.368 0.00000000 +19 0.421 0.00000000 +19 0.474 0.00000000 +19 0.526 0.00000000 +19 0.579 0.00000001 +19 0.632 0.00000167 +19 0.684 0.00024146 +19 0.737 0.03376640 +19 0.789 0.83488750 +19 0.842 0.99863506 +19 0.895 0.99999055 +19 0.947 0.99999993 +19 1.000 1.00000000 +20 0.000 0.00000000 +20 0.050 0.00000000 +20 0.100 0.00000000 +20 0.150 0.00000000 +20 0.200 0.00000000 +20 0.250 0.00000000 +20 0.300 0.00000000 +20 0.350 0.00000000 +20 0.400 0.00000000 +20 0.450 0.00000000 +20 0.500 0.00000000 +20 0.550 0.00000000 +20 0.600 0.00000003 +20 0.650 0.00000407 +20 0.700 0.00058822 +20 0.750 0.07847836 +20 0.800 0.92493758 +20 0.850 0.99943944 +20 0.900 0.99999612 +20 0.950 0.99999997 +20 1.000 1.00000000 +21 0.000 0.00000000 +21 0.048 0.00000000 +21 0.095 0.00000000 +21 0.143 0.00000000 +21 0.190 0.00000000 +21 0.238 0.00000000 +21 0.286 0.00000000 +21 0.333 0.00000000 +21 0.381 
0.00000000 +21 0.429 0.00000000 +21 0.476 0.00000000 +21 0.524 0.00000000 +21 0.571 0.00000000 +21 0.619 0.00000007 +21 0.667 0.00000991 +21 0.714 0.00143225 +21 0.762 0.17186497 +21 0.810 0.96777142 +21 0.857 0.99976990 +21 0.905 0.99999841 +21 0.952 0.99999999 +21 1.000 1.00000000 +22 0.000 0.00000000 +22 0.045 0.00000000 +22 0.091 0.00000000 +22 0.136 0.00000000 +22 0.182 0.00000000 +22 0.227 0.00000000 +22 0.273 0.00000000 +22 0.318 0.00000000 +22 0.364 0.00000000 +22 0.409 0.00000000 +22 0.455 0.00000000 +22 0.500 0.00000000 +22 0.545 0.00000000 +22 0.591 0.00000000 +22 0.636 0.00000017 +22 0.682 0.00002416 +22 0.727 0.00348311 +22 0.773 0.33587509 +22 0.818 0.98651870 +22 0.864 0.99990556 +22 0.909 0.99999935 +22 0.955 1.00000000 +22 1.000 1.00000000 +23 0.000 0.00000000 +23 0.043 0.00000000 +23 0.087 0.00000000 +23 0.130 0.00000000 +23 0.174 0.00000000 +23 0.217 0.00000000 +23 0.261 0.00000000 +23 0.304 0.00000000 +23 0.348 0.00000000 +23 0.391 0.00000000 +23 0.435 0.00000000 +23 0.478 0.00000000 +23 0.522 0.00000000 +23 0.565 0.00000000 +23 0.609 0.00000000 +23 0.652 0.00000041 +23 0.696 0.00005886 +23 0.739 0.00844580 +23 0.783 0.55206195 +23 0.826 0.99442357 +23 0.870 0.99996125 +23 0.913 0.99999973 +23 0.957 1.00000000 +23 1.000 1.00000000 +24 0.000 0.00000000 +24 0.042 0.00000000 +24 0.083 0.00000000 +24 0.125 0.00000000 +24 0.167 0.00000000 +24 0.208 0.00000000 +24 0.250 0.00000000 +24 0.292 0.00000000 +24 0.333 0.00000000 +24 0.375 0.00000000 +24 0.417 0.00000000 +24 0.458 0.00000000 +24 0.500 0.00000000 +24 0.542 0.00000000 +24 0.583 0.00000000 +24 0.625 0.00000001 +24 0.667 0.00000099 +24 0.708 0.00014344 +24 0.750 0.02033498 +24 0.792 0.75021167 +24 0.833 0.99770414 +24 0.875 0.99998410 +24 0.917 0.99999989 +24 0.958 1.00000000 +24 1.000 1.00000000 diff --git a/src/perl/t/qc/check/identity/simulated_smp.txt b/src/perl/t/qc/check/identity/simulated_smp.txt new file mode 100644 index 000000000..18ecbd672 --- /dev/null +++ 
b/src/perl/t/qc/check/identity/simulated_smp.txt @@ -0,0 +1,541 @@ +smp concord id +0.010 0.000 0.00000000 +0.010 0.038 0.00000000 +0.010 0.077 0.00000000 +0.010 0.115 0.00000000 +0.010 0.154 0.00000000 +0.010 0.192 0.00000000 +0.010 0.231 0.00000000 +0.010 0.269 0.00000000 +0.010 0.308 0.00000000 +0.010 0.346 0.00000000 +0.010 0.385 0.00000000 +0.010 0.423 0.00000000 +0.010 0.462 0.00000000 +0.010 0.500 0.00000000 +0.010 0.538 0.00000000 +0.010 0.577 0.00000000 +0.010 0.615 0.00000000 +0.010 0.654 0.00000004 +0.010 0.692 0.00000589 +0.010 0.731 0.00085121 +0.010 0.769 0.10974040 +0.010 0.808 0.94690992 +0.010 0.846 0.99961266 +0.010 0.885 0.99999732 +0.010 0.923 0.99999998 +0.010 0.962 1.00000000 +0.010 1.000 1.00000000 +0.060 0.000 0.00000000 +0.060 0.038 0.00000000 +0.060 0.077 0.00000000 +0.060 0.115 0.00000000 +0.060 0.154 0.00000000 +0.060 0.192 0.00000000 +0.060 0.231 0.00000000 +0.060 0.269 0.00000000 +0.060 0.308 0.00000000 +0.060 0.346 0.00000000 +0.060 0.385 0.00000000 +0.060 0.423 0.00000000 +0.060 0.462 0.00000000 +0.060 0.500 0.00000000 +0.060 0.538 0.00000000 +0.060 0.577 0.00000000 +0.060 0.615 0.00000000 +0.060 0.654 0.00000001 +0.060 0.692 0.00000093 +0.060 0.731 0.00013480 +0.060 0.769 0.01913379 +0.060 0.808 0.73839234 +0.060 0.846 0.99755738 +0.060 0.885 0.99998308 +0.060 0.923 0.99999988 +0.060 0.962 1.00000000 +0.060 1.000 1.00000000 +0.110 0.000 0.00000000 +0.110 0.038 0.00000000 +0.110 0.077 0.00000000 +0.110 0.115 0.00000000 +0.110 0.154 0.00000000 +0.110 0.192 0.00000000 +0.110 0.231 0.00000000 +0.110 0.269 0.00000000 +0.110 0.308 0.00000000 +0.110 0.346 0.00000000 +0.110 0.385 0.00000000 +0.110 0.423 0.00000000 +0.110 0.462 0.00000000 +0.110 0.500 0.00000000 +0.110 0.538 0.00000000 +0.110 0.577 0.00000000 +0.110 0.615 0.00000000 +0.110 0.654 0.00000000 +0.110 0.692 0.00000048 +0.110 0.731 0.00006962 +0.110 0.769 0.00997376 +0.110 0.808 0.59310956 +0.110 0.846 0.99528108 +0.110 0.885 0.99996723 +0.110 0.923 0.99999977 +0.110 0.962 
1.00000000 +0.110 1.000 1.00000000 +0.160 0.000 0.00000000 +0.160 0.038 0.00000000 +0.160 0.077 0.00000000 +0.160 0.115 0.00000000 +0.160 0.154 0.00000000 +0.160 0.192 0.00000000 +0.160 0.231 0.00000000 +0.160 0.269 0.00000000 +0.160 0.308 0.00000000 +0.160 0.346 0.00000000 +0.160 0.385 0.00000000 +0.160 0.423 0.00000000 +0.160 0.462 0.00000000 +0.160 0.500 0.00000000 +0.160 0.538 0.00000000 +0.160 0.577 0.00000000 +0.160 0.615 0.00000000 +0.160 0.654 0.00000000 +0.160 0.692 0.00000031 +0.160 0.731 0.00004518 +0.160 0.769 0.00649448 +0.160 0.808 0.48608414 +0.160 0.846 0.99274607 +0.160 0.885 0.99994950 +0.160 0.923 0.99999965 +0.160 0.962 1.00000000 +0.160 1.000 1.00000000 +0.210 0.000 0.00000000 +0.210 0.038 0.00000000 +0.210 0.077 0.00000000 +0.210 0.115 0.00000000 +0.210 0.154 0.00000000 +0.210 0.192 0.00000000 +0.210 0.231 0.00000000 +0.210 0.269 0.00000000 +0.210 0.308 0.00000000 +0.210 0.346 0.00000000 +0.210 0.385 0.00000000 +0.210 0.423 0.00000000 +0.210 0.462 0.00000000 +0.210 0.500 0.00000000 +0.210 0.538 0.00000000 +0.210 0.577 0.00000000 +0.210 0.615 0.00000000 +0.210 0.654 0.00000000 +0.210 0.692 0.00000022 +0.210 0.731 0.00003237 +0.210 0.769 0.00466222 +0.210 0.808 0.40396275 +0.210 0.846 0.98990560 +0.210 0.885 0.99992953 +0.210 0.923 0.99999951 +0.210 0.962 1.00000000 +0.210 1.000 1.00000000 +0.260 0.000 0.00000000 +0.260 0.038 0.00000000 +0.260 0.077 0.00000000 +0.260 0.115 0.00000000 +0.260 0.154 0.00000000 +0.260 0.192 0.00000000 +0.260 0.231 0.00000000 +0.260 0.269 0.00000000 +0.260 0.308 0.00000000 +0.260 0.346 0.00000000 +0.260 0.385 0.00000000 +0.260 0.423 0.00000000 +0.260 0.462 0.00000000 +0.260 0.500 0.00000000 +0.260 0.538 0.00000000 +0.260 0.577 0.00000000 +0.260 0.615 0.00000000 +0.260 0.654 0.00000000 +0.260 0.692 0.00000017 +0.260 0.731 0.00002449 +0.260 0.769 0.00353132 +0.260 0.808 0.33895885 +0.260 0.846 0.98670092 +0.260 0.885 0.99990686 +0.260 0.923 0.99999936 +0.260 0.962 1.00000000 +0.260 1.000 1.00000000 +0.310 0.000 
0.00000000 +0.310 0.038 0.00000000 +0.310 0.077 0.00000000 +0.310 0.115 0.00000000 +0.310 0.154 0.00000000 +0.310 0.192 0.00000000 +0.310 0.231 0.00000000 +0.310 0.269 0.00000000 +0.310 0.308 0.00000000 +0.310 0.346 0.00000000 +0.310 0.385 0.00000000 +0.310 0.423 0.00000000 +0.310 0.462 0.00000000 +0.310 0.500 0.00000000 +0.310 0.538 0.00000000 +0.310 0.577 0.00000000 +0.310 0.615 0.00000000 +0.310 0.654 0.00000000 +0.310 0.692 0.00000013 +0.310 0.731 0.00001915 +0.310 0.769 0.00276376 +0.310 0.808 0.28622560 +0.310 0.846 0.98305718 +0.310 0.885 0.99988090 +0.310 0.923 0.99999918 +0.310 0.962 0.99999999 +0.310 1.000 1.00000000 +0.360 0.000 0.00000000 +0.360 0.038 0.00000000 +0.360 0.077 0.00000000 +0.360 0.115 0.00000000 +0.360 0.154 0.00000000 +0.360 0.192 0.00000000 +0.360 0.231 0.00000000 +0.360 0.269 0.00000000 +0.360 0.308 0.00000000 +0.360 0.346 0.00000000 +0.360 0.385 0.00000000 +0.360 0.423 0.00000000 +0.360 0.462 0.00000000 +0.360 0.500 0.00000000 +0.360 0.538 0.00000000 +0.360 0.577 0.00000000 +0.360 0.615 0.00000000 +0.360 0.654 0.00000000 +0.360 0.692 0.00000011 +0.360 0.731 0.00001530 +0.360 0.769 0.00220868 +0.360 0.808 0.24258819 +0.360 0.846 0.97887750 +0.360 0.885 0.99985089 +0.360 0.923 0.99999897 +0.360 0.962 0.99999999 +0.360 1.000 1.00000000 +0.410 0.000 0.00000000 +0.410 0.038 0.00000000 +0.410 0.077 0.00000000 +0.410 0.115 0.00000000 +0.410 0.154 0.00000000 +0.410 0.192 0.00000000 +0.410 0.231 0.00000000 +0.410 0.269 0.00000000 +0.410 0.308 0.00000000 +0.410 0.346 0.00000000 +0.410 0.385 0.00000000 +0.410 0.423 0.00000000 +0.410 0.462 0.00000000 +0.410 0.500 0.00000000 +0.410 0.538 0.00000000 +0.410 0.577 0.00000000 +0.410 0.615 0.00000000 +0.410 0.654 0.00000000 +0.410 0.692 0.00000009 +0.410 0.731 0.00001238 +0.410 0.769 0.00178857 +0.410 0.808 0.20588009 +0.410 0.846 0.97403427 +0.410 0.885 0.99981580 +0.410 0.923 0.99999873 +0.410 0.962 0.99999999 +0.410 1.000 1.00000000 +0.460 0.000 0.00000000 +0.460 0.038 0.00000000 +0.460 0.077 
0.00000000 +0.460 0.115 0.00000000 +0.460 0.154 0.00000000 +0.460 0.192 0.00000000 +0.460 0.231 0.00000000 +0.460 0.269 0.00000000 +0.460 0.308 0.00000000 +0.460 0.346 0.00000000 +0.460 0.385 0.00000000 +0.460 0.423 0.00000000 +0.460 0.462 0.00000000 +0.460 0.500 0.00000000 +0.460 0.538 0.00000000 +0.460 0.577 0.00000000 +0.460 0.615 0.00000000 +0.460 0.654 0.00000000 +0.460 0.692 0.00000007 +0.460 0.731 0.00001010 +0.460 0.769 0.00145954 +0.460 0.808 0.17457222 +0.460 0.846 0.96835585 +0.460 0.885 0.99977420 +0.460 0.923 0.99999844 +0.460 0.962 0.99999999 +0.460 1.000 1.00000000 +0.510 0.000 0.00000000 +0.510 0.038 0.00000000 +0.510 0.077 0.00000000 +0.510 0.115 0.00000000 +0.510 0.154 0.00000000 +0.510 0.192 0.00000000 +0.510 0.231 0.00000000 +0.510 0.269 0.00000000 +0.510 0.308 0.00000000 +0.510 0.346 0.00000000 +0.510 0.385 0.00000000 +0.510 0.423 0.00000000 +0.510 0.462 0.00000000 +0.510 0.500 0.00000000 +0.510 0.538 0.00000000 +0.510 0.577 0.00000000 +0.510 0.615 0.00000000 +0.510 0.654 0.00000000 +0.510 0.692 0.00000006 +0.510 0.731 0.00000827 +0.510 0.769 0.00119487 +0.510 0.808 0.14755455 +0.510 0.846 0.96160580 +0.510 0.885 0.99972413 +0.510 0.923 0.99999809 +0.510 0.962 0.99999999 +0.510 1.000 1.00000000 +0.560 0.000 0.00000000 +0.560 0.038 0.00000000 +0.560 0.077 0.00000000 +0.560 0.115 0.00000000 +0.560 0.154 0.00000000 +0.560 0.192 0.00000000 +0.560 0.231 0.00000000 +0.560 0.269 0.00000000 +0.560 0.308 0.00000000 +0.560 0.346 0.00000000 +0.560 0.385 0.00000000 +0.560 0.423 0.00000000 +0.560 0.462 0.00000000 +0.560 0.500 0.00000000 +0.560 0.538 0.00000000 +0.560 0.577 0.00000000 +0.560 0.615 0.00000000 +0.560 0.654 0.00000000 +0.560 0.692 0.00000005 +0.560 0.731 0.00000676 +0.560 0.769 0.00097736 +0.560 0.808 0.12400180 +0.560 0.846 0.95344918 +0.560 0.885 0.99966268 +0.560 0.923 0.99999767 +0.560 0.962 0.99999998 +0.560 1.000 1.00000000 +0.610 0.000 0.00000000 +0.610 0.038 0.00000000 +0.610 0.077 0.00000000 +0.610 0.115 0.00000000 +0.610 0.154 
0.00000000 +0.610 0.192 0.00000000 +0.610 0.231 0.00000000 +0.610 0.269 0.00000000 +0.610 0.308 0.00000000 +0.610 0.346 0.00000000 +0.610 0.385 0.00000000 +0.610 0.423 0.00000000 +0.610 0.462 0.00000000 +0.610 0.500 0.00000000 +0.610 0.538 0.00000000 +0.610 0.577 0.00000000 +0.610 0.615 0.00000000 +0.610 0.654 0.00000000 +0.610 0.692 0.00000004 +0.610 0.731 0.00000550 +0.610 0.769 0.00079543 +0.610 0.808 0.10328757 +0.610 0.846 0.94339519 +0.610 0.885 0.99958549 +0.610 0.923 0.99999713 +0.610 0.962 0.99999998 +0.610 1.000 1.00000000 +0.660 0.000 0.00000000 +0.660 0.038 0.00000000 +0.660 0.077 0.00000000 +0.660 0.115 0.00000000 +0.660 0.154 0.00000000 +0.660 0.192 0.00000000 +0.660 0.231 0.00000000 +0.660 0.269 0.00000000 +0.660 0.308 0.00000000 +0.660 0.346 0.00000000 +0.660 0.385 0.00000000 +0.660 0.423 0.00000000 +0.660 0.462 0.00000000 +0.660 0.500 0.00000000 +0.660 0.538 0.00000000 +0.660 0.577 0.00000000 +0.660 0.615 0.00000000 +0.660 0.654 0.00000000 +0.660 0.692 0.00000003 +0.660 0.731 0.00000443 +0.660 0.769 0.00064102 +0.660 0.808 0.08492791 +0.660 0.846 0.93069466 +0.660 0.885 0.99948561 +0.660 0.923 0.99999644 +0.660 0.962 0.99999998 +0.660 1.000 1.00000000 +0.710 0.000 0.00000000 +0.710 0.038 0.00000000 +0.710 0.077 0.00000000 +0.710 0.115 0.00000000 +0.710 0.154 0.00000000 +0.710 0.192 0.00000000 +0.710 0.231 0.00000000 +0.710 0.269 0.00000000 +0.710 0.308 0.00000000 +0.710 0.346 0.00000000 +0.710 0.385 0.00000000 +0.710 0.423 0.00000000 +0.710 0.462 0.00000000 +0.710 0.500 0.00000000 +0.710 0.538 0.00000000 +0.710 0.577 0.00000000 +0.710 0.615 0.00000000 +0.710 0.654 0.00000000 +0.710 0.692 0.00000002 +0.710 0.731 0.00000351 +0.710 0.769 0.00050832 +0.710 0.808 0.06854292 +0.710 0.846 0.91414423 +0.710 0.885 0.99935132 +0.710 0.923 0.99999551 +0.710 0.962 0.99999997 +0.710 1.000 1.00000000 +0.760 0.000 0.00000000 +0.760 0.038 0.00000000 +0.760 0.077 0.00000000 +0.760 0.115 0.00000000 +0.760 0.154 0.00000000 +0.760 0.192 0.00000000 +0.760 0.231 
0.00000000 +0.760 0.269 0.00000000 +0.760 0.308 0.00000000 +0.760 0.346 0.00000000 +0.760 0.385 0.00000000 +0.760 0.423 0.00000000 +0.760 0.462 0.00000000 +0.760 0.500 0.00000000 +0.760 0.538 0.00000000 +0.760 0.577 0.00000000 +0.760 0.615 0.00000000 +0.760 0.654 0.00000000 +0.760 0.692 0.00000002 +0.760 0.731 0.00000272 +0.760 0.769 0.00039304 +0.760 0.808 0.05383030 +0.760 0.846 0.89168063 +0.760 0.885 0.99916115 +0.760 0.923 0.99999420 +0.760 0.962 0.99999996 +0.760 1.000 1.00000000 +0.810 0.000 0.00000000 +0.810 0.038 0.00000000 +0.810 0.077 0.00000000 +0.810 0.115 0.00000000 +0.810 0.154 0.00000000 +0.810 0.192 0.00000000 +0.810 0.231 0.00000000 +0.810 0.269 0.00000000 +0.810 0.308 0.00000000 +0.810 0.346 0.00000000 +0.810 0.385 0.00000000 +0.810 0.423 0.00000000 +0.810 0.462 0.00000000 +0.810 0.500 0.00000000 +0.810 0.538 0.00000000 +0.810 0.577 0.00000000 +0.810 0.615 0.00000000 +0.810 0.654 0.00000000 +0.810 0.692 0.00000001 +0.810 0.731 0.00000202 +0.810 0.769 0.00029198 +0.810 0.808 0.04054643 +0.810 0.846 0.85944566 +0.810 0.885 0.99887101 +0.810 0.923 0.99999219 +0.810 0.962 0.99999995 +0.810 1.000 1.00000000 +0.860 0.000 0.00000000 +0.860 0.038 0.00000000 +0.860 0.077 0.00000000 +0.860 0.115 0.00000000 +0.860 0.154 0.00000000 +0.860 0.192 0.00000000 +0.860 0.231 0.00000000 +0.860 0.269 0.00000000 +0.860 0.308 0.00000000 +0.860 0.346 0.00000000 +0.860 0.385 0.00000000 +0.860 0.423 0.00000000 +0.860 0.462 0.00000000 +0.860 0.500 0.00000000 +0.860 0.538 0.00000000 +0.860 0.577 0.00000000 +0.860 0.615 0.00000000 +0.860 0.654 0.00000000 +0.860 0.692 0.00000001 +0.860 0.731 0.00000140 +0.860 0.769 0.00020265 +0.860 0.808 0.02849284 +0.860 0.846 0.80929158 +0.860 0.885 0.99837403 +0.860 0.923 0.99998874 +0.860 0.962 0.99999992 +0.860 1.000 1.00000000 +0.910 0.000 0.00000000 +0.910 0.038 0.00000000 +0.910 0.077 0.00000000 +0.910 0.115 0.00000000 +0.910 0.154 0.00000000 +0.910 0.192 0.00000000 +0.910 0.231 0.00000000 +0.910 0.269 0.00000000 +0.910 0.308 
0.00000000 +0.910 0.346 0.00000000 +0.910 0.385 0.00000000 +0.910 0.423 0.00000000 +0.910 0.462 0.00000000 +0.910 0.500 0.00000000 +0.910 0.538 0.00000000 +0.910 0.577 0.00000000 +0.910 0.615 0.00000000 +0.910 0.654 0.00000000 +0.910 0.692 0.00000001 +0.910 0.731 0.00000085 +0.910 0.769 0.00012313 +0.910 0.808 0.01750617 +0.910 0.846 0.72052527 +0.910 0.885 0.99732647 +0.910 0.923 0.99998147 +0.910 0.962 0.99999987 +0.910 1.000 1.00000000 +0.960 0.000 0.00000000 +0.960 0.038 0.00000000 +0.960 0.077 0.00000000 +0.960 0.115 0.00000000 +0.960 0.154 0.00000000 +0.960 0.192 0.00000000 +0.960 0.231 0.00000000 +0.960 0.269 0.00000000 +0.960 0.308 0.00000000 +0.960 0.346 0.00000000 +0.960 0.385 0.00000000 +0.960 0.423 0.00000000 +0.960 0.462 0.00000000 +0.960 0.500 0.00000000 +0.960 0.538 0.00000000 +0.960 0.577 0.00000000 +0.960 0.615 0.00000000 +0.960 0.654 0.00000000 +0.960 0.692 0.00000000 +0.960 0.731 0.00000036 +0.960 0.769 0.00005188 +0.960 0.808 0.00745077 +0.960 0.846 0.52065068 +0.960 0.885 0.99367726 +0.960 0.923 0.99995603 +0.960 0.962 0.99999970 +0.960 1.000 1.00000000 diff --git a/src/perl/t/qc/check/identity/simulated_xer.txt b/src/perl/t/qc/check/identity/simulated_xer.txt new file mode 100644 index 000000000..6d6d85865 --- /dev/null +++ b/src/perl/t/qc/check/identity/simulated_xer.txt @@ -0,0 +1,541 @@ +xer concord id +0.010 0.000 0.00000000 +0.010 0.038 0.00000000 +0.010 0.077 0.00000000 +0.010 0.115 0.00000000 +0.010 0.154 0.00000000 +0.010 0.192 0.00000000 +0.010 0.231 0.00000000 +0.010 0.269 0.00000000 +0.010 0.308 0.00000000 +0.010 0.346 0.00000000 +0.010 0.385 0.00000000 +0.010 0.423 0.00000000 +0.010 0.462 0.00000000 +0.010 0.500 0.00000000 +0.010 0.538 0.00000000 +0.010 0.577 0.00000000 +0.010 0.615 0.00000000 +0.010 0.654 0.00000004 +0.010 0.692 0.00000589 +0.010 0.731 0.00085121 +0.010 0.769 0.10974040 +0.010 0.808 0.94690992 +0.010 0.846 0.99961266 +0.010 0.885 0.99999732 +0.010 0.923 0.99999998 +0.010 0.962 1.00000000 +0.010 1.000 1.00000000 
+0.020 0.000 0.00000000 +0.020 0.038 0.00000000 +0.020 0.077 0.00000000 +0.020 0.115 0.00000000 +0.020 0.154 0.00000000 +0.020 0.192 0.00000000 +0.020 0.231 0.00000000 +0.020 0.269 0.00000000 +0.020 0.308 0.00000000 +0.020 0.346 0.00000000 +0.020 0.385 0.00000000 +0.020 0.423 0.00000000 +0.020 0.462 0.00000000 +0.020 0.500 0.00000000 +0.020 0.538 0.00000000 +0.020 0.577 0.00000000 +0.020 0.615 0.00000024 +0.020 0.654 0.00001753 +0.020 0.692 0.00125398 +0.020 0.731 0.08249883 +0.020 0.769 0.86558111 +0.020 0.808 0.99783626 +0.020 0.846 0.99996972 +0.020 0.885 0.99999958 +0.020 0.923 0.99999999 +0.020 0.962 1.00000000 +0.020 1.000 1.00000000 +0.030 0.000 0.00000000 +0.030 0.038 0.00000000 +0.030 0.077 0.00000000 +0.030 0.115 0.00000000 +0.030 0.154 0.00000000 +0.030 0.192 0.00000000 +0.030 0.231 0.00000000 +0.030 0.269 0.00000000 +0.030 0.308 0.00000000 +0.030 0.346 0.00000000 +0.030 0.385 0.00000000 +0.030 0.423 0.00000000 +0.030 0.462 0.00000000 +0.030 0.500 0.00000000 +0.030 0.538 0.00000001 +0.030 0.577 0.00000025 +0.030 0.615 0.00001198 +0.030 0.654 0.00056582 +0.030 0.692 0.02605676 +0.030 0.731 0.55836089 +0.030 0.769 0.98353799 +0.030 0.808 0.99964594 +0.030 0.846 0.99999251 +0.030 0.885 0.99999984 +0.030 0.923 1.00000000 +0.030 0.962 1.00000000 +0.030 1.000 1.00000000 +0.040 0.000 0.00000000 +0.040 0.038 0.00000000 +0.040 0.077 0.00000000 +0.040 0.115 0.00000000 +0.040 0.154 0.00000000 +0.040 0.192 0.00000000 +0.040 0.231 0.00000000 +0.040 0.269 0.00000000 +0.040 0.308 0.00000000 +0.040 0.346 0.00000000 +0.040 0.385 0.00000000 +0.040 0.423 0.00000000 +0.040 0.462 0.00000000 +0.040 0.500 0.00000000 +0.040 0.538 0.00000015 +0.040 0.577 0.00000514 +0.040 0.615 0.00018020 +0.040 0.654 0.00628245 +0.040 0.692 0.18151015 +0.040 0.731 0.88608840 +0.040 0.769 0.99634842 +0.040 0.808 0.99989553 +0.040 0.846 0.99999702 +0.040 0.885 0.99999992 +0.040 0.923 1.00000000 +0.040 0.962 1.00000000 +0.040 1.000 1.00000000 +0.050 0.000 0.00000000 +0.050 0.038 0.00000000 +0.050 
0.077 0.00000000 +0.050 0.115 0.00000000 +0.050 0.154 0.00000000 +0.050 0.192 0.00000000 +0.050 0.231 0.00000000 +0.050 0.269 0.00000000 +0.050 0.308 0.00000000 +0.050 0.346 0.00000000 +0.050 0.385 0.00000000 +0.050 0.423 0.00000000 +0.050 0.462 0.00000000 +0.050 0.500 0.00000007 +0.050 0.538 0.00000184 +0.050 0.577 0.00005112 +0.050 0.615 0.00141764 +0.050 0.654 0.03792751 +0.050 0.692 0.52261345 +0.050 0.731 0.96815291 +0.050 0.769 0.99881683 +0.050 0.808 0.99995734 +0.050 0.846 0.99999846 +0.050 0.885 0.99999994 +0.050 0.923 1.00000000 +0.050 0.962 1.00000000 +0.050 1.000 1.00000000 +0.060 0.000 0.00000000 +0.060 0.038 0.00000000 +0.060 0.077 0.00000000 +0.060 0.115 0.00000000 +0.060 0.154 0.00000000 +0.060 0.192 0.00000000 +0.060 0.231 0.00000000 +0.060 0.269 0.00000000 +0.060 0.308 0.00000000 +0.060 0.346 0.00000000 +0.060 0.385 0.00000000 +0.060 0.423 0.00000000 +0.060 0.462 0.00000003 +0.060 0.500 0.00000062 +0.060 0.538 0.00001415 +0.060 0.577 0.00032399 +0.060 0.615 0.00736635 +0.060 0.654 0.14524227 +0.060 0.692 0.79553370 +0.060 0.731 0.98889986 +0.060 0.769 0.99951002 +0.060 0.808 0.99997859 +0.060 0.846 0.99999906 +0.060 0.885 0.99999996 +0.060 0.923 1.00000000 +0.060 0.962 1.00000000 +0.060 1.000 1.00000000 +0.070 0.000 0.00000000 +0.070 0.038 0.00000000 +0.070 0.077 0.00000000 +0.070 0.115 0.00000000 +0.070 0.154 0.00000000 +0.070 0.192 0.00000000 +0.070 0.231 0.00000000 +0.070 0.269 0.00000000 +0.070 0.308 0.00000000 +0.070 0.346 0.00000000 +0.070 0.385 0.00000000 +0.070 0.423 0.00000001 +0.070 0.462 0.00000021 +0.070 0.500 0.00000399 +0.070 0.538 0.00007748 +0.070 0.577 0.00150233 +0.070 0.615 0.02838614 +0.070 0.654 0.36195737 +0.070 0.692 0.91677393 +0.070 0.731 0.99534654 +0.070 0.769 0.99975929 +0.070 0.808 0.99998760 +0.070 0.846 0.99999936 +0.070 0.885 0.99999997 +0.070 0.923 1.00000000 +0.070 0.962 1.00000000 +0.070 1.000 1.00000000 +0.080 0.000 0.00000000 +0.080 0.038 0.00000000 +0.080 0.077 0.00000000 +0.080 0.115 0.00000000 +0.080 0.154 
0.00000000 +0.080 0.192 0.00000000 +0.080 0.231 0.00000000 +0.080 0.269 0.00000000 +0.080 0.308 0.00000000 +0.080 0.346 0.00000000 +0.080 0.385 0.00000000 +0.080 0.423 0.00000007 +0.080 0.462 0.00000117 +0.080 0.500 0.00001967 +0.080 0.538 0.00033056 +0.080 0.577 0.00552707 +0.080 0.615 0.08543299 +0.080 0.654 0.61090508 +0.080 0.692 0.96348926 +0.080 0.731 0.99775049 +0.080 0.769 0.99986588 +0.080 0.808 0.99999202 +0.080 0.846 0.99999953 +0.080 0.885 0.99999997 +0.080 0.923 1.00000000 +0.080 0.962 1.00000000 +0.080 1.000 1.00000000 +0.090 0.000 0.00000000 +0.090 0.038 0.00000000 +0.090 0.077 0.00000000 +0.090 0.115 0.00000000 +0.090 0.154 0.00000000 +0.090 0.192 0.00000000 +0.090 0.231 0.00000000 +0.090 0.269 0.00000000 +0.090 0.308 0.00000000 +0.090 0.346 0.00000000 +0.090 0.385 0.00000002 +0.090 0.423 0.00000036 +0.090 0.462 0.00000534 +0.090 0.500 0.00007891 +0.090 0.538 0.00116484 +0.090 0.577 0.01694191 +0.090 0.615 0.20298310 +0.090 0.654 0.79007398 +0.090 0.692 0.98233758 +0.090 0.731 0.99878479 +0.090 0.769 0.99991767 +0.090 0.808 0.99999443 +0.090 0.846 0.99999962 +0.090 0.885 0.99999997 +0.090 0.923 1.00000000 +0.090 0.962 1.00000000 +0.090 1.000 1.00000000 +0.100 0.000 0.00000000 +0.100 0.038 0.00000000 +0.100 0.077 0.00000000 +0.100 0.115 0.00000000 +0.100 0.154 0.00000000 +0.100 0.192 0.00000000 +0.100 0.231 0.00000000 +0.100 0.269 0.00000000 +0.100 0.308 0.00000000 +0.100 0.346 0.00000001 +0.100 0.385 0.00000012 +0.100 0.423 0.00000155 +0.100 0.462 0.00002044 +0.100 0.500 0.00026885 +0.100 0.538 0.00352489 +0.100 0.577 0.04446115 +0.100 0.615 0.37967089 +0.100 0.654 0.88951227 +0.100 0.692 0.99064535 +0.100 0.731 0.99928263 +0.100 0.769 0.99994543 +0.100 0.808 0.99999585 +0.100 0.846 0.99999968 +0.100 0.885 0.99999998 +0.100 0.923 1.00000000 +0.100 0.962 1.00000000 +0.100 1.000 1.00000000 +0.110 0.000 0.00000000 +0.110 0.038 0.00000000 +0.110 0.077 0.00000000 +0.110 0.115 0.00000000 +0.110 0.154 0.00000000 +0.110 0.192 0.00000000 +0.110 0.231 
0.00000000 +0.110 0.269 0.00000000 +0.110 0.308 0.00000000 +0.110 0.346 0.00000004 +0.110 0.385 0.00000049 +0.110 0.423 0.00000574 +0.110 0.462 0.00006789 +0.110 0.500 0.00080223 +0.110 0.538 0.00940487 +0.110 0.577 0.10093786 +0.110 0.615 0.57037557 +0.110 0.654 0.94011715 +0.110 0.692 0.99464228 +0.110 0.731 0.99954469 +0.110 0.769 0.99996148 +0.110 0.808 0.99999674 +0.110 0.846 0.99999972 +0.110 0.885 0.99999998 +0.110 0.923 1.00000000 +0.110 0.962 1.00000000 +0.110 1.000 1.00000000 +0.120 0.000 0.00000000 +0.120 0.038 0.00000000 +0.120 0.077 0.00000000 +0.120 0.115 0.00000000 +0.120 0.154 0.00000000 +0.120 0.192 0.00000000 +0.120 0.231 0.00000000 +0.120 0.269 0.00000000 +0.120 0.308 0.00000002 +0.120 0.346 0.00000016 +0.120 0.385 0.00000174 +0.120 0.423 0.00001870 +0.120 0.462 0.00020040 +0.120 0.500 0.00214375 +0.120 0.538 0.02250765 +0.120 0.577 0.19794066 +0.120 0.615 0.72565823 +0.120 0.654 0.96592836 +0.120 0.692 0.99671973 +0.120 0.731 0.99969303 +0.120 0.769 0.99997135 +0.120 0.808 0.99999733 +0.120 0.846 0.99999975 +0.120 0.885 0.99999998 +0.120 0.923 1.00000000 +0.120 0.962 1.00000000 +0.120 1.000 1.00000000 +0.130 0.000 0.00000000 +0.130 0.038 0.00000000 +0.130 0.077 0.00000000 +0.130 0.115 0.00000000 +0.130 0.154 0.00000000 +0.130 0.192 0.00000000 +0.130 0.231 0.00000000 +0.130 0.269 0.00000001 +0.130 0.308 0.00000006 +0.130 0.346 0.00000057 +0.130 0.385 0.00000560 +0.130 0.423 0.00005479 +0.130 0.462 0.00053564 +0.130 0.500 0.00521456 +0.130 0.538 0.04877075 +0.130 0.577 0.33399394 +0.130 0.615 0.83065443 +0.130 0.654 0.97958223 +0.130 0.692 0.99787354 +0.130 0.731 0.99978218 +0.130 0.769 0.99997773 +0.130 0.808 0.99999772 +0.130 0.846 0.99999977 +0.130 0.885 0.99999998 +0.130 0.923 1.00000000 +0.130 0.962 1.00000000 +0.130 1.000 1.00000000 +0.140 0.000 0.00000000 +0.140 0.038 0.00000000 +0.140 0.077 0.00000000 +0.140 0.115 0.00000000 +0.140 0.154 0.00000000 +0.140 0.192 0.00000000 +0.140 0.231 0.00000000 +0.140 0.269 0.00000002 +0.140 0.308 
0.00000020 +0.140 0.346 0.00000182 +0.140 0.385 0.00001633 +0.140 0.423 0.00014662 +0.140 0.462 0.00131482 +0.140 0.500 0.01168195 +0.140 0.538 0.09593936 +0.140 0.577 0.48790222 +0.140 0.615 0.89532992 +0.140 0.654 0.98714595 +0.140 0.692 0.99855173 +0.140 0.731 0.99983848 +0.140 0.769 0.99998201 +0.140 0.808 0.99999800 +0.140 0.846 0.99999978 +0.140 0.885 0.99999998 +0.140 0.923 1.00000000 +0.140 0.962 1.00000000 +0.140 1.000 1.00000000 +0.150 0.000 0.00000000 +0.150 0.038 0.00000000 +0.150 0.077 0.00000000 +0.150 0.115 0.00000000 +0.150 0.154 0.00000000 +0.150 0.192 0.00000000 +0.150 0.231 0.00000001 +0.150 0.269 0.00000008 +0.150 0.308 0.00000064 +0.150 0.346 0.00000529 +0.150 0.385 0.00004382 +0.150 0.423 0.00036280 +0.150 0.462 0.00299685 +0.150 0.500 0.02428999 +0.150 0.538 0.17093570 +0.150 0.577 0.63066726 +0.150 0.615 0.93395988 +0.150 0.654 0.99153456 +0.150 0.692 0.99897019 +0.150 0.731 0.99987555 +0.150 0.769 0.99998497 +0.150 0.808 0.99999819 +0.150 0.846 0.99999978 +0.150 0.885 0.99999997 +0.150 0.923 1.00000000 +0.150 0.962 1.00000000 +0.150 1.000 1.00000000 +0.160 0.000 0.00000000 +0.160 0.038 0.00000000 +0.160 0.077 0.00000000 +0.160 0.115 0.00000000 +0.160 0.154 0.00000000 +0.160 0.192 0.00000000 +0.160 0.231 0.00000003 +0.160 0.269 0.00000024 +0.160 0.308 0.00000186 +0.160 0.346 0.00001425 +0.160 0.385 0.00010932 +0.160 0.423 0.00083823 +0.160 0.462 0.00639600 +0.160 0.500 0.04706809 +0.160 0.538 0.27483458 +0.160 0.577 0.74411877 +0.160 0.615 0.95710694 +0.160 0.654 0.99419332 +0.160 0.692 0.99923940 +0.160 0.731 0.99990081 +0.160 0.769 0.99998707 +0.160 0.808 0.99999832 +0.160 0.846 0.99999978 +0.160 0.885 0.99999997 +0.160 0.923 1.00000000 +0.160 0.962 1.00000000 +0.160 1.000 1.00000000 +0.170 0.000 0.00000000 +0.170 0.038 0.00000000 +0.170 0.077 0.00000000 +0.170 0.115 0.00000000 +0.170 0.154 0.00000000 +0.170 0.192 0.00000001 +0.170 0.231 0.00000010 +0.170 0.269 0.00000070 +0.170 0.308 0.00000502 +0.170 0.346 0.00003586 +0.170 0.385 
0.00025580 +0.170 0.423 0.00182244 +0.170 0.462 0.01286070 +0.170 0.500 0.08505868 +0.170 0.538 0.39881582 +0.170 0.577 0.82559372 +0.170 0.615 0.97124682 +0.170 0.654 0.99586839 +0.170 0.692 0.99941893 +0.170 0.731 0.99991853 +0.170 0.769 0.99998858 +0.170 0.808 0.99999840 +0.170 0.846 0.99999978 +0.170 0.885 0.99999997 +0.170 0.923 1.00000000 +0.170 0.962 1.00000000 +0.170 1.000 1.00000000 +0.180 0.000 0.00000000 +0.180 0.038 0.00000000 +0.180 0.077 0.00000000 +0.180 0.115 0.00000000 +0.180 0.154 0.00000001 +0.180 0.192 0.00000004 +0.180 0.231 0.00000029 +0.180 0.269 0.00000192 +0.180 0.308 0.00001276 +0.180 0.346 0.00008495 +0.180 0.385 0.00056532 +0.180 0.423 0.00375199 +0.180 0.462 0.02446186 +0.180 0.500 0.14306821 +0.180 0.538 0.52642543 +0.180 0.577 0.88096889 +0.180 0.615 0.98011052 +0.180 0.654 0.99696139 +0.180 0.692 0.99954244 +0.180 0.731 0.99993125 +0.180 0.769 0.99998967 +0.180 0.808 0.99999845 +0.180 0.846 0.99999977 +0.180 0.885 0.99999997 +0.180 0.923 0.99999999 +0.180 0.962 1.00000000 +0.180 1.000 1.00000000 +0.190 0.000 0.00000000 +0.190 0.038 0.00000000 +0.190 0.077 0.00000000 +0.190 0.115 0.00000000 +0.190 0.154 0.00000002 +0.190 0.192 0.00000013 +0.190 0.231 0.00000079 +0.190 0.269 0.00000491 +0.190 0.308 0.00003061 +0.190 0.346 0.00019069 +0.190 0.385 0.00118696 +0.190 0.423 0.00735003 +0.190 0.462 0.04410083 +0.190 0.500 0.22327639 +0.190 0.538 0.64171733 +0.190 0.577 0.91776236 +0.190 0.615 0.98582257 +0.190 0.654 0.99769720 +0.190 0.692 0.99962970 +0.190 0.731 0.99994055 +0.190 0.769 0.99999046 +0.190 0.808 0.99999847 +0.190 0.846 0.99999975 +0.190 0.885 0.99999996 +0.190 0.923 0.99999999 +0.190 0.962 1.00000000 +0.190 1.000 1.00000000 +0.200 0.000 0.00000000 +0.200 0.038 0.00000000 +0.200 0.077 0.00000000 +0.200 0.115 0.00000001 +0.200 0.154 0.00000006 +0.200 0.192 0.00000035 +0.200 0.231 0.00000204 +0.200 0.269 0.00001193 +0.200 0.308 0.00006977 +0.200 0.346 0.00040773 +0.200 0.385 0.00237897 +0.200 0.423 0.01374931 +0.200 0.462 
0.07535929 +0.200 0.500 0.32270811 +0.200 0.538 0.73583459 +0.200 0.577 0.94214470 +0.200 0.615 0.98960517 +0.200 0.654 0.99820648 +0.200 0.692 0.99969276 +0.200 0.731 0.99994743 +0.200 0.769 0.99999101 +0.200 0.808 0.99999846 +0.200 0.846 0.99999974 +0.200 0.885 0.99999995 +0.200 0.923 0.99999999 +0.200 0.962 1.00000000 +0.200 1.000 1.00000000 diff --git a/src/perl/t/qc_small.t b/src/perl/t/qc_small.t index 2c9072a64..2c9b9750b 100644 --- a/src/perl/t/qc_small.t +++ b/src/perl/t/qc_small.t @@ -13,8 +13,8 @@ use File::Temp qw/tempdir/; use FindBin qw($Bin); use JSON; -use Test::More tests => 49; -use WTSI::NPG::Genotyping::QC::QCPlotShared qw/readFileToString readSampleInclusion/; +use Test::More tests => 53; + use WTSI::NPG::Genotyping::QC::QCPlotTests qw(jsonPathOK pngPathOK xmlPathOK); my $testName = 'small_test'; @@ -151,11 +151,11 @@ my @args = ("--output-dir=$outDir", "--run=$piperun", "--inipath=$iniPath", "--config=$config", - "--vcf=$vcf", - "--plex=$plexManifest", - " --sample-json=$sampleJson", + "--vcf=$vcf,$vcf", + "--plex-manifests=$plexManifest", + "--sample-json=$sampleJson", "--mafhet", - $plink); + "--plink=$plink"); is(system("$bin/run_qc.pl ".join(" ", @args)), 0, "run_qc.pl bootstrap script exit status"); @@ -186,10 +186,25 @@ ok($heatMapsOK, "Plate heatmap outputs OK"); ok(-r $outDir.'/pipeline_summary.csv', "CSV summary found"); ok(-r $outDir.'/pipeline_summary.pdf', "PDF summary found"); + +## check that run_qc.pl dies with incorrect arguments for alternate ID check +system("rm -Rf $outDir/*"); # remove output from previous tests +system("cp $dbfileMasterA $tempdir"); +my $cmd_base = "$bin/run_qc.pl --output-dir=$outDir --dbpath=$dbfile --sim=$sim --plink=$plink --run=$piperun --inipath=$iniPath --mafhet --config=$config"; +isnt(system($cmd_base." 
--vcf $vcf 2> /dev/null"), 0, + 'Non-zero exit for run_qc.pl with --vcf but not --plex-manifest'); +ok(!(-e $outDir.'/pipeline_summary.csv'), "CSV summary not found"); + +system("rm -Rf $outDir/*"); # remove output from previous tests +system("cp $dbfileMasterA $tempdir"); +isnt(system($cmd_base." --plex-manifests $plexManifest 2> /dev/null"), 0, + 'Non-zero exit for run_qc.pl with --plex-manifest but not --vcf'); +ok(!(-e $outDir.'/pipeline_summary.csv'), "CSV summary not found"); + ## run_qc.pl again, without the arguments for alternate identity check system("rm -Rf $outDir/*"); # remove output from previous tests system("cp $dbfileMasterA $tempdir"); -$cmd = "$bin/run_qc.pl --output-dir=$outDir --dbpath=$dbfile --sim=$sim $plink --run=$piperun --inipath=$iniPath --mafhet --config=$config"; +$cmd = "$bin/run_qc.pl --output-dir=$outDir --dbpath=$dbfile --sim=$sim --plink=$plink --run=$piperun --inipath=$iniPath --mafhet --config=$config"; is(system($cmd), 0, "run_qc.pl bootstrap script exit status, no alternate identity check"); diff --git a/src/perl/t/qc_test_data/output_examples/qc_results.csv b/src/perl/t/qc_test_data/output_examples/qc_results.csv new file mode 100644 index 000000000..b605548e4 --- /dev/null +++ b/src/perl/t/qc_test_data/output_examples/qc_results.csv @@ -0,0 +1,101 @@ +run,project,data_supplier,snpset,rowcol,beadchip_number,supplier_name,cohort,sample,include,plate,well,pass,identity_pass,identity_value,duplicate_pass,duplicate_value,gender_pass,gender_xhet,gender_inferred,gender_supplied,call_rate_pass,call_rate_value,heterozygosity_pass,heterozygosity_value,low_maf_het_pass,low_maf_het_value,high_maf_het_pass,high_maf_het_value,magnitude_pass,magnitude_value,xydiff_pass,xydiff_value 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number001,beadchip_ABC123456,supplier_WeylandYutani00001,xenomorph_cohort,urn:wtsi:plate0001_B01_sample000001,Included,ssbc00000,A02,Fail,Pass,NA,Fail,1.0000,Pass,0.000000,Male,Male,Pass,0.970297,Pass,0.2826,Pass,0,Pass,0.12381,Pass,1.026298,Pass,-0.089915 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number002,beadchip_ABC123456,supplier_WeylandYutani00002,xenomorph_cohort,urn:wtsi:plate0001_C01_sample000002,Included,ssbc00000,A03,Fail,Pass,NA,Fail,1.0000,Pass,0.000000,Male,Male,Fail,0.920792,Pass,0.2683,Pass,0,Pass,0.12381,Pass,0.960316,Pass,-0.209444 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number004,beadchip_ABC123456,supplier_WeylandYutani00004,xenomorph_cohort,urn:wtsi:plate0001_E01_sample000004,Included,ssbc00000,A05,Pass,Pass,NA,Pass,1.0000,Pass,0.500000,Female,Female,Pass,0.980198,Pass,0.1364,Pass,0,Pass,0.204762,Pass,1.004945,Pass,-0.079625 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number005,beadchip_ABC123456,supplier_WeylandYutani00005,xenomorph_cohort,urn:wtsi:plate0001_F01_sample000005,Included,ssbc00000,A06,Pass,Pass,NA,Pass,1.0000,Pass,0.500000,Female,Female,Pass,0.980198,Pass,0.1364,Pass,0,Pass,0.204762,Pass,0.992848,Pass,-0.105508 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number006,beadchip_ABC123456,supplier_WeylandYutani00006,xenomorph_cohort,urn:wtsi:plate0001_G01_sample000006,Included,ssbc00000,A07,Fail,Pass,NA,Fail,1.0000,Fail,0.000000,Male,Female,Pass,0.970297,Pass,0.2000,Pass,0,Pass,0.233333,Pass,1.016030,Pass,-0.075592 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number007,beadchip_ABC123456,supplier_WeylandYutani00007,xenomorph_cohort,urn:wtsi:plate0001_H01_sample000007,Included,ssbc00000,A08,Fail,Pass,NA,Pass,0,Fail,0.000000,Male,Female,Pass,0.970297,Pass,0.2000,Pass,0,Pass,0.233333,Pass,1.011790,Pass,-0.082686 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number008,beadchip_ABC123456,supplier_WeylandYutani00008,xenomorph_cohort,urn:wtsi:plate0001_A02_sample000008,Included,ssbc00000,A09,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.950495,Pass,0.2143,Pass,0,Pass,0.233333,Pass,0.960365,Pass,-0.157923 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number009,beadchip_ABC123456,supplier_WeylandYutani00009,xenomorph_cohort,urn:wtsi:plate0001_B02_sample000009,Included,ssbc00000,A10,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.950495,Pass,0.2143,Pass,0,Pass,0.233333,Pass,0.985721,Pass,-0.152706 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number010,beadchip_ABC123456,supplier_WeylandYutani00010,xenomorph_cohort,urn:wtsi:plate0001_C02_sample000010,Included,ssbc00000,A11,Pass,Pass,NA,Pass,0,Pass,1.000000,Female,Female,Pass,0.970297,Pass,0.2326,Pass,0,Pass,0.228571,Pass,0.964957,Pass,-0.113543 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number011,beadchip_ABC123456,supplier_WeylandYutani00011,xenomorph_cohort,urn:wtsi:plate0001_D02_sample000011,Included,ssbc00000,A12,Fail,Pass,NA,Pass,0,Fail,0.500000,Female,Male,Pass,0.960396,Pass,0.3023,Pass,0,Pass,0.133333,Pass,0.990876,Pass,-0.109427 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number012,beadchip_ABC123456,supplier_WeylandYutani00012,xenomorph_cohort,urn:wtsi:plate0001_E02_sample000012,Included,ssbc00000,A13,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.950495,Pass,0.2093,Pass,0,Pass,0.128571,Pass,1.007754,Pass,-0.149734 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number013,beadchip_ABC123456,supplier_WeylandYutani00013,xenomorph_cohort,urn:wtsi:plate0001_F02_sample000013,Included,ssbc00000,A14,Fail,Pass,NA,Pass,0,Pass,1.000000,Female,Female,Fail,0.940594,Pass,0.1111,Pass,0,Pass,0.142857,Pass,0.981736,Pass,-0.130109 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number014,beadchip_ABC123456,supplier_WeylandYutani00014,xenomorph_cohort,urn:wtsi:plate0001_G02_sample000014,Included,ssbc00000,A15,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.970297,Pass,0.3556,Pass,0,Pass,0.147619,Pass,0.998786,Pass,-0.093042 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number015,beadchip_ABC123456,supplier_WeylandYutani00015,xenomorph_cohort,urn:wtsi:plate0001_H02_sample000015,Included,ssbc00000,A16,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.950495,Pass,0.2444,Pass,0,Pass,0.261905,Pass,0.988325,Pass,-0.139028 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number016,beadchip_ABC123456,supplier_WeylandYutani00016,xenomorph_cohort,urn:wtsi:plate0001_A03_sample000016,Included,ssbc00000,A17,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.960396,Pass,0.2143,Pass,0,Pass,0.119048,Pass,1.014335,Pass,-0.245071 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number017,beadchip_ABC123456,supplier_WeylandYutani00017,xenomorph_cohort,urn:wtsi:plate0001_B03_sample000017,Included,ssbc00000,A18,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.960396,Pass,0.1778,Pass,0,Pass,0.114286,Pass,1.043134,Pass,-0.173508 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number018,beadchip_ABC123456,supplier_WeylandYutani00018,xenomorph_cohort,urn:wtsi:plate0001_C03_sample000018,Included,ssbc00000,A19,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.960396,Pass,0.2667,Pass,0,Pass,0.128571,Pass,0.985645,Pass,-0.110577 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number019,beadchip_ABC123456,supplier_WeylandYutani00019,xenomorph_cohort,urn:wtsi:plate0001_D03_sample000019,Included,ssbc00000,A20,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.970297,Pass,0.3111,Pass,0,Pass,0.152381,Pass,1.025111,Pass,-0.099961 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number020,beadchip_ABC123456,supplier_WeylandYutani00020,xenomorph_cohort,urn:wtsi:plate0001_E03_sample000020,Included,ssbc00000,A21,Fail,Pass,NA,Pass,0,Fail,0.500000,Female,Male,Pass,0.990099,Pass,0.2222,Pass,0,Pass,0.138095,Pass,0.966975,Pass,-0.011922 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number021,beadchip_ABC123456,supplier_WeylandYutani00021,xenomorph_cohort,urn:wtsi:plate0001_F03_sample000021,Included,ssbc00000,A22,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.990099,Pass,0.1739,Pass,0,Pass,0.095238,Pass,0.994190,Pass,-0.246363 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number023,beadchip_ABC123456,supplier_WeylandYutani00023,xenomorph_cohort,urn:wtsi:plate0001_H03_sample000023,Included,ssbc00000,A24,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.990099,Pass,0.2609,Pass,0,Pass,0.119048,Pass,1.013329,Pass,-0.217666 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number024,beadchip_ABC123456,supplier_WeylandYutani00024,xenomorph_cohort,urn:wtsi:plate0001_A04_sample000024,Included,ssbc00000,B01,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,1.000000,Pass,0.1304,Pass,0,Pass,0.214286,Pass,0.986381,Pass,-0.100486 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number025,beadchip_ABC123456,supplier_WeylandYutani00025,xenomorph_cohort,urn:wtsi:plate0001_B04_sample000025,Included,ssbc00000,B02,Fail,Pass,NA,Pass,0,Fail,0.000000,Male,Female,Pass,0.950495,Pass,0.2955,Pass,0,Pass,0.228571,Pass,1.016424,Pass,-0.039542 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number026,beadchip_ABC123456,supplier_WeylandYutani00026,xenomorph_cohort,urn:wtsi:plate0001_C04_sample000026,Included,ssbc00000,B03,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.960396,Pass,0.2000,Pass,0,Pass,0.22381,Pass,0.969004,Pass,-0.161896 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number027,beadchip_ABC123456,supplier_WeylandYutani00027,xenomorph_cohort,urn:wtsi:plate0001_D04_sample000027,Included,ssbc00000,B04,Pass,Pass,NA,Pass,0,Pass,1.000000,Female,Female,Pass,0.960396,Pass,0.3182,Pass,0,Pass,0.27619,Pass,0.991528,Pass,-0.182563 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number028,beadchip_ABC123456,supplier_WeylandYutani00028,xenomorph_cohort,urn:wtsi:plate0001_E04_sample000028,Included,ssbc00000,B05,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.950495,Pass,0.3261,Pass,0,Pass,0.319048,Pass,1.005525,Pass,-0.120576 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number029,beadchip_ABC123456,supplier_WeylandYutani00029,xenomorph_cohort,urn:wtsi:plate0001_F04_sample000029,Included,ssbc00000,B06,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.990099,Pass,0.2391,Pass,0,Pass,0.138095,Pass,1.016970,Pass,-0.201367 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number030,beadchip_ABC123456,supplier_WeylandYutani00030,xenomorph_cohort,urn:wtsi:plate0001_G04_sample000030,Included,ssbc00000,B07,Pass,Pass,NA,Pass,0,Pass,1.000000,Female,Female,Pass,0.970297,Pass,0.1364,Pass,0,Pass,0.219048,Pass,1.024849,Pass,-0.240325 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number031,beadchip_ABC123456,supplier_WeylandYutani00031,xenomorph_cohort,urn:wtsi:plate0001_H04_sample000031,Included,ssbc00000,B08,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.990099,Pass,0.3556,Pass,0,Pass,0.161905,Pass,1.011570,Pass,-0.181667 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number032,beadchip_ABC123456,supplier_WeylandYutani00032,xenomorph_cohort,urn:wtsi:plate0001_A05_sample000032,Included,ssbc00000,B09,Fail,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Fail,0.940594,Pass,0.2619,Pass,0,Pass,0.157143,Pass,1.026305,Pass,-0.145595 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number033,beadchip_ABC123456,supplier_WeylandYutani00033,xenomorph_cohort,urn:wtsi:plate0001_B05_sample000033,Included,ssbc00000,B10,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.960396,Pass,0.2791,Pass,0,Pass,0.119048,Pass,0.971288,Pass,-0.196105 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number034,beadchip_ABC123456,supplier_WeylandYutani00034,xenomorph_cohort,urn:wtsi:plate0001_C05_sample000034,Included,ssbc00000,B11,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.980198,Pass,0.2889,Pass,0,Pass,0.114286,Pass,0.991093,Pass,-0.104210 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number035,beadchip_ABC123456,supplier_WeylandYutani00035,xenomorph_cohort,urn:wtsi:plate0001_D05_sample000035,Included,ssbc00000,B12,Fail,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.980198,Pass,0.3556,Pass,0,Pass,0.285714,Pass,0.996472,Fail,0.001561 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number036,beadchip_ABC123456,supplier_WeylandYutani00036,xenomorph_cohort,urn:wtsi:plate0001_E05_sample000036,Included,ssbc00000,B13,Fail,Pass,NA,Pass,0,Fail,0.000000,Male,Female,Pass,0.970297,Pass,0.2045,Pass,0,Pass,0.242857,Pass,1.014827,Pass,-0.139358 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number037,beadchip_ABC123456,supplier_WeylandYutani00037,xenomorph_cohort,urn:wtsi:plate0001_F05_sample000037,Included,ssbc00000,B14,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.950495,Pass,0.2889,Pass,0,Pass,0.128571,Pass,0.998067,Pass,-0.049692 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number038,beadchip_ABC123456,supplier_WeylandYutani00038,xenomorph_cohort,urn:wtsi:plate0001_G05_sample000038,Included,ssbc00000,B15,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.960396,Pass,0.2326,Pass,0,Pass,0.22381,Pass,1.015826,Pass,-0.174420 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number039,beadchip_ABC123456,supplier_WeylandYutani00039,xenomorph_cohort,urn:wtsi:plate0001_H05_sample000039,Included,ssbc00000,B16,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.970297,Pass,0.2500,Pass,0,Pass,0.104762,Pass,0.994305,Pass,-0.242296 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number040,beadchip_ABC123456,supplier_WeylandYutani00040,xenomorph_cohort,urn:wtsi:plate0001_A06_sample000040,Included,ssbc00000,B17,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.960396,Pass,0.1860,Pass,0,Pass,0.104762,Pass,0.971424,Pass,-0.064558 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number041,beadchip_ABC123456,supplier_WeylandYutani00041,xenomorph_cohort,urn:wtsi:plate0001_B06_sample000041,Included,ssbc00000,B18,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,1.000000,Pass,0.3043,Pass,0,Pass,0.266667,Pass,0.987562,Pass,-0.126078 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number042,beadchip_ABC123456,supplier_WeylandYutani00042,xenomorph_cohort,urn:wtsi:plate0001_C06_sample000042,Included,ssbc00000,B19,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.950495,Pass,0.1111,Pass,0,Pass,0.104762,Pass,1.005553,Pass,-0.140127 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number043,beadchip_ABC123456,supplier_WeylandYutani00043,xenomorph_cohort,urn:wtsi:plate0001_D06_sample000043,Included,ssbc00000,B20,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.980198,Pass,0.2000,Pass,0,Pass,0.104762,Pass,0.999255,Pass,-0.228706 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number045,beadchip_ABC123456,supplier_WeylandYutani00045,xenomorph_cohort,urn:wtsi:plate0001_F06_sample000045,Included,ssbc00000,B22,Fail,Pass,NA,Pass,0,Fail,0.000000,Male,Female,Pass,0.990099,Pass,0.3333,Pass,0,Pass,0.266667,Pass,1.031918,Pass,-0.029179 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number046,beadchip_ABC123456,supplier_WeylandYutani00046,xenomorph_cohort,urn:wtsi:plate0001_G06_sample000046,Included,ssbc00000,B23,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.980198,Pass,0.2174,Pass,0,Pass,0.1,Pass,1.015049,Pass,-0.137649 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number047,beadchip_ABC123456,supplier_WeylandYutani00047,xenomorph_cohort,urn:wtsi:plate0001_H06_sample000047,Included,ssbc00000,B24,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.980198,Pass,0.3778,Pass,0,Pass,0.252381,Pass,0.966256,Pass,-0.182645 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number048,beadchip_ABC123456,supplier_WeylandYutani00048,xenomorph_cohort,urn:wtsi:plate0001_A07_sample000048,Included,ssbc00000,C01,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.980198,Pass,0.2222,Pass,0,Pass,0.119048,Pass,0.996882,Pass,-0.128223 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number049,beadchip_ABC123456,supplier_WeylandYutani00049,xenomorph_cohort,urn:wtsi:plate0001_B07_sample000049,Included,ssbc00000,C02,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.980198,Pass,0.2727,Pass,0,Pass,0.104762,Pass,0.967954,Pass,-0.097461 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number050,beadchip_ABC123456,supplier_WeylandYutani00050,xenomorph_cohort,urn:wtsi:plate0001_C07_sample000050,Included,ssbc00000,C03,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.970297,Pass,0.1818,Pass,0,Pass,0.119048,Pass,1.038989,Pass,-0.141620 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number051,beadchip_ABC123456,supplier_WeylandYutani00051,xenomorph_cohort,urn:wtsi:plate0001_D07_sample000051,Included,ssbc00000,C04,Pass,Pass,NA,Pass,0,Pass,1.000000,Female,Female,Pass,0.970297,Pass,0.2667,Pass,0,Pass,0.209524,Pass,0.949601,Pass,-0.138272 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number052,beadchip_ABC123456,supplier_WeylandYutani00052,xenomorph_cohort,urn:wtsi:plate0001_E07_sample000052,Included,ssbc00000,C05,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.970297,Pass,0.3333,Pass,0,Pass,0.242857,Pass,1.007954,Pass,-0.107912 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number053,beadchip_ABC123456,supplier_WeylandYutani00053,xenomorph_cohort,urn:wtsi:plate0001_F07_sample000053,Included,ssbc00000,C06,Fail,Pass,NA,Pass,0,Fail,0.000000,Male,Female,Pass,0.970297,Pass,0.1778,Pass,0,Pass,0.214286,Pass,0.988512,Pass,-0.115962 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number054,beadchip_ABC123456,supplier_WeylandYutani00054,xenomorph_cohort,urn:wtsi:plate0001_G07_sample000054,Included,ssbc00000,C07,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.960396,Pass,0.2791,Pass,0,Pass,0.128571,Pass,0.970802,Pass,-0.042666 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number055,beadchip_ABC123456,supplier_WeylandYutani00055,xenomorph_cohort,urn:wtsi:plate0001_H07_sample000055,Included,ssbc00000,C08,Fail,Pass,NA,Pass,0,Fail,0.000000,Male,Female,Pass,0.970297,Pass,0.3636,Pass,0,Pass,0.242857,Pass,0.986665,Pass,-0.168941 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number056,beadchip_ABC123456,supplier_WeylandYutani00056,xenomorph_cohort,urn:wtsi:plate0001_A08_sample000056,Included,ssbc00000,C09,Fail,Pass,NA,Pass,0,Fail,0.000000,Male,Female,Pass,0.970297,Pass,0.2826,Pass,0,Pass,0.242857,Pass,1.004768,Pass,-0.107821 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number057,beadchip_ABC123456,supplier_WeylandYutani00057,xenomorph_cohort,urn:wtsi:plate0001_B08_sample000057,Included,ssbc00000,C10,Fail,Pass,NA,Pass,0,Fail,0.000000,Male,Female,Pass,0.960396,Pass,0.2326,Pass,0,Pass,0.209524,Pass,1.003525,Pass,-0.119656 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number058,beadchip_ABC123456,supplier_WeylandYutani00058,xenomorph_cohort,urn:wtsi:plate0001_C08_sample000058,Included,ssbc00000,C11,Fail,Pass,NA,Pass,0,Fail,0.500000,Female,Male,Pass,0.980198,Pass,0.1556,Pass,0,Pass,0.090476,Pass,1.036527,Pass,-0.061958 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number059,beadchip_ABC123456,supplier_WeylandYutani00059,xenomorph_cohort,urn:wtsi:plate0001_D08_sample000059,Included,ssbc00000,C12,Fail,Pass,NA,Pass,0,Fail,0.000000,Male,Female,Pass,0.960396,Pass,0.1591,Pass,0,Pass,0.22381,Pass,1.035433,Pass,-0.227489 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number060,beadchip_ABC123456,supplier_WeylandYutani00060,xenomorph_cohort,urn:wtsi:plate0001_E08_sample000060,Included,ssbc00000,C13,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.960396,Pass,0.2093,Pass,0,Pass,0.128571,Pass,0.977505,Pass,-0.149240 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number061,beadchip_ABC123456,supplier_WeylandYutani00061,xenomorph_cohort,urn:wtsi:plate0001_F08_sample000061,Included,ssbc00000,C14,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.960396,Pass,0.1778,Pass,0,Pass,0.128571,Pass,1.013088,Pass,-0.196718 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number062,beadchip_ABC123456,supplier_WeylandYutani00062,xenomorph_cohort,urn:wtsi:plate0001_G08_sample000062,Included,ssbc00000,C15,Fail,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Fail,0.940594,Pass,0.2195,Pass,0,Pass,0.104762,Pass,1.015147,Pass,-0.123747 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number063,beadchip_ABC123456,supplier_WeylandYutani00063,xenomorph_cohort,urn:wtsi:plate0001_H08_sample000063,Included,ssbc00000,C16,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.950495,Pass,0.2222,Pass,0,Pass,0.142857,Pass,0.986621,Pass,-0.058612 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number064,beadchip_ABC123456,supplier_WeylandYutani00064,xenomorph_cohort,urn:wtsi:plate0001_A09_sample000064,Included,ssbc00000,C17,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.950495,Pass,0.2889,Pass,0,Pass,0.147619,Pass,0.991262,Pass,-0.250982 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number065,beadchip_ABC123456,supplier_WeylandYutani00065,xenomorph_cohort,urn:wtsi:plate0001_B09_sample000065,Included,ssbc00000,C18,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.950495,Pass,0.2381,Pass,0,Pass,0.090476,Pass,1.002049,Pass,-0.208031 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number066,beadchip_ABC123456,supplier_WeylandYutani00066,xenomorph_cohort,urn:wtsi:plate0001_C09_sample000066,Included,ssbc00000,C19,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.950495,Pass,0.3023,Pass,0,Pass,0.290476,Pass,0.971911,Pass,-0.117757 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number067,beadchip_ABC123456,supplier_WeylandYutani00067,xenomorph_cohort,urn:wtsi:plate0001_D09_sample000067,Included,ssbc00000,C20,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.980198,Pass,0.2391,Pass,0,Pass,0.22381,Pass,0.999049,Pass,-0.100353 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number068,beadchip_ABC123456,supplier_WeylandYutani00068,xenomorph_cohort,urn:wtsi:plate0001_E09_sample000068,Included,ssbc00000,C21,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.950495,Pass,0.1905,Pass,0,Pass,0.1,Pass,1.029257,Pass,-0.090627 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number069,beadchip_ABC123456,supplier_WeylandYutani00069,xenomorph_cohort,urn:wtsi:plate0001_F09_sample000069,Included,ssbc00000,C22,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.960396,Pass,0.2609,Pass,0,Pass,0.257143,Pass,1.039111,Pass,-0.108667 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number070,beadchip_ABC123456,supplier_WeylandYutani00070,xenomorph_cohort,urn:wtsi:plate0001_G09_sample000070,Included,ssbc00000,C23,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.960396,Pass,0.2727,Pass,0,Pass,0.219048,Pass,0.994820,Pass,-0.125439 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number071,beadchip_ABC123456,supplier_WeylandYutani00071,xenomorph_cohort,urn:wtsi:plate0001_H09_sample000071,Included,ssbc00000,C24,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.960396,Pass,0.2826,Pass,0,Pass,0.171429,Pass,0.964609,Pass,-0.166427 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number072,beadchip_ABC123456,supplier_WeylandYutani00072,xenomorph_cohort,urn:wtsi:plate0001_A10_sample000072,Included,ssbc00000,D01,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.960396,Pass,0.2381,Pass,0,Pass,0.085714,Pass,0.966010,Pass,-0.086076 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number073,beadchip_ABC123456,supplier_WeylandYutani00073,xenomorph_cohort,urn:wtsi:plate0001_B10_sample000073,Included,ssbc00000,D02,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.970297,Pass,0.2889,Pass,0,Pass,0.109524,Pass,0.992177,Pass,-0.120641 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number074,beadchip_ABC123456,supplier_WeylandYutani00074,xenomorph_cohort,urn:wtsi:plate0001_C10_sample000074,Included,ssbc00000,D03,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.990099,Pass,0.4000,Pass,0,Pass,0.180952,Pass,0.998443,Pass,-0.174678 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number075,beadchip_ABC123456,supplier_WeylandYutani00075,xenomorph_cohort,urn:wtsi:plate0001_D10_sample000075,Included,ssbc00000,D04,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.980198,Pass,0.1364,Pass,0,Pass,0.119048,Pass,1.028869,Pass,-0.065512 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number076,beadchip_ABC123456,supplier_WeylandYutani00076,xenomorph_cohort,urn:wtsi:plate0001_E10_sample000076,Included,ssbc00000,D05,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.970297,Pass,0.3556,Pass,0,Pass,0.157143,Pass,0.976225,Pass,-0.090911 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number077,beadchip_ABC123456,supplier_WeylandYutani00077,xenomorph_cohort,urn:wtsi:plate0001_F10_sample000077,Included,ssbc00000,D06,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.960396,Pass,0.2391,Pass,0,Pass,0.114286,Pass,1.002032,Pass,-0.127432 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number078,beadchip_ABC123456,supplier_WeylandYutani00078,xenomorph_cohort,urn:wtsi:plate0001_G10_sample000078,Included,ssbc00000,D07,Fail,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Fail,0.930693,Pass,0.1951,Pass,0,Pass,0.109524,Pass,0.980207,Pass,-0.202984 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number079,beadchip_ABC123456,supplier_WeylandYutani00079,xenomorph_cohort,urn:wtsi:plate0001_H10_sample000079,Included,ssbc00000,D08,Fail,Pass,NA,Pass,0,Fail,0.000000,Male,Female,Pass,0.960396,Pass,0.3478,Pass,0,Pass,0.209524,Pass,1.003459,Pass,-0.141715 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number080,beadchip_ABC123456,supplier_WeylandYutani00080,xenomorph_cohort,urn:wtsi:plate0001_A11_sample000080,Included,ssbc00000,D09,Fail,Pass,NA,Pass,0,Fail,0.000000,Male,Female,Pass,0.980198,Pass,0.3409,Pass,0,Pass,0.266667,Pass,1.037730,Pass,-0.245462 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number081,beadchip_ABC123456,supplier_WeylandYutani00081,xenomorph_cohort,urn:wtsi:plate0001_B11_sample000081,Included,ssbc00000,D10,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.960396,Pass,0.2444,Pass,0,Pass,0.114286,Pass,1.023849,Pass,-0.067639 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number082,beadchip_ABC123456,supplier_WeylandYutani00082,xenomorph_cohort,urn:wtsi:plate0001_C11_sample000082,Included,ssbc00000,D11,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.970297,Pass,0.2500,Pass,0,Pass,0.27619,Pass,0.987439,Pass,-0.134052 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number083,beadchip_ABC123456,supplier_WeylandYutani00083,xenomorph_cohort,urn:wtsi:plate0001_D11_sample000083,Included,ssbc00000,D12,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.990099,Pass,0.2222,Pass,0,Pass,0.128571,Pass,1.041952,Pass,-0.147986 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number084,beadchip_ABC123456,supplier_WeylandYutani00084,xenomorph_cohort,urn:wtsi:plate0001_E11_sample000084,Included,ssbc00000,D13,Fail,Pass,NA,Pass,0,Fail,0.000000,Male,Female,Pass,0.990099,Pass,0.1304,Pass,0,Pass,0.195238,Pass,0.992127,Pass,-0.144665 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number085,beadchip_ABC123456,supplier_WeylandYutani00085,xenomorph_cohort,urn:wtsi:plate0001_F11_sample000085,Included,ssbc00000,D14,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.980198,Pass,0.2222,Pass,0,Pass,0.280952,Pass,0.999356,Pass,-0.191296 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number086,beadchip_ABC123456,supplier_WeylandYutani00086,xenomorph_cohort,urn:wtsi:plate0001_G11_sample000086,Included,ssbc00000,D15,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.980198,Pass,0.2727,Pass,0,Pass,0.128571,Pass,1.038192,Pass,-0.108705 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number087,beadchip_ABC123456,supplier_WeylandYutani00087,xenomorph_cohort,urn:wtsi:plate0001_H11_sample000087,Included,ssbc00000,D16,Fail,Pass,NA,Pass,0,Fail,0.000000,Male,Female,Pass,0.990099,Pass,0.1957,Pass,0,Pass,0.247619,Pass,0.996005,Pass,-0.159525 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number088,beadchip_ABC123456,supplier_WeylandYutani00088,xenomorph_cohort,urn:wtsi:plate0001_A12_sample000088,Included,ssbc00000,D17,Fail,Pass,NA,Pass,0,Fail,0.000000,Male,Female,Pass,0.970297,Pass,0.1556,Pass,0,Pass,0.209524,Pass,1.018251,Pass,-0.240315 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number089,beadchip_ABC123456,supplier_WeylandYutani00089,xenomorph_cohort,urn:wtsi:plate0001_B12_sample000089,Included,ssbc00000,D18,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.950495,Pass,0.2444,Pass,0,Pass,0.152381,Pass,0.999501,Pass,-0.190969 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number090,beadchip_ABC123456,supplier_WeylandYutani00090,xenomorph_cohort,urn:wtsi:plate0001_C12_sample000090,Included,ssbc00000,D19,Fail,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Fail,0.940594,Pass,0.2558,Pass,0,Pass,0.12381,Pass,0.980026,Pass,-0.105226 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number091,beadchip_ABC123456,supplier_WeylandYutani00091,xenomorph_cohort,urn:wtsi:plate0001_D12_sample000091,Included,ssbc00000,D20,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.990099,Pass,0.1957,Pass,0,Pass,0.190476,Pass,0.977348,Pass,-0.129244 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number092,beadchip_ABC123456,supplier_WeylandYutani00092,xenomorph_cohort,urn:wtsi:plate0001_E12_sample000092,Included,ssbc00000,D21,Fail,Pass,NA,Pass,0,Fail,0.500000,Female,Male,Pass,0.980198,Pass,0.3043,Pass,0,Pass,0.138095,Pass,1.023070,Pass,-0.113106 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number093,beadchip_ABC123456,supplier_WeylandYutani00093,xenomorph_cohort,urn:wtsi:plate0001_F12_sample000093,Included,ssbc00000,D22,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.950495,Pass,0.2273,Pass,0,Pass,0.128571,Pass,0.977187,Pass,-0.193157 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number094,beadchip_ABC123456,supplier_WeylandYutani00094,xenomorph_cohort,urn:wtsi:plate0001_G12_sample000094,Included,ssbc00000,D23,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.990099,Pass,0.3261,Pass,0,Pass,0.12381,Pass,1.029707,Pass,-0.148270 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number095,beadchip_ABC123456,supplier_WeylandYutani00095,xenomorph_cohort,urn:wtsi:plate0001_H12_sample000095,Included,ssbc00000,D24,Pass,Pass,NA,Pass,0,Pass,0.500000,Female,Female,Pass,0.950495,Pass,0.3478,Pass,0,Pass,0.285714,Pass,0.967335,Pass,-0.196799 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number000,beadchip_ABC123456,supplier_WeylandYutani00096,xenomorph_cohort,urn:wtsi:plate0002_A01_sample000096,Included,ssbc00001,A01,Fail,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Fail,0.940594,Pass,0.3023,Pass,0,Pass,0.147619,Pass,1.037985,Pass,-0.149908 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number001,beadchip_ABC123456,supplier_WeylandYutani00097,xenomorph_cohort,urn:wtsi:plate0002_B01_sample000097,Included,ssbc00001,A02,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.970297,Pass,0.2174,Pass,0,Pass,0.114286,Pass,0.997330,Pass,-0.240348 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number002,beadchip_ABC123456,supplier_WeylandYutani00098,xenomorph_cohort,urn:wtsi:plate0002_C01_sample000098,Included,ssbc00001,A03,Pass,Pass,NA,Pass,0,Pass,0.000000,Male,Male,Pass,0.990099,Pass,0.2826,Pass,0,Pass,0.138095,Pass,1.039791,Pass,-0.162776 +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number003,beadchip_ABC123456,supplier_WeylandYutani00099,xenomorph_cohort,urn:wtsi:plate0002_D01_sample000099,Included,ssbc00001,A04,Fail,Pass,NA,Pass,0,Fail,0.500000,Female,Male,Pass,0.960396,Pass,0.3478,Pass,0,Pass,0.157143,Pass,0.975443,Pass,-0.109670 
+pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number000,beadchip_ABC123456,supplier_WeylandYutani00000,xenomorph_cohort,urn:wtsi:plate0001_A01_sample000000,Excluded,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number003,beadchip_ABC123456,supplier_WeylandYutani00003,xenomorph_cohort,urn:wtsi:plate0001_D01_sample000003,Excluded,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number022,beadchip_ABC123456,supplier_WeylandYutani00022,xenomorph_cohort,urn:wtsi:plate0001_G03_sample000022,Excluded,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +pipeline_run,test_project,ib5,HumanOmni25-8v1,rowcol_number044,beadchip_ABC123456,supplier_WeylandYutani00044,xenomorph_cohort,urn:wtsi:plate0001_E06_sample000044,Excluded,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA diff --git a/src/perl/t/query_project_samples.t b/src/perl/t/query_project_samples.t index b82396911..6a023b4cc 100644 --- a/src/perl/t/query_project_samples.t +++ b/src/perl/t/query_project_samples.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Infinium::SampleQueryTest; -Test::Class->runtests; \ No newline at end of file +WTSI::NPG::Genotyping::Infinium::SampleQueryTest->runtests; diff --git a/src/perl/t/ready_plex.t b/src/perl/t/ready_plex.t deleted file mode 100644 index 42084c8b9..000000000 --- a/src/perl/t/ready_plex.t +++ /dev/null @@ -1,9 +0,0 @@ - -use utf8; - -use strict; -use warnings; - -use WTSI::NPG::Genotyping::VCF::ReadyPlexCallsTest; - -Test::Class->runtests; diff --git a/src/perl/t/ready_workflow.t b/src/perl/t/ready_workflow.t new file mode 100644 index 000000000..f2814343d --- /dev/null +++ b/src/perl/t/ready_workflow.t @@ -0,0 +1,9 @@ + +use utf8; + +use strict; +use warnings; + +use WTSI::NPG::Genotyping::VCF::ReadyWorkflowTest; + 
+WTSI::NPG::Genotyping::VCF::ReadyWorkflowTest->runtests; diff --git a/src/perl/t/reference_finder.t b/src/perl/t/reference_finder.t index fd7bf50b8..3bc60f18d 100644 --- a/src/perl/t/reference_finder.t +++ b/src/perl/t/reference_finder.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::VCF::ReferenceFinderTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::VCF::ReferenceFinderTest->runtests; diff --git a/src/perl/t/scripts.t b/src/perl/t/scripts.t index 511efdc77..7132f600c 100644 --- a/src/perl/t/scripts.t +++ b/src/perl/t/scripts.t @@ -3,6 +3,7 @@ use utf8; use strict; use warnings; + use WTSI::NPG::Genotyping::ScriptsTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::ScriptsTest->runtests; diff --git a/src/perl/t/sequenom_assay_data_object.t b/src/perl/t/sequenom_assay_data_object.t index a4e586bd3..96c72b2cc 100644 --- a/src/perl/t/sequenom_assay_data_object.t +++ b/src/perl/t/sequenom_assay_data_object.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Sequenom::AssayDataObjectTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Sequenom::AssayDataObjectTest->runtests; diff --git a/src/perl/t/sequenom_assay_result.t b/src/perl/t/sequenom_assay_result.t index 50c90e69f..eb493ef89 100644 --- a/src/perl/t/sequenom_assay_result.t +++ b/src/perl/t/sequenom_assay_result.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Sequenom::AssayResultTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Sequenom::AssayResultTest->runtests; diff --git a/src/perl/t/sequenom_assay_resultset.t b/src/perl/t/sequenom_assay_resultset.t index da44cdcd6..c8b57d8d8 100644 --- a/src/perl/t/sequenom_assay_resultset.t +++ b/src/perl/t/sequenom_assay_resultset.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Sequenom::AssayResultSetTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Sequenom::AssayResultSetTest->runtests; diff --git a/src/perl/t/sequenom_publisher.t b/src/perl/t/sequenom_publisher.t index 01c82021b..ff8a70a27 100644 --- 
a/src/perl/t/sequenom_publisher.t +++ b/src/perl/t/sequenom_publisher.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Sequenom::PublisherTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::Sequenom::PublisherTest->runtests; diff --git a/src/perl/t/sequenom_subscriber.t b/src/perl/t/sequenom_subscriber.t index b9ba31746..4cbb42f2f 100644 --- a/src/perl/t/sequenom_subscriber.t +++ b/src/perl/t/sequenom_subscriber.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::Sequenom::SubscriberTest; -Test::Class->runtests; \ No newline at end of file +WTSI::NPG::Genotyping::Sequenom::SubscriberTest->runtests; diff --git a/src/perl/t/sequenom_subscriber/chromosome_lengths_GRCh37.json b/src/perl/t/sequenom_subscriber/chromosome_lengths_GRCh37.json new file mode 100644 index 000000000..8d6e46bda --- /dev/null +++ b/src/perl/t/sequenom_subscriber/chromosome_lengths_GRCh37.json @@ -0,0 +1 @@ +{"11":135006516,"21":48129895,"7":159138663,"Y":59373566,"17":81195210,"2":243199373,"22":51304566,"1":249250621,"18":78077248,"16":90354753,"13":115169878,"6":171115067,"X":155270560,"3":198022430,"9":141213431,"12":133851895,"20":63025520,"14":107349540,"15":102531392,"8":146364022,"4":191154276,"19":59128983,"10":135534747,"5":180915260} \ No newline at end of file diff --git a/src/perl/t/simple_publisher.t b/src/perl/t/simple_publisher.t index a5192d1e1..befebf971 100644 --- a/src/perl/t/simple_publisher.t +++ b/src/perl/t/simple_publisher.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::SimplePublisherTest; -Test::Class->runtests; +WTSI::NPG::SimplePublisherTest->runtests; diff --git a/src/perl/t/snp.t b/src/perl/t/snp.t index e4687dbe3..b310f3272 100644 --- a/src/perl/t/snp.t +++ b/src/perl/t/snp.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::SNPTest; -Test::Class->runtests; \ No newline at end of file +WTSI::NPG::Genotyping::SNPTest->runtests; diff --git a/src/perl/t/snpset.t b/src/perl/t/snpset.t index c91cbb2a4..96c6656d7 100644 --- a/src/perl/t/snpset.t 
+++ b/src/perl/t/snpset.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::SNPSetTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::SNPSetTest->runtests; diff --git a/src/perl/t/snpset_publisher.t b/src/perl/t/snpset_publisher.t index 72503e251..90fd1f8ed 100644 --- a/src/perl/t/snpset_publisher.t +++ b/src/perl/t/snpset_publisher.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::SNPSetPublisherTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::SNPSetPublisherTest->runtests; diff --git a/src/perl/t/update_plink_annotation.t b/src/perl/t/update_plink_annotation.t index 090805190..9de8ec7fe 100644 --- a/src/perl/t/update_plink_annotation.t +++ b/src/perl/t/update_plink_annotation.t @@ -1,10 +1,9 @@ -# Tests update_plink_annotation.pl use utf8; + use strict; use warnings; use WTSI::NPG::Genotyping::UpdatePlinkAnnotationTest; -Test::Class->runtests; - +WTSI::NPG::Genotyping::UpdatePlinkAnnotationTest->runtests; diff --git a/src/perl/t/utilities.t b/src/perl/t/utilities.t index b2421796c..a3bfb02c4 100644 --- a/src/perl/t/utilities.t +++ b/src/perl/t/utilities.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::UtilitiesTest; -Test::Class->runtests; +WTSI::NPG::UtilitiesTest->runtests; diff --git a/src/perl/t/vcf.t b/src/perl/t/vcf.t index f80a9f621..711c6b6a6 100644 --- a/src/perl/t/vcf.t +++ b/src/perl/t/vcf.t @@ -6,4 +6,4 @@ use warnings; use WTSI::NPG::Genotyping::VCF::VCFTest; -Test::Class->runtests; +WTSI::NPG::Genotyping::VCF::VCFTest->runtests; diff --git a/src/perl/t/vcf/4_samples.db b/src/perl/t/vcf/4_samples.db new file mode 100644 index 000000000..53b06f0f2 Binary files /dev/null and b/src/perl/t/vcf/4_samples.db differ diff --git a/src/perl/t/vcf/fluidigm.vcf b/src/perl/t/vcf/fluidigm.vcf index f20df8656..30b6930fa 100644 --- a/src/perl/t/vcf/fluidigm.vcf +++ b/src/perl/t/vcf/fluidigm.vcf @@ -30,7 +30,7 @@ ##FORMAT= ##FORMAT= ##FORMAT= -##callset_name=fluidigm +##callset_name=fluidigm_qc ##plex_name=qc ##plex_type=fluidigm #CHROM POS ID 
REF ALT QUAL FILTER INFO FORMAT sample001 sample002 sample003 sample004 diff --git a/src/perl/t/vcf/fluidigm_header_1.txt b/src/perl/t/vcf/fluidigm_header_1.txt index e45b5e26d..ef0dffb5d 100644 --- a/src/perl/t/vcf/fluidigm_header_1.txt +++ b/src/perl/t/vcf/fluidigm_header_1.txt @@ -30,7 +30,7 @@ ##FORMAT= ##FORMAT= ##FORMAT= -##callset_name=fluidigm +##callset_name=fluidigm_qc ##plex_name=qc ##plex_type=fluidigm #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample001 sample002 sample003 sample004 diff --git a/src/perl/t/vcf/fluidigm_header_2.txt b/src/perl/t/vcf/fluidigm_header_2.txt index 567b095e3..716c474f1 100644 --- a/src/perl/t/vcf/fluidigm_header_2.txt +++ b/src/perl/t/vcf/fluidigm_header_2.txt @@ -30,7 +30,7 @@ ##FORMAT= ##FORMAT= ##FORMAT= -##callset_name=fluidigm +##callset_name=fluidigm_qc ##plex_name=qc ##plex_type=fluidigm #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT north south east west diff --git a/src/perl/t/vcf/fluidigm_samples.json b/src/perl/t/vcf/fluidigm_samples.json deleted file mode 100644 index 39c9d9806..000000000 --- a/src/perl/t/vcf/fluidigm_samples.json +++ /dev/null @@ -1 +0,0 @@ -["sample_001", "sample_002", "sample_003", "sample_004"] \ No newline at end of file diff --git a/src/perl/t/vcf/sequenom.vcf b/src/perl/t/vcf/sequenom.vcf index 84e9f8c94..11923a606 100644 --- a/src/perl/t/vcf/sequenom.vcf +++ b/src/perl/t/vcf/sequenom.vcf @@ -30,7 +30,7 @@ ##FORMAT= ##FORMAT= ##FORMAT= -##callset_name=sequenom +##callset_name=sequenom_W30467 ##plex_name=W30467 ##plex_type=sequenom #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample001 sample002 sample003 sample004 diff --git a/src/perl/t/vcf/sequenom_samples.json b/src/perl/t/vcf/sequenom_samples.json deleted file mode 100644 index 39c9d9806..000000000 --- a/src/perl/t/vcf/sequenom_samples.json +++ /dev/null @@ -1 +0,0 @@ -["sample_001", "sample_002", "sample_003", "sample_004"] \ No newline at end of file diff --git 
a/src/ruby/genotyping-workflows/lib/genotyping/tasks/quality_control.rb b/src/ruby/genotyping-workflows/lib/genotyping/tasks/quality_control.rb index 282c9ef54..d9a89e149 100644 --- a/src/ruby/genotyping-workflows/lib/genotyping/tasks/quality_control.rb +++ b/src/ruby/genotyping-workflows/lib/genotyping/tasks/quality_control.rb @@ -1,6 +1,6 @@ #-- encoding: UTF-8 # -# Copyright (c) 2012 Genome Research Ltd. All rights reserved. +# Copyright (c) 2012, 2016 Genome Research Ltd. All rights reserved. # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -50,13 +50,14 @@ def quality_control(dbfile, input, output, Dir.mkdir(output) unless File.exist?(output) cli_args = args.merge({:dbpath => dbfile, - :output_dir => output}) - + :output_dir => output, + :plink => base}) + margs = [dbfile, input, output] command = [RUN_QC, cli_arg_map(cli_args, :prefix => '--') { |key| - key.gsub(/_/, '-') }, base].flatten.join(' ') + key.gsub(/_/, '-') }].flatten.join(' ') task_id = task_identity(:quality_control, *margs) log = File.join(log_dir, task_id + '.log') diff --git a/src/ruby/genotyping-workflows/lib/genotyping/workflows/genotype_illuminus.rb b/src/ruby/genotyping-workflows/lib/genotyping/workflows/genotype_illuminus.rb index 06f22cc2c..972eb9b86 100644 --- a/src/ruby/genotyping-workflows/lib/genotyping/workflows/genotype_illuminus.rb +++ b/src/ruby/genotyping-workflows/lib/genotyping/workflows/genotype_illuminus.rb @@ -1,6 +1,6 @@ #-- encoding: UTF-8 # -# Copyright (c) 2012, 2015 Genome Research Ltd. All rights reserved. +# Copyright (c) 2012, 2015, 2016 Genome Research Ltd. All rights reserved. # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -41,14 +41,14 @@ class GenotypeIlluminus < Percolate::Workflow Arguments: -- db_file (String): The SQLite pipeline database file. 
-- run_name (String): The name of a pipeline run defined in the pipeline database. -- work_dir (String): The working directory, an absolute path. +- db_file <String>: The SQLite pipeline database file. +- run_name <String>: The name of a pipeline run defined in the pipeline database. +- work_dir <String>: The working directory, an absolute path. - other arguments (keys and values): - config: of custom pipeline database .ini file. Optional. - manifest: of the chip manifest file. Required. - - plex_manifest: of the qc plex manifest file. Required. + - plex_manifest: <Array> containing paths to one or more qc plex manifest files. Required. - gender_method: name of a gender determination method described in methods.ini. Optional, defaults to 'Inferred' - chunk_size: number of SNPs to analyse in a single Illuminus job. @@ -62,8 +62,8 @@ class GenotypeIlluminus < Percolate::Workflow - nofilter: omit the prefilter on GenCall QC. Optional. If true, overrides the filterconfig argument. - fam_dummy: Dummy value for missing paternal/maternal ID or phenotype in Plink .fam output. Must be equal to 0 or -9. Optional, defaults to -9. - - vcf: Path to VCF file for identity QC - - plex_manifest: Path to plex manifest file for identity QC + - vcf: <Array> containing paths to one or more VCF files for identity QC + - plex_manifest: <Array> containing paths to one or more plex manifest files for identity QC e.g. 
@@ -76,8 +76,12 @@ class GenotypeIlluminus < Percolate::Workflow - config: /work/my_project/pipeline/pipedb.ini queue: small manifest: /genotyping/manifests/Human670-QuadCustom_v1_A.bpm.csv - vcf: /work/my_project/qc_calls.vcf - plex_manifest: /genotyping/manifests/qc.tsv + vcf: + - /work/my_project/qc_calls_foo.vcf + - /work/my_project/qc_calls_bar.vcf + plex_manifest: + - /genotyping/manifests/qc_foo.tsv + - /genotyping/manifests/qc_bar.tsv Returns: @@ -106,7 +110,7 @@ def run(dbfile, run_name, work_dir, args = {}) gtconfig = args.delete(:config) fconfig = args.delete(:filterconfig) || nil nofilter = args.delete(:nofilter) || nil - vcf = args.delete(:vcf) || nil + vcf = args.delete(:vcf) || Array.new() args.delete(:memory) args.delete(:queue) @@ -121,7 +125,7 @@ def run(dbfile, run_name, work_dir, args = {}) :log_dir => log_dir}.merge(args) maybe_version_log(log_dir) - run_name = run_name.to_s; + run_name = run_name.to_s gcsjname = run_name + '.gencall.sample.json' sjname = run_name + '.illuminus.sample.json' njname = run_name + '.snp.json' @@ -143,19 +147,19 @@ def run(dbfile, run_name, work_dir, args = {}) gcquality = true else ## run gencall QC to apply gencall CR filter and find genders - gcqcargs = {:run => run_name, - :plex_manifest => plex_manifest}.merge(args) + gcqcargs = {:run => run_name}.merge(args) if fconfig gcqcargs = {:filter => fconfig}.merge(gcqcargs) else gcqcargs = {:illuminus_filter => true}.merge(gcqcargs) end - if vcf and plex_manifest - gcqcargs = { - :vcf => vcf, - :plex_manifest => plex_manifest, - :sample_json => gcsjson - }.merge(gcqcargs) + if (not vcf.empty?) and (not plex_manifest.empty?) 
+ # use comma-separated lists of VCF/plex files in QC args + gcqcargs = gcqcargs.merge({ + :vcf => vcf.join(","), + :plex_manifest => plex_manifest.join(","), + :sample_json => gcsjson + }) # overwrites original values in gcqcargs end gcqcdir = File.join(work_dir, 'gencall_qc') @@ -207,12 +211,13 @@ def run(dbfile, run_name, work_dir, args = {}) :run => run_name, :sim => smfile }.merge(args) - if vcf and plex_manifest - qcargs = { - :vcf => vcf, - :plex_manifest => plex_manifest, + if (not vcf.empty?) and (not plex_manifest.empty?) + # use comma-separated lists of VCF/plex files in QC args + qcargs = qcargs.merge({ + :vcf => vcf.join(","), + :plex_manifest => plex_manifest.join(","), :sample_json => sjson - }.merge(qcargs) + }) # overwrites original values in qcargs end ilquality = quality_control(dbfile, ilfile, output, qcargs, async) diff --git a/src/ruby/genotyping-workflows/lib/genotyping/workflows/genotype_zcall.rb b/src/ruby/genotyping-workflows/lib/genotyping/workflows/genotype_zcall.rb index 4a2a55515..c9117a5c0 100644 --- a/src/ruby/genotyping-workflows/lib/genotyping/workflows/genotype_zcall.rb +++ b/src/ruby/genotyping-workflows/lib/genotyping/workflows/genotype_zcall.rb @@ -1,6 +1,6 @@ #-- encoding: UTF-8 # -# Copyright (c) 2013, 2015 Genome Research Ltd. All rights reserved. +# Copyright (c) 2013, 2015, 2016 Genome Research Ltd. All rights reserved. # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -69,8 +69,10 @@ class GenotypeZCall < Percolate::Workflow memory: number of Mb to request for jobs. queue: An LSF queue hint. Optional, defaults to'normal'. - vcf: Path to VCF file for identity QC - plex_manifest: Path to plex manifest file for identity QC + vcf: <Array> containing paths to one or more VCF files for + identity QC + plex_manifest: <Array> containing paths to one or more plex manifest + files for identity QC e.g. 
@@ -86,8 +88,12 @@ class GenotypeZCall < Percolate::Workflow manifest: /genotyping/manifests/Human670-QuadCustom_v1_A.bpm.csv plex_manifest: /genotyping/manifests/fluidigm.tsv egt: /genotyping/clusters/Human670-QuadCustom_v1.egt - vcf: /work/my_project/qc_calls.vcf - plex_manifest: /genotyping/manifests/qc.tsv + vcf: + - /work/my_project/qc_calls_foo.vcf + - /work/my_project/qc_calls_bar.vcf + plex_manifest: + - /genotyping/manifests/qc_foo.tsv + - /genotyping/manifests/qc_bar.tsv Returns: @@ -116,7 +122,7 @@ def run(dbfile, run_name, work_dir, args = {}) fconfig = args.delete(:filterconfig) || nil nofilter = args.delete(:nofilter) || nil nosim = args.delete(:nosim) || nil # omit sim files for qc? - vcf = args.delete(:vcf) || nil + vcf = args.delete(:vcf) || Array.new() args.delete(:memory) args.delete(:queue) @@ -229,17 +235,17 @@ def run(dbfile, run_name, work_dir, args = {}) zsimfile = gtc_to_sim(sjson, manifest, zsimname, smargs, async) if zsimfile qcargs = {:run => run_name, - :sim => zsimfile, - :plex_manifest => plex_manifest}.merge(args) + :sim => zsimfile}.merge(args) end end if qcargs # ready to start QC - if vcf and plex_manifest - qcargs = { - :vcf => vcf, - :plex_manifest => plex_manifest, + if (not vcf.empty?) and (not plex_manifest.empty?) 
+ # use comma-separated lists of VCF/plex files in QC args + qcargs = qcargs.merge({ + :vcf => vcf.join(","), + :plex_manifest => plex_manifest.join(","), :sample_json => sjson - }.merge(qcargs) + }) # overwrites original contents of qcargs end zquality = quality_control(dbfile, zfile, zqc, qcargs, async) end @@ -275,20 +281,20 @@ def prefilter(dbfile, run_name, work_dir, fconfig, gcsjson, gcsfile = File.join(work_dir, gcsname) gcqcargs = {:run => run_name, :mafhet => true}.merge(args) if gcsimfile - gcqcargs = {:sim => gcsimfile, - :plex_manifest => plex_manifest}.merge(gcqcargs) + gcqcargs = {:sim => gcsimfile}.merge(gcqcargs) end if fconfig gcqcargs = {:filter => fconfig}.merge(gcqcargs) else gcqcargs = {:zcall_filter => true}.merge(gcqcargs) end - if vcf and plex_manifest - gcqcargs = { - :vcf => vcf, + if (not vcf.empty?) and (not plex_manifest.empty?) + # use comma-separated lists of VCF/plex files in QC args + gcqcargs = gcqcargs.merge({ + :vcf => vcf.join(","), :sample_json => gcsjson, - :plex_manifest => plex_manifest - }.merge(gcqcargs) + :plex_manifest => plex_manifest.join(",") + }) # overwrites original contents of gcqcargs end ## run gencall QC to get metrics for prefiltering diff --git a/src/ruby/genotyping-workflows/test/test_illuminus_workflow.rb b/src/ruby/genotyping-workflows/test/test_illuminus_workflow.rb index 2249d07ff..149daaa55 100644 --- a/src/ruby/genotyping-workflows/test/test_illuminus_workflow.rb +++ b/src/ruby/genotyping-workflows/test/test_illuminus_workflow.rb @@ -1,6 +1,6 @@ #-- encoding: UTF-8 # -# Copyright (c) 2012 Genome Research Ltd. All rights reserved. +# Copyright (c) 2012, 2016 Genome Research Ltd. All rights reserved. 
# # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -55,8 +55,9 @@ def test_genotype_illuminus FileUtils.copy(File.join(external_data, 'genotyping.db'), dbfile) fconfig = File.join(data_path, 'illuminus_test_prefilter.json') vcf = File.join(external_data, 'sequenom_abvc.vcf') - plex_manifest = File.join(external_data, - 'W30467_snp_set_info_GRCh37.tsv') + plex_0 = File.join(external_data, 'W30467_snp_set_info_GRCh37.tsv') + plex_1 = File.join(external_data, 'qc_fluidigm_snp_info_GRCh37.tsv') + # plex_1 not needed for workflow, but tests handling multiple plex args args_hash = {:manifest => manifest, :plex_manifest => plex_path, @@ -66,8 +67,8 @@ def test_genotype_illuminus :chunk_size => 10000, :memory => 2048, :queue => 'yesterday', - :vcf => vcf, - :plex_manifest => plex_manifest + :vcf => [vcf, ], + :plex_manifest => [plex_0, plex_1] } args = [dbfile, run_name, work_dir, args_hash] timeout = 1400 diff --git a/src/ruby/genotyping-workflows/test/test_zcall_workflow.rb b/src/ruby/genotyping-workflows/test/test_zcall_workflow.rb index daa673aca..7d07f653e 100644 --- a/src/ruby/genotyping-workflows/test/test_zcall_workflow.rb +++ b/src/ruby/genotyping-workflows/test/test_zcall_workflow.rb @@ -1,6 +1,6 @@ #-- encoding: UTF-8 # -# Copyright (c) 2012 Genome Research Ltd. All rights reserved. +# Copyright (c) 2012, 2016 Genome Research Ltd. All rights reserved. 
# # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -58,8 +58,9 @@ def test_genotype_zcall FileUtils.copy(File.join(external_data, 'genotyping.db'), dbfile) vcf = File.join(external_data, 'sequenom_abvc.vcf') - plex_manifest = File.join(external_data, - 'W30467_snp_set_info_GRCh37.tsv') + plex_0 = File.join(external_data, 'W30467_snp_set_info_GRCh37.tsv') + plex_1 = File.join(external_data, 'qc_fluidigm_snp_info_GRCh37.tsv') + # plex_1 not needed for workflow, but tests handling multiple plex args # Only 1 zscore in range; faster but omits threshold evaluation # The evaluation is tested by test_zcall_tasks.rb @@ -70,8 +71,9 @@ def test_genotype_zcall :zstart => 6, :ztotal => 1, :memory => 2048, - :vcf => vcf, - :plex_manifest => plex_manifest }] + :vcf => [vcf,], + :plex_manifest => [plex_0, plex_1] + }] timeout = 1800 # was 720 log = 'percolate.log' result = test_workflow(name, Genotyping::Workflows::GenotypeZCall,