# Preparing the data and obtaining polygenic risk scores
# Last updated
# Pipeline: merge the per-chromosome VCFs, drop records whose variant ID has
# already been seen, compress and index the result, then score it with plink2.
# Needs bcftools, awk, bgzip, tabix and plink2 on PATH; run from the directory
# that holds 1.vcf.gz ... 22.vcf.gz.

# To combine all the VCF files
# The autosomes are listed explicitly in ascending order; -Oz writes
# bgzip-compressed VCF.
bcftools concat \
  1.vcf.gz 2.vcf.gz 3.vcf.gz 4.vcf.gz 5.vcf.gz 6.vcf.gz 7.vcf.gz 8.vcf.gz \
  9.vcf.gz 10.vcf.gz 11.vcf.gz 12.vcf.gz 13.vcf.gz 14.vcf.gz 15.vcf.gz \
  16.vcf.gz 17.vcf.gz 18.vcf.gz 19.vcf.gz 20.vcf.gz 21.vcf.gz 22.vcf.gz \
  -Oz -o combined.vcf.gz

# To remove duplicate IDs
# The header must be kept (no -H): without the '#' lines output.vcf is not a
# valid VCF and the tabix/plink2 steps below cannot read it. Header lines pass
# through untouched; for data records only the first occurrence of each ID
# (column 3) is kept.
# '>' rather than '>>' so a leftover output.vcf from an aborted run is
# replaced instead of appended to.
# NOTE(review): records with a missing ID (".") all share one key, so only the
# first of them survives — confirm that is acceptable for this dataset.
bcftools view combined.vcf.gz \
  | awk -F '\t' '/^#/ || !seen[$3]++' > output.vcf

# To compress and index the VCF file
# -f lets a rerun overwrite an existing output.vcf.gz / index, matching the
# overwrite behaviour of the concat step above.
bgzip -f output.vcf
tabix -f -p vcf output.vcf.gz

# To compute the polygenic risk score
# --score arguments after the file name are 1-based column numbers in the
# score file: 1 = variant ID, 4 = allele, 6 = weight.
# NOTE(review): if PGS000785.txt still carries a column-header row, plink2
# probably needs the 'header' modifier as well — verify against the file.
plink2 \
  --vcf output.vcf.gz \
  --score /home/vsrinivasan75/ukb_prs/PGS000785.txt 1 4 6 no-mean-imputation \
  --out patient_id