Interpreting the scores

#!/usr/bin/env python3
import sys
import os
import csv
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import percentileofscore
# Function to read polygenic risk scores from a tab-delimited file, skipping header
def read_polygenic_scores(file_path, score_column):
scores = []
with open(file_path, 'r') as file:
# Skip the header line
next(file)
for line in file:
columns = line.strip().split('\t')
if len(columns) > score_column:
score = float(columns[score_column])
scores.append(score)
return scores
# Function to create a distribution plot of polygenic risk scores
def plot_polygenic_scores_distribution(scores):
plt.hist(scores, bins=20, edgecolor='black', alpha=0.7)
plt.title('Polygenic Risk Scores Distribution')
plt.xlabel('Polygenic Risk Scores')
plt.ylabel('Frequency')
#plt.savefig('polygenic_scores_distribution.png') # Save the plot as an image
#plt.show()
# Function to find the percentile of a given score in the distribution
def find_percentile(score, distribution):
return percentileofscore(distribution, score)
# Function to append data to the CSV file
def append_to_csv(file_path, data):
with open(file_path, 'a', newline='') as csv_file:
csv_writer = csv.writer(csv_file)
csv_writer.writerow(data)
def main():
# Assign file paths
ukb_file_path = '/home/vsrinivasan75/ukb_prs/using_plink/PGS000785/ukb_chr1-22_v1_nomean_PGS000785.sscore'
tempus_file_path = sys.argv[1] if len(sys.argv) == 2 else None
# Check if the second file path is provided
if tempus_file_path is None:
print("Usage: python script.py tempus_file_path")
sys.exit(1)
# Read polygenic risk scores from the UK Biobank file (5th column)
polygenic_scores_ukb = read_polygenic_scores(ukb_file_path, score_column=4)
# Plot the distribution of polygenic risk scores for the UK Biobank file
plot_polygenic_scores_distribution(polygenic_scores_ukb)
# Read polygenic risk scores from the Tempus file (4th column)
polygenic_scores_tempus = read_polygenic_scores(tempus_file_path, score_column=3)
# Example: Find the percentile of the first score from the Tempus file
if polygenic_scores_tempus: # Check if the list is not empty
specific_score = polygenic_scores_tempus[0]
percentile = find_percentile(specific_score, polygenic_scores_ukb)
rounded_percentile = round(percentile, 2)
print(f'The percentile of the score {specific_score} in the UKB distribution is: {rounded_percentile}%')
# Extract required information from tempus_file_path
tempus_file_parts = tempus_file_path.split('/')
date = tempus_file_parts[-2].split('_')[0] # Extract the part before the underscore
id = tempus_file_parts[-1].split('.')[0] # Extract until the first occurrence of "."
# Check for duplicates before appending to the CSV file
data_to_append = [date, id, specific_score, rounded_percentile]
csv_file_path = 'GastroATL_PRS.csv'
with open(csv_file_path, 'r') as csv_file:
csv_reader = csv.reader(csv_file)
for row in csv_reader:
if row[:3] == data_to_append[:3]: # Check if the first three columns match
print("Duplicate entry. Skipping...")
sys.exit(0)
# Append the data to the CSV file
append_to_csv(csv_file_path, data_to_append)
print("Data appended to GastroATL_PRS.csv")
else:
print("The Tempus file does not contain any polygenic risk scores.")
if __name__ == "__main__":
main()Score
Percentile
Risk level
PreviousPreparing the data and obtaining polygenic risk scoresNextClinical data from Gastroenterology Atlanta
Last updated