25 lines
776 B
Python
25 lines
776 B
Python
|
from Bio import SeqIO
|
||
|
from Bio.Seq import Seq
|
||
|
|
||
|
# Load gene sequences from a FASTA file
def load_sequences(file_path):
    """Read every record from a FASTA file into a list.

    Args:
        file_path: Location of the FASTA file, passed straight to
            ``SeqIO.parse``.

    Returns:
        A list holding each record yielded by ``SeqIO.parse`` for the
        "fasta" format, in file order.
    """
    # Materialise the parser's iterator so callers get a reusable list.
    records = list(SeqIO.parse(file_path, "fasta"))
    return records
|
||
|
|
||
|
# Find candidate gRNA sequences (20 nucleotides + "NGG" PAM)
def find_crispr_sites(sequence):
    """Return every 23-nt window made of a 20-nt target followed by an NGG PAM.

    The sequence is scanned left to right with a step of one, so
    overlapping sites are all reported. Only the given strand is examined
    and the "GG" comparison is case-sensitive.

    Args:
        sequence: Nucleotide sequence to scan (e.g. ``str(record.seq)``).

    Returns:
        A list of 23-character sites (20-nt target + 3-nt PAM) in order of
        their start position. Empty when the sequence is shorter than 23
        nucleotides or contains no matching PAM.
    """
    target_length = 20
    pam_length = 3  # "NGG": any nucleotide followed by two guanines
    site_length = target_length + pam_length

    # The "+ 1" makes the scan include the final full-length window; without
    # it a site that ends exactly at the end of the sequence is missed.
    # A negative range (sequence shorter than one site) is simply empty.
    window_count = len(sequence) - site_length + 1
    windows = (
        sequence[start:start + site_length] for start in range(window_count)
    )

    # The first PAM position is "N" (any base), so only the trailing "GG"
    # of each window has to be checked.
    return [site for site in windows if site.endswith("GG")]
|
||
|
|
||
|
# Example usage: load every record from the FASTA file and, for each one,
# report its ID followed by a short preview of the candidate CRISPR sites.
genes = load_sequences("arabidopsis_genome.fasta")
for gene in genes:
    print(f"Gene ID: {gene.id}")
    # The record's sequence is converted to a plain string before scanning.
    crispr_sites = find_crispr_sites(str(gene.seq))
    # Show only the first 5 sites to keep the output readable.
    print(f"Potential CRISPR sites: {crispr_sites[:5]}")
|