bio/gfp2.py

324 lines
10 KiB
Python

#! /usr/bin/env python3
# vim:fenc=utf-8
#
# Copyright © 2024 user <user@penguin>
#
# Distributed under terms of the MIT license.
import os

from Bio import Entrez, SeqIO
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio.Restriction import RestrictionBatch, EcoRI, BamHI, HindIII, Bpu1102I, NcoI
from Bio.Restriction import AllEnzymes, Analysis
from Bio.Graphics import GenomeDiagram
from reportlab.lib import colors
#%%
# Entrez settings.
# The contact address can be supplied through the ENTREZ_EMAIL environment
# variable; the placeholder below remains the fallback and should be replaced
# with a real address before querying NCBI.
Entrez.email = os.environ.get("ENTREZ_EMAIL", "your_email@example.com")
# Fetch a sequence from NCBI and save it locally.
def fetch_and_save_sequence(db, id, file_path):
    """Download one GenBank record via Entrez and write a copy to disk.

    Args:
        db: Entrez database name, forwarded to ``Entrez.efetch``.
        id: Identifier of the record to fetch. The name shadows the builtin
            ``id``; it is kept so that keyword callers keep working.
        file_path: Path the GenBank-formatted copy is written to.

    Returns:
        The record parsed by ``SeqIO.read``.
    """
    handle = Entrez.efetch(db=db, id=id, rettype="gb", retmode="text")
    try:
        record = SeqIO.read(handle, "genbank")
    finally:
        # Release the handle even when parsing the response fails.
        handle.close()
    SeqIO.write(record, file_path, "genbank")
    return record
# Load a sequence from a local file.
def load_local_sequence(file_path):
    """Parse the single GenBank record stored at *file_path* and return it."""
    with open(file_path, 'r') as handle:
        return SeqIO.read(handle, "genbank")
#%%
# Identifiers used when requesting the sequences from NCBI.
# NOTE(review): "pET-28a(+)" looks like a vector name rather than a GenBank
# accession — confirm that an Entrez fetch can actually resolve it.
gfp_id = "U87974"
plasmid_id = "pET-28a(+)"
# Paths of the local GenBank copies (identifier plus the ".gb" suffix).
gfp_file_path = f"{gfp_id}.gb"
plasmid_file_path = f"{plasmid_id}.gb"
#%%
def _load_or_fetch(accession, file_path, label):
    """Return the GenBank record for *accession*, using the local copy if present.

    When *file_path* does not exist the record is fetched from the "nuccore"
    database with ``fetch_and_save_sequence`` (which also writes the file);
    otherwise it is read with ``load_local_sequence``.

    Args:
        accession: Identifier passed to the fetch function.
        file_path: Location of the local GenBank copy.
        label: Text inserted into the "downloaded and saved" progress message.

    Returns:
        The loaded record.
    """
    if not os.path.exists(file_path):
        print(f"Plik {file_path} nie istnieje. Pobieranie z serwera NCBI...")
        loaded = fetch_and_save_sequence("nuccore", accession, file_path)
        print(f"Pobrano i zapisano sekwencję {label} do pliku {file_path}.")
    else:
        print(f"Plik {file_path} istnieje. Wczytywanie danych z lokalnego pliku...")
        loaded = load_local_sequence(file_path)
        print(f"Wczytano dane z pliku {file_path}.")
    return loaded


# Check for a local copy of the GFP sequence and fetch it if missing.
gfp_record = _load_or_fetch(gfp_id, gfp_file_path, "GFP")
gfp_seq = str(gfp_record.seq)
print(f"GFP Sequence: {gfp_seq[:60]}...")
#%%
# Check for a local copy of the plasmid sequence and fetch it if missing.
record = _load_or_fetch(plasmid_id, plasmid_file_path, "plazmidu")
#%%
# Print the annotations of every feature in the plasmid record:
# type and location always, gene/product qualifiers when present,
# followed by an empty line.
for feature in record.features:
    qualifiers = feature.qualifiers
    report = [f"Type: {feature.type}", f"Location: {feature.location}"]
    if 'gene' in qualifiers:
        report.append(f"Gene: {qualifiers['gene']}")
    if 'product' in qualifiers:
        report.append(f"Product: {qualifiers['product']}")
    print("\n".join(report))
    print()
#%%
#%%
# Walk through the features of the record and print detailed information
# for the selected feature types.
for feature in record.features:
    if feature.type not in ("promoter", "RBS", "CDS", "protein_bind", "misc_feature", "rep_origin"):
        continue
    # Read the strand from the location object; the SeqFeature.strand
    # shortcut is deprecated in recent Biopython releases.
    strand = feature.location.strand
    print(f"Type: {feature.type}")
    print(f"Location: {feature.location}")
    # Features without a strand (0 / None) are shown as '.', not as '-'.
    print(f"Strand: {({1: '+', -1: '-'}).get(strand, '.')}")
    # Type-specific details.
    if feature.type == "CDS":
        gene_name = feature.qualifiers.get('gene', ['Unknown gene'])[0]
        product = feature.qualifiers.get('product', ['Unknown product'])[0]
        print(f"Gene Name: {gene_name}")
        print(f"Product: {product}")
    elif feature.type == "protein_bind":
        binding_site = feature.qualifiers.get('bound_moiety', ['Unknown binding site'])[0]
        print(f"Binding Site: {binding_site}")
    elif feature.type == "misc_feature":
        note = feature.qualifiers.get('note', ['No additional information'])[0]
        print(f"Note: {note}")
    elif feature.type == "rep_origin":
        print("This is a replication origin.")
    # Sequence of the feature as it appears in the record (start..end).
    feature_seq = record.seq[feature.location.start:feature.location.end]
    print(f"Sequence ( ): {feature_seq}\n")
    # Take the strand into account: minus-strand features are shown
    # reverse-complemented in the second line.
    if strand == -1:
        feature_seq = feature_seq.reverse_complement()
    print(f"Sequence (^): {feature_seq}\n")
# Find the T7 terminator and print its sequence as stored in the record
# (no strand correction in this pass).
for feature in record.features:
    if feature.type != "terminator":
        continue
    # Only the first 'note' qualifier is inspected; a missing note counts as ''.
    if "T7" not in feature.qualifiers.get('note', [''])[0]:
        continue
    span = feature.location
    print("Terminator T7 znaleziony:")
    print(f"Type: {feature.type}")
    print(f"Location: {feature.location}")
    print(f"Sequence: {record.seq[span.start:span.end]}")
    print()
#%%
# Find the T7 terminator and print its sequence on the strand it is annotated on.
for feature in record.features:
    if feature.type == "terminator" and "T7" in feature.qualifiers.get('note', [''])[0]:
        print("Terminator T7 znaleziony:")
        print(f"Type: {feature.type}")
        print(f"Location: {feature.location}")
        # Terminator sequence as stored in the record.
        terminator_seq = record.seq[feature.location.start:feature.location.end]
        # Check the orientation via the location object (the SeqFeature.strand
        # shortcut is deprecated in recent Biopython releases) and
        # reverse-complement minus-strand features.
        if feature.location.strand == -1:
            terminator_seq = terminator_seq.reverse_complement()
        print(f"(+) Sequence: {terminator_seq}")
        print()
#%%
#%%
# 48-base window of the plasmid sequence (slice 25:73).
# NOTE(review): what this window corresponds to is not stated here — confirm.
t = record.seq[25:73]
#%%
# Plain-string copy of the window.
a = str(t)
#%%
class Model:
    """Small holder for a DNA sequence with a complement helper."""

    # Translation table covering the IUPAC nucleotide alphabet in both
    # letter cases, so ambiguity codes and soft-masked (lowercase) bases work.
    _COMPLEMENT = str.maketrans(
        "ACGTRYSWKMBDHVNacgtryswkmbdhvn",
        "TGCAYRSWMKVHDBNtgcayrswmkvhdbn",
    )

    def __init__(self, seq=None):
        self.seq = seq

    def complement_dna(self):
        """Return the base-wise complement of ``self.seq`` as a string.

        The order of the bases is kept (this is not the reverse complement).

        Raises:
            TypeError: if no sequence was supplied (``seq`` is ``None``).
            KeyError: if the sequence contains a symbol that is not an IUPAC
                nucleotide code.
        """
        if self.seq is None:
            raise TypeError("Model.seq is None; there is no sequence to complement")
        # join() accepts a str as well as any other iterable of single bases.
        text = "".join(self.seq)
        unknown = set(map(ord, text)).difference(self._COMPLEMENT)
        if unknown:
            raise KeyError("".join(sorted(map(chr, unknown))))
        return text.translate(self._COMPLEMENT)
#%%
# Wrap the plain-string window `a` in a Model instance.
r = Model(a)
#%%
# The bare expressions below compute a value without storing or printing it;
# presumably they are meant to be evaluated as interactive cells.
r.seq
#%%
# The sequence in reverse order (not complemented).
r.seq[::-1]
#%%
# The complement of the sequence, in the original order.
r.complement_dna()
#%%
# DNA sequence of the plasmid.
dna_seq = record.seq
# Locate the NcoI and Bpu1102I cut sites.
ncoI_sites = NcoI.search(dna_seq)
bpu1102I_sites = Bpu1102I.search(dna_seq)
# Stop with an explicit message instead of a bare IndexError when one of the
# enzymes does not cut the sequence.
if not ncoI_sites:
    raise ValueError("NcoI has no cut site in the plasmid sequence")
if not bpu1102I_sites:
    raise ValueError("Bpu1102I has no cut site in the plasmid sequence")
# Use the first reported site of each enzyme.
ncoI_site = ncoI_sites[0]
bpu1102I_site = bpu1102I_sites[0]
# NOTE(review): Bio.Restriction appears to report 1-based positions (first
# base after the cut), while the slices below use them directly as 0-based
# indices — confirm whether the windows are shifted by one base.
# NOTE(review): a site closer than 16 bases to the start makes the slice start
# negative — verify this cannot happen for the plasmid used.
# 20-base windows around the cut positions.
ncoI_fragment = dna_seq[ncoI_site-16:ncoI_site+4]  # 16 bases before and 4 after the reported position
bpu1102I_fragment = dna_seq[bpu1102I_site:bpu1102I_site+20]  # 20 bases from the reported position
# Sticky ends: last four bases of the NcoI window, first four of the Bpu1102I window.
# NOTE(review): presumably Bpu1102I leaves a 3-base overhang; four bases are
# taken here — confirm against the enzyme's cut pattern.
ncoI_sticky_end = ncoI_fragment[-4:]
ncoI_sticky_end_comp = ncoI_sticky_end.complement()
bpu1102I_sticky_end = bpu1102I_fragment[:4]
bpu1102I_sticky_end_comp = bpu1102I_sticky_end.complement()
# Complement table for the IUPAC nucleotide alphabet (both letter cases),
# used when a sequence is passed as a plain string.
_IUPAC_COMPLEMENT = str.maketrans(
    "ACGTRYSWKMBDHVNacgtryswkmbdhvn",
    "TGCAYRSWMKVHDBNtgcayrswmkvhdbn",
)


def _complement_text(seq):
    """Return the base-wise (non-reversed) complement of *seq* as a str."""
    complement = getattr(seq, "complement", None)
    if callable(complement):
        # Sequence objects that provide complement() are asked directly.
        return str(complement())
    return str(seq).translate(_IUPAC_COMPLEMENT)


# Print double-stranded DNA with sticky ends in a staggered layout, with
# index labels on the upper strand.
def print_sticky_ends(seq1, sticky_end1, seq2, sticky_end2, start1, start2):
    """Print two cut fragments as staggered double-stranded text.

    Args:
        seq1: Double-stranded part of the first fragment (upper strand).
        sticky_end1: Bases printed after *seq1* on the upper strand.
        seq2: Double-stranded part of the second fragment (upper strand).
        sticky_end2: Bases printed before *seq2* on the upper strand.
        start1: Index used to label the end of *seq1*
            (printed as ``start1 + len(seq1) - 1``).
        start2: Index printed in front of *seq2*.

    Each sequence may be a plain string or an object with a ``complement()``
    method; the printed text is the same for equal sequences.
    """
    seq1_str = str(seq1)
    seq2_str = str(seq2)
    sticky_end1_str = str(sticky_end1)
    sticky_end2_str = str(sticky_end2)
    comp_seq1_str = _complement_text(seq1)
    comp_seq2_str = _complement_text(seq2)
    sticky_end1_comp_str = _complement_text(sticky_end1)
    sticky_end2_comp_str = _complement_text(sticky_end2)

    # The last base of seq1 and the first base of seq2 are left out of the
    # printed text next to the index labels; the layout is kept as it was.
    upper_left = f"{seq1_str[:-1]} -{start1+len(seq1_str)-1}"
    lower_left = f"{comp_seq1_str[:-1]}"
    upper_right = f"{start2}- {seq2_str[1:]}"
    # The lower strand of the second fragment is printed mirrored.
    lower_right = f"{comp_seq2_str[1:]}"[::-1]
    lower_right_sticky = sticky_end2_comp_str[::-1]

    print(f"5'-{upper_left} {sticky_end1_str}-3' (+)")
    print(f"3'-{lower_left}{sticky_end1_comp_str}-5'")
    print()
    print(f"5'-{sticky_end2_str} {upper_right}-3' (+)")
    print(f" {lower_right_sticky}{lower_right}-5'")
# Show the sticky ends of the two fragments: the NcoI window without its
# last four bases and the Bpu1102I window without its first four bases,
# each paired with its sticky end and index label.
print("Fragmenty po cięciu NcoI i Bpu1102I:")
print_sticky_ends(
    seq1=ncoI_fragment[:-4],
    sticky_end1=ncoI_sticky_end,
    seq2=bpu1102I_fragment[4:],
    sticky_end2=bpu1102I_sticky_end,
    start1=ncoI_site - 16,
    start2=bpu1102I_site,
)
#%%
# Primer design.
# gfp_seq_with_his6 is used below and by the diagram code but is not defined
# anywhere in the visible script; build it here by appending six histidine
# codons to the GFP sequence.
# NOTE(review): the tag is appended to the whole record sequence, including
# anything after the coding region (e.g. a stop codon) — confirm the intended
# construct and codon choice.
his6_tag = "CAC" * 6
gfp_seq_with_his6 = gfp_seq + his6_tag
forward_primer = gfp_seq[:20]  # first 20 nucleotides of the GFP sequence
# Reverse complement of the last 20 nucleotides of GFP + His6.
# NOTE(review): with an 18-base tag only 2 bases of this primer come from GFP
# itself — verify that this is the intended primer.
reverse_primer = str(Seq(gfp_seq_with_his6[-20:]).reverse_complement())
print(f"Forward Primer: {forward_primer}")
print(f"Reverse Primer: {reverse_primer}")
#%%
# Restriction enzymes whose cut sites are looked up in the plasmid
# (shorten the list, e.g. to BamHI only, to search for fewer enzymes).
enzymes = RestrictionBatch(
    [
        EcoRI,
        BamHI,
        HindIII,
    ]
)
# Search result for the plasmid sequence; iterated later as enzyme -> sites.
restriction_sites = enzymes.search(record.seq)
#%%
# Run a restriction analysis of the plasmid sequence against AllEnzymes.
analysis = Analysis(AllEnzymes, record.seq)
# Result of the analysis, as returned by analysis.full().
results = analysis.full()
# Printing of the results (currently disabled):
#for enzyme in results:
# if len(results[enzyme]) > 0:
# print(f"{enzyme}: {results[enzyme]}")
#%%
# Build the plasmid diagram. The title is derived from plasmid_id so it names
# the plasmid that was actually loaded.
diagram = GenomeDiagram.Diagram(f"{plasmid_id} Plasmid Map")
track = diagram.new_track(1, name="Annotated Features", greytrack=True)
feature_set = track.new_set()
# The strand is set on the location object: recent Biopython releases no
# longer accept a `strand` argument on SeqFeature itself.
# GFP coding sequence.
feature_set.add_feature(
    SeqFeature(FeatureLocation(0, len(gfp_seq), strand=+1)),
    name="GFP Coding Sequence",
    label=True,
    color=colors.lightblue,
)
# His6 tag.
feature_set.add_feature(
    SeqFeature(FeatureLocation(len(gfp_seq), len(gfp_seq_with_his6), strand=+1)),
    name="His6 Tag",
    label=True,
    color=colors.lightgreen,
)
# Forward primer.
feature_set.add_feature(
    SeqFeature(FeatureLocation(0, len(forward_primer), strand=+1)),
    name="Forward Primer",
    label=True,
    color=colors.orange,
)
# Reverse primer.
feature_set.add_feature(
    SeqFeature(
        FeatureLocation(
            len(gfp_seq_with_his6) - len(reverse_primer),
            len(gfp_seq_with_his6),
            strand=-1,
        )
    ),
    name="Reverse Primer",
    label=True,
    color=colors.red,
)
# Restriction enzyme cut sites, one single-base marker per site.
# NOTE(review): the positions come straight from the restriction search and
# are used as 0-based coordinates here — confirm the convention.
for enzyme, sites in restriction_sites.items():
    for site in sites:
        feature_set.add_feature(
            SeqFeature(FeatureLocation(site, site + 1, strand=0)),
            # Labels are drawn as text, so pass the enzyme name as a string.
            name=str(enzyme),
            label=True,
            color=colors.purple,
        )
# Draw the diagram.
diagram.draw(format="circular", circular=True, pagesize='A4', start=0, end=len(record), circle_core=0.5)
# Make sure the output directory exists before writing the PDF.
output_path = os.path.join("pdf", "plasmid_map.pdf")
os.makedirs(os.path.dirname(output_path), exist_ok=True)
diagram.write(output_path, "PDF")
# Report the path the file was actually written to.
print(f"Zapisano diagram plazmidu do pliku '{output_path}'")