inf04-web2/data/gfp2.py

#! /usr/bin/env python3
# vim:fenc=utf-8
#
# Copyright Â© 2024 user <user@penguin>
#
# Distributed under terms of the MIT license.
import os
from Bio import Entrez, SeqIO
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature, FeatureLocation

from Bio.Restriction import RestrictionBatch, EcoRI, BamHI, HindIII, Bpu1102I
from Bio.Restriction import AllEnzymes, Analysis

from Bio.Graphics import GenomeDiagram
from reportlab.lib import colors

#%%
# Ustawienia Entrez
Entrez.email = "your_email@example.com"  # Wpisz swÃ³j adres email

# Funkcja do pobierania sekwencji z NCBI i zapisywania jej lokalnie
def fetch_and_save_sequence(db, id, file_path):
    handle = Entrez.efetch(db=db, id=id, rettype="gb", retmode="text")
    record = SeqIO.read(handle, "genbank")
    handle.close()
    SeqIO.write(record, file_path, "genbank")
    return record

# Funkcja do wczytywania sekwencji z lokalnego pliku
def load_local_sequence(file_path):
    with open(file_path, 'r') as file:
        record = SeqIO.read(file, "genbank")
    return record

#%%

# ID dla sekwencji w NCBI
gfp_id = "U87974"
plasmid_id = "pET-28a(+)"

# ÅcieÅ¼ki do lokalnych plikÃ³w
gfp_file_path = gfp_id+'.gb'
plasmid_file_path = plasmid_id+'.gb'

#%%
# Sprawdzanie i pobieranie sekwencji GFP
if not os.path.exists(gfp_file_path):
    print(f"Plik {gfp_file_path} nie istnieje. Pobieranie z serwera NCBI...")
    gfp_record = fetch_and_save_sequence("nuccore", gfp_id, gfp_file_path)
    print(f"Pobrano i zapisano sekwencjÄ GFP do pliku {gfp_file_path}.")
else:
    print(f"Plik {gfp_file_path} istnieje. Wczytywanie danych z lokalnego pliku...")
    gfp_record = load_local_sequence(gfp_file_path)
    print(f"Wczytano dane z pliku {gfp_file_path}.")

gfp_seq = str(gfp_record.seq)
print(f"GFP Sequence: {gfp_seq[:60]}...")

#%%
# Sprawdzanie i pobieranie sekwencji plazmidu
if not os.path.exists(plasmid_file_path):
    print(f"Plik {plasmid_file_path} nie istnieje. Pobieranie z serwera NCBI...")
    record = fetch_and_save_sequence("nuccore", plasmid_id, plasmid_file_path)
    print(f"Pobrano i zapisano sekwencjÄ plazmidu do pliku {plasmid_file_path}.")
else:
    print(f"Plik {plasmid_file_path} istnieje. Wczytywanie danych z lokalnego pliku...")
    record = load_local_sequence(plasmid_file_path)
    print(f"Wczytano dane z pliku {plasmid_file_path}.")

#%%
# WyÅwietl etykiety (annotacje)
for feature in record.features:
    print(f"Type: {feature.type}")
    print(f"Location: {feature.location}")
    if 'gene' in feature.qualifiers:
        print(f"Gene: {feature.qualifiers['gene']}")
    if 'product' in feature.qualifiers:
        print(f"Product: {feature.qualifiers['product']}")
    print()
#%%


#%%

# PrzejdÅº przez wszystkie funkcje w rekordzie i wypisz ich szczegÃ³Åowe informacje
for feature in record.features:
    if feature.type in ["promoter", "RBS","CDS", "protein_bind", "misc_feature", "rep_origin", "terminator"]:
        print(f"Type: {feature.type}")
        print(f"Location: {feature.location}")
        print(f"Strand: {'+' if feature.strand == 1 else '-'}")

        # SzczegÃ³Åowe opisy
        if feature.type == "CDS":
            gene_name = feature.qualifiers.get('gene', ['Unknown gene'])[0]
            product = feature.qualifiers.get('product', ['Unknown product'])[0]
            print(f"Gene Name: {gene_name}")
            print(f"Product: {product}")
        elif feature.type == "protein_bind":
            binding_site = feature.qualifiers.get('bound_moiety', ['Unknown binding site'])[0]
            print(f"Binding Site: {binding_site}")
        elif feature.type == "misc_feature":
            note = feature.qualifiers.get('note', ['No additional information'])[0]
            print(f"Note: {note}")
        elif feature.type == "rep_origin":
            print("This is a replication origin.")

        # Pobierz sekwencjÄ funkcji
        feature_seq = record.seq[feature.location.start:feature.location.end]


        print(f"Sequence ( ): {feature_seq}\n")
        # UwzglÄdnij orientacjÄ nici (strand)
        if feature.strand == -1:
            feature_seq = feature_seq.reverse_complement()
            print(f"Sequence (^): {feature_seq}\n")


# ZnajdÅº i wyÅwietl sekwencjÄ terminatora T7
for feature in record.features:
    if feature.type == "terminator" and "T7" in feature.qualifiers.get('note', [''])[0]:
        print("Terminator T7 znaleziony:")
        print(f"Type: {feature.type}")
        print(f"Location: {feature.location}")
        print(f"Sequence: {record.seq[feature.location.start:feature.location.end]}")
        print()


#%%
# ZnajdÅº i wyÅwietl sekwencjÄ terminatora T7
for feature in record.features:
    if feature.type == "terminator" and "T7" in feature.qualifiers.get('note', [''])[0]:
        print("Terminator T7 znaleziony:")
        print(f"Type: {feature.type}")
        print(f"Location: {feature.location}")

        # Pobierz sekwencjÄ terminatora
        terminator_seq = record.seq[feature.location.start:feature.location.end]

        # SprawdÅº orientacjÄ i odwrÃ³Ä/uzupeÅnij jeÅli potrzebne
        if feature.strand == -1:
            terminator_seq = terminator_seq.reverse_complement()

        print(f"(+) Sequence: {terminator_seq}")
        print()

#%%


#%%
t=record[25:73].seq


#%%
a = str (t)
a

#%%
class Model:

    complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}

    def __init__ (self, seq=None):
        self.seq = seq

    def complement_dna(self):

        complementary_sequence = ''.join(self.complement[base] for base in self.seq)
        return complementary_sequence

#%%
r = Model(a)

#%%
r.seq

#%%
r.seq[::-1]

#%%
r.complement_dna()

#%%
r.seq
#%%
complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
complementary_sequence = ''.join(complement[base] for base in r.seq)
complementary_sequence

#%%


#%%
# Sekwencja DNA
dna_seq = record.seq

# ZnajdÅº miejsca ciÄcia dla NcoI i Bpu1102I
ncoI_sites = NcoI.search(dna_seq)
bpu1102I_sites = Bpu1102I.search(dna_seq)

# Przyjmij pierwsze miejsca ciÄcia
ncoI_site = ncoI_sites[0]
bpu1102I_site = bpu1102I_sites[0]

# WyodrÄbnij fragmenty 20 zasad od miejsc ciÄcia
ncoI_fragment = dna_seq[ncoI_site-16:ncoI_site+4]  # 16 zasad przed i 4 po ciÄciu
bpu1102I_fragment = dna_seq[bpu1102I_site:bpu1102I_site+20]  # 20 zasad po ciÄciu

# Lepkie koÅce
ncoI_sticky_end = ncoI_fragment[-4:]
ncoI_sticky_end_comp = ncoI_sticky_end.complement()

bpu1102I_sticky_end = bpu1102I_fragment[:4]
bpu1102I_sticky_end_comp = bpu1102I_sticky_end.complement()

# Funkcja do wyÅwietlania dwuniciowego DNA z lepkimi koÅcami w formacie schodkowym z indeksami dla dÅuÅ¼szej nici
def print_sticky_ends(seq1, sticky_end1, seq2, sticky_end2, start1, start2):
    # Convert sequences to strings
    seq1_str = str(seq1)
    sticky_end1_str = str(sticky_end1)
    comp_seq1_str = str(seq1.complement())
    sticky_end1_comp_str = str(sticky_end1.complement())

    seq2_str = str(seq2)
    sticky_end2_str = str(sticky_end2)
    comp_seq2_str = str(seq2.complement())
    sticky_end2_comp_str = str(sticky_end2.complement())

    # Add indexes to the ends
    seq1_with_index = f"{seq1_str[:-1]} -{start1+len(seq1_str)-1}"
    comp_seq1_with_index = f"{comp_seq1_str[:-1]}"

    seq2_with_index = f"{start2}- {seq2_str[1:]}"
    comp_seq2_with_index = f"{comp_seq2_str[1:]}"

    # Lustrzane odbicie sekwencji
    comp_seq2_with_index = comp_seq2_with_index[::-1]
    sticky_end2_comp_str = sticky_end2_comp_str[::-1]

    # Format the output as requested
    print(f"5'-{seq1_with_index}             {sticky_end1_str}-3' (+)")
    print(f"3'-{comp_seq1_with_index}{sticky_end1_comp_str}-5'")
    print()
    print(f"5'-{sticky_end2_str}               {seq2_with_index}-3' (+)")
    print(f"   {sticky_end2_comp_str}{comp_seq2_with_index}-5'")

# WyÅwietl lepkie koÅce fragmentÃ³w
print("Fragmenty po ciÄciu NcoI i Bpu1102I:")
print_sticky_ends(ncoI_fragment[:-4], ncoI_sticky_end, bpu1102I_fragment[4:], bpu1102I_sticky_end, ncoI_site-16, bpu1102I_site)

#%%
# Projektowanie starterÃ³w
forward_primer = gfp_seq[:20]  # Pierwsze 20 nukleotydÃ³w sekwencji GFP
reverse_primer = str(Seq(gfp_seq_with_his6[-20:]).reverse_complement())  # Ostatnie 20 nukleotydÃ³w + His6

print(f"Forward Primer: {forward_primer}")
print(f"Reverse Primer: {reverse_primer}")

#%%
# Miejsca ciÄcia restryktazy
enzymes = RestrictionBatch([EcoRI, BamHI, HindIII])
#enzymes = RestrictionBatch([BamHI, ])
restriction_sites = enzymes.search(record.seq)

#%%
# Przeprowadzenie analizy restrykcyjnej
analysis = Analysis(AllEnzymes, record.seq)

# Wynik analizy
results = analysis.full()

# WyÅwietlanie wynikÃ³w
#for enzyme in results:
#    if len(results[enzyme]) > 0:
#        print(f"{enzyme}: {results[enzyme]}")

#%%
# UtwÃ³rz diagram plazmidu
diagram = GenomeDiagram.Diagram("PUC19 Plasmid Map")
track = diagram.new_track(1, name="Annotated Features", greytrack=True)
feature_set = track.new_set()

# Dodanie sekwencji kodujÄcej GFP
feature_set.add_feature(
    SeqFeature(FeatureLocation(0, len(gfp_seq)), strand=+1),
    name="GFP Coding Sequence",
    label=True,
    color=colors.lightblue
)

# Dodanie His6 tagu
feature_set.add_feature(
    SeqFeature(FeatureLocation(len(gfp_seq), len(gfp_seq_with_his6)), strand=+1),
    name="His6 Tag",
    label=True,
    color=colors.lightgreen
)

# Dodanie forward primer
feature_set.add_feature(
    SeqFeature(FeatureLocation(0, len(forward_primer)), strand=+1),
    name="Forward Primer",
    label=True,
    color=colors.orange
)

# Dodanie reverse primer
feature_set.add_feature(
    SeqFeature(FeatureLocation(len(gfp_seq_with_his6) - len(reverse_primer), len(gfp_seq_with_his6)), strand=-1),
    name="Reverse Primer",
    label=True,
    color=colors.red
)

# Dodanie miejsc ciÄcia restryktazy
for enzyme, sites in restriction_sites.items():
    for site in sites:
        feature_set.add_feature(
            SeqFeature(FeatureLocation(site, site + 1), strand=0),
            name=enzyme,
            label=True,
            color=colors.purple
        )

# Rysowanie diagramu
diagram.draw(format="circular", circular=True, pagesize='A4', start=0, end=len(record), circle_core=0.5)
diagram.write("pdf/plasmid_map.pdf", "PDF")

print("Zapisano diagram plazmidu do pliku 'plasmid_map.pdf'")