starter
This commit is contained in:
commit
f5b3d446e1
|
@ -0,0 +1,134 @@
|
|||
#! /usr/bin/env python3
|
||||
# vim:fenc=utf-8
|
||||
#
|
||||
# Copyright © 2024 user <user@penguin>
|
||||
#
|
||||
# Distributed under terms of the MIT license.
|
||||
import os
|
||||
from Bio import Entrez, SeqIO
|
||||
from Bio.Seq import Seq
|
||||
from Bio.SeqFeature import SeqFeature, FeatureLocation
|
||||
from Bio.Restriction import RestrictionBatch, EcoRI, BamHI, HindIII
|
||||
from Bio.Graphics import GenomeDiagram
|
||||
from reportlab.lib import colors
|
||||
|
||||
|
||||
#%%
|
||||
# Entrez settings
|
||||
Entrez.email = "your_email@example.com" # Enter your email address
|
||||
|
||||
def fetch_and_save_sequence(db, id, file_path):
    """Download one GenBank record from NCBI and save it to a local file.

    Args:
        db: Entrez database name forwarded to ``Entrez.efetch``.
        id: Identifier of the record to fetch. The name shadows the builtin
            ``id`` but is kept so keyword callers keep working.
        file_path: Destination path; the record is written there in GenBank
            format.

    Returns:
        The record parsed from the Entrez response by ``SeqIO.read``.

    Raises:
        ValueError: Propagated from ``SeqIO.read`` when the response does not
            hold exactly one record (per the Biopython documentation).
    """
    handle = Entrez.efetch(db=db, id=id, rettype="gb", retmode="text")
    try:
        record = SeqIO.read(handle, "genbank")
    finally:
        # Release the network handle even when parsing the response fails.
        handle.close()
    SeqIO.write(record, file_path, "genbank")
    return record
|
||||
|
||||
def load_local_sequence(file_path):
    """Read a single GenBank record from a local file.

    Args:
        file_path: Path of the GenBank file to open for reading.

    Returns:
        The record produced by ``SeqIO.read`` for that file.
    """
    with open(file_path, 'r') as genbank_file:
        return SeqIO.read(genbank_file, "genbank")
|
||||
|
||||
# Paths of the locally cached sequence files
gfp_file_path = 'gfp_sequence.gb'
plasmid_file_path = 'plasmid_sequence.gb'

# NCBI identifiers of the sequences
gfp_id = "U87974"
# NOTE(review): the GenBank file cached for U96626 is titled "Mus musculus
# chondroadherin gene", not a plasmid vector - confirm this accession.
plasmid_id = "U96626"


def _load_or_fetch_record(seq_id, file_path, label):
    """Return the record cached at ``file_path``, downloading it if missing.

    Args:
        seq_id: Identifier requested from the "nuccore" database when the
            local file does not exist.
        file_path: Location of the local GenBank file.
        label: Sequence description inserted into the download confirmation
            message.

    Returns:
        The record returned by ``fetch_and_save_sequence`` or
        ``load_local_sequence``.
    """
    if os.path.exists(file_path):
        print(f"Plik {file_path} istnieje. Wczytywanie danych z lokalnego pliku...")
        loaded = load_local_sequence(file_path)
        print(f"Wczytano dane z pliku {file_path}.")
    else:
        print(f"Plik {file_path} nie istnieje. Pobieranie z serwera NCBI...")
        loaded = fetch_and_save_sequence("nuccore", seq_id, file_path)
        print(f"Pobrano i zapisano sekwencję {label} do pliku {file_path}.")
    return loaded


# Obtain the GFP sequence (local file first, NCBI otherwise)
gfp_record = _load_or_fetch_record(gfp_id, gfp_file_path, "GFP")

gfp_seq = str(gfp_record.seq)
print(f"GFP Sequence: {gfp_seq[:60]}...")

# Obtain the plasmid sequence (local file first, NCBI otherwise)
record = _load_or_fetch_record(plasmid_id, plasmid_file_path, "plazmidu")
|
||||
#%%
|
||||
# Sequence coding for the His6 tag
his6_tag = "CACCACCACCACCACCAC"  # Codes for 6x histidine (His6)

# GFP sequence with the His6 tag appended at its end
# NOTE(review): gfp_seq is the complete fetched record, not only its CDS. The
# cached U87974 file annotates the CDS as 21..812 of 818 bp, so the tag lands
# after the stop codon - confirm whether the CDS without its stop codon should
# be used here instead.
gfp_seq_with_his6 = gfp_seq + his6_tag

# Primer design
anneal_length = 20  # Number of template nucleotides each primer anneals to

# First 20 nucleotides of the GFP sequence
forward_primer = gfp_seq[:anneal_length]

# Reverse complement of the last 20 GFP nucleotides followed by the His6 tag.
# Slicing the last 20 nt of the tagged sequence would keep the 18 nt tag and
# only 2 nt of GFP, leaving the primer with nothing to anneal to.
reverse_primer = str(Seq(gfp_seq[-anneal_length:] + his6_tag).reverse_complement())

print(f"Forward Primer: {forward_primer}")
print(f"Reverse Primer: {reverse_primer}")
|
||||
|
||||
# Restriction enzyme cut sites found in the plasmid record
enzymes = RestrictionBatch([EcoRI, BamHI, HindIII])
restriction_sites = enzymes.search(record.seq)

# Create the plasmid diagram with a single annotated track
diagram = GenomeDiagram.Diagram("pET-28a(+) Plasmid Map")
track = diagram.new_track(1, name="Annotated Features", greytrack=True)
feature_set = track.new_set()

# The strand is set on the location rather than passed to SeqFeature():
# SeqFeature's own ``strand`` keyword was deprecated and later removed from
# Biopython, while FeatureLocation accepts it in old and new releases.

# GFP coding sequence, drawn from position 0
feature_set.add_feature(
    SeqFeature(FeatureLocation(0, len(gfp_seq), strand=+1)),
    name="GFP Coding Sequence",
    label=True,
    color=colors.lightblue,
)

# His6 tag, directly after the GFP sequence
feature_set.add_feature(
    SeqFeature(FeatureLocation(len(gfp_seq), len(gfp_seq_with_his6), strand=+1)),
    name="His6 Tag",
    label=True,
    color=colors.lightgreen,
)

# Forward primer, at the start of the GFP sequence
feature_set.add_feature(
    SeqFeature(FeatureLocation(0, len(forward_primer), strand=+1)),
    name="Forward Primer",
    label=True,
    color=colors.orange,
)

# Reverse primer, on the minus strand at the end of the tagged sequence
feature_set.add_feature(
    SeqFeature(
        FeatureLocation(
            len(gfp_seq_with_his6) - len(reverse_primer),
            len(gfp_seq_with_his6),
            strand=-1,
        )
    ),
    name="Reverse Primer",
    label=True,
    color=colors.red,
)
|
||||
|
||||
# Add one single-base marker per restriction cut site
for enzyme, sites in restriction_sites.items():
    # The keys of the search result are enzyme classes; the diagram label
    # needs text, so use the enzyme's printable name.
    enzyme_name = str(enzyme)
    for site in sites:
        feature_set.add_feature(
            # Strand goes on the location: SeqFeature's own ``strand``
            # keyword is no longer accepted by recent Biopython releases.
            SeqFeature(FeatureLocation(site, site + 1, strand=0)),
            name=enzyme_name,
            label=True,
            color=colors.purple,
        )

# Draw the diagram as a circular map spanning the whole record
diagram.draw(format="circular", circular=True, pagesize='A4', start=0, end=len(record), circle_core=0.5)

# Make sure the output directory exists; writing would otherwise fail on a
# fresh checkout that has no "pdf" folder yet.
os.makedirs("pdf", exist_ok=True)
diagram.write("pdf/plasmid_map.pdf", "PDF")

print("Zapisano diagram plazmidu do pliku 'plasmid_map.pdf'")
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
LOCUS SCU87974 818 bp mRNA linear SYN 24-FEB-1997
|
||||
DEFINITION Synthetic construct modified green fluorescent protein GFP5-ER
|
||||
(mgfp5-ER) mRNA, complete cds.
|
||||
ACCESSION U87974
|
||||
VERSION U87974.1
|
||||
KEYWORDS .
|
||||
SOURCE synthetic construct
|
||||
ORGANISM synthetic construct
|
||||
other sequences; artificial sequences.
|
||||
REFERENCE 1 (bases 1 to 818)
|
||||
AUTHORS Siemering,K.R., Golbik,R., Sever,R. and Haseloff,J.
|
||||
TITLE Mutations that suppress the thermosensitivity of green fluorescent
|
||||
protein
|
||||
JOURNAL Curr. Biol. 6 (12), 1653-1663 (1996)
|
||||
PUBMED 8994830
|
||||
REFERENCE 2 (bases 1 to 818)
|
||||
AUTHORS Haseloff,J., Siemering,K.R., Prasher,D. and Hodge,S.
|
||||
TITLE Removal of a cryptic intron and subcellular localisation of green
|
||||
fluorescent protein are required to mark transgenic Arabidopsis
|
||||
plants brightly
|
||||
JOURNAL Proc. Natl. Acad. Sci. U.S.A. (1997) In press
|
||||
REFERENCE 3 (bases 1 to 818)
|
||||
AUTHORS Siemering,K.R., Golbik,R., Sever,R. and Haseloff,J.
|
||||
TITLE Direct Submission
|
||||
JOURNAL Submitted (31-JAN-1997) Division of Cell Biology, MRC Laboratory of
|
||||
Molecular Biology, Hills Road, Cambridge CB2 2QH, UK
|
||||
FEATURES Location/Qualifiers
|
||||
source 1..818
|
||||
/organism="synthetic construct"
|
||||
/mol_type="mRNA"
|
||||
/db_xref="taxon:32630"
|
||||
gene 1..818
|
||||
/gene="mgfp5-ER"
|
||||
CDS 21..812
|
||||
/gene="mgfp5-ER"
|
||||
/note="contains codon usage changes that disrupt a cryptic
|
||||
plant intron, mutations that increase the thermotolerance
|
||||
and change the spectral characteristics of the protein, and
|
||||
sequences that code for signal peptides that result in
|
||||
retention of the protein in the plant endoplasmic
|
||||
reticulum"
|
||||
/codon_start=1
|
||||
/transl_table=11
|
||||
/product="modified green fluorescent protein GFP5-ER"
|
||||
/protein_id="AAB47999.1"
|
||||
/translation="MKTNLFLFLIFSLLLSLSSAEFSKGEELFTGVVPILVELDGDVNG
|
||||
HKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDF
|
||||
FKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLE
|
||||
YNYNSHNVYIMADKQKNGIKANFKTRHNIEDGGVQLADHYQQNTPIGDGPVLLPDNHYL
|
||||
STQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYKHDEL"
|
||||
ORIGIN
|
||||
1 ggatccaagg agatataaca atgaagacta atctttttct ctttctcatc ttttcacttc
|
||||
61 tcctatcatt atcctcggcc gaattcagta aaggagaaga acttttcact ggagttgtcc
|
||||
121 caattcttgt tgaattagat ggtgatgtta atgggcacaa attttctgtc agtggagagg
|
||||
181 gtgaaggtga tgcaacatac ggaaaactta cccttaaatt tatttgcact actggaaaac
|
||||
241 tacctgttcc atggccaaca cttgtcacta ctttctctta tggtgttcaa tgcttttcaa
|
||||
301 gatacccaga tcatatgaag cggcacgact tcttcaagag cgccatgcct gagggatacg
|
||||
361 tgcaggagag gaccatcttc ttcaaggacg acgggaacta caagacacgt gctgaagtca
|
||||
421 agtttgaggg agacaccctc gtcaacagga tcgagcttaa gggaatcgat ttcaaggagg
|
||||
481 acggaaacat cctcggccac aagttggaat acaactacaa ctcccacaac gtatacatca
|
||||
541 tggccgacaa gcaaaagaac ggcatcaaag ccaacttcaa gacccgccac aacatcgaag
|
||||
601 acggcggcgt gcaactcgct gatcattatc aacaaaatac tccaattggc gatggccctg
|
||||
661 tccttttacc agacaaccat tacctgtcca cacaatctgc cctttcgaaa gatcccaacg
|
||||
721 aaaagagaga ccacatggtc cttcttgagt ttgtaacagc tgctgggatt acacatggca
|
||||
781 tggatgaact atacaaacat gatgagcttt aagagctc
|
||||
//
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,131 @@
|
|||
LOCUS MMU96626 4742 bp DNA linear ROD 07-FEB-1998
|
||||
DEFINITION Mus musculus chondroadherin gene, complete cds.
|
||||
ACCESSION U96626
|
||||
VERSION U96626.1
|
||||
KEYWORDS .
|
||||
SOURCE Mus musculus (house mouse)
|
||||
ORGANISM Mus musculus
|
||||
Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
|
||||
Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha;
|
||||
Muroidea; Muridae; Murinae; Mus; Mus.
|
||||
REFERENCE 1 (bases 1 to 4742)
|
||||
AUTHORS Landgren,C., Beier,D.R., Fassler,R., Heinegard,D. and Sommarin,Y.
|
||||
TITLE The mouse chondroadherin gene: characterization and chromosomal
|
||||
localization
|
||||
JOURNAL Genomics 47 (1), 84-91 (1998)
|
||||
PUBMED 9465299
|
||||
REFERENCE 2 (bases 1 to 4742)
|
||||
AUTHORS Landgren,C. and Sommarin,Y.
|
||||
TITLE Direct Submission
|
||||
JOURNAL Submitted (07-APR-1997) Connective Tissue, Cell and Molecular
|
||||
Biology, Lund 22100, Sweden
|
||||
FEATURES Location/Qualifiers
|
||||
source 1..4742
|
||||
/organism="Mus musculus"
|
||||
/mol_type="genomic DNA"
|
||||
/db_xref="taxon:10090"
|
||||
/chromosome="11"
|
||||
/map="between D11Mit36 and D11Mit10"
|
||||
mRNA join(671..1490,3420..3583,3831..3976,4202..4742)
|
||||
/product="chondroadherin"
|
||||
exon 671..1490
|
||||
/number=1
|
||||
CDS join(720..1490,3420..3583,3831..3972)
|
||||
/codon_start=1
|
||||
/product="chondroadherin"
|
||||
/protein_id="AAC39963.1"
|
||||
/translation="MARALLFSLVFLAILLPALAACPQNCHCHGDLQHVICDKVGLQKI
|
||||
PKVSETTKLLNLQRNNFPVLAANSFRTMPNLVSLHLQHCNIREVAAGAFRGLKQLIYLY
|
||||
LSHNDIRVLRAGAFDDLTELTYLYLDHNKVSELPRGLLSPLVNLFILQLNNNKIRELRA
|
||||
GAFQGAKDLRWLYLSENALSSLQPGSLDDVENLAKFHLDKNQLSSYPSAALSKLRVVEE
|
||||
LKLSHNPLKSIPDNAFQSFGRYLETLWLDNTNLEKFSDAAFSGVTTLKHVHLDNNRLNQ
|
||||
LPSSFPFDNLETLTLTNNPWKCTCQLRGLRRWLEAKASRPDATCSSPAKFKGQRIRDTD
|
||||
ALRSCKSPTKRSKKAGRH"
|
||||
exon 3420..3583
|
||||
/number=2
|
||||
exon 3831..3976
|
||||
/number=3
|
||||
exon 4202..4742
|
||||
/number=4
|
||||
ORIGIN
|
||||
1 gagctcttac gggcctggtg ccactgggct ccgagaaggg gcagagccaa acgcacggct
|
||||
61 gtcagctagc ctctgcaacc agctccccca cctccttggg ataaactgag gaacccagaa
|
||||
121 gcgggagccc aacccacagc agctctcacg ctccgcctgc gccgcacaac agtcccatta
|
||||
181 aagcgccgcc ggctggccga ccgcggtgag acgcatccgg ctgtcgggcc ccacttcctc
|
||||
241 cctcccggag tccagggtga cctgtctgcc aagggtgtat gggggaagga gacgtagaga
|
||||
301 actcaaactt gagcaaataa ataagttctg ggaacacttc cctctgccca gtggaaattc
|
||||
361 agaagcccct cgacacacct atcaccgtcc accccacctc ggggtgttgg tccagataga
|
||||
421 ggagggtagg ggaaggtgca gcataatgtt tgcaaacagg aaccaagggg ttggggttca
|
||||
481 ggggaagggc cctcagccct acacacggtc tcctgctgtg aaaagaggcc cccagccatc
|
||||
541 gaggatgggg acgcatctct gggcgggaag gggttaaatc agtggcttcg gtgctccacg
|
||||
601 tagtagctgg ctccgctgcc aactgcggtc aaggctgccc tataaatggg ccgggagacc
|
||||
661 cgagagtcga ggacttgtcg ctgccttagc ccccagccca ggctcaaggc gttctaacca
|
||||
721 tggcccgcgc gctcttattc agtctggtct ttcttgccat cctcctgcct gcgctagccg
|
||||
781 cctgccccca aaactgccac tgccatggag atctgcagca tgtcatctgc gacaaggtgg
|
||||
841 ggctgcagaa gatccccaag gtatcagaga caaccaaact gctcaatctc cagcgcaaca
|
||||
901 acttcccggt gctggctgcc aactcgtttc ggaccatgcc gaacctggtc tccctgcacc
|
||||
961 tgcaacactg caacatccgc gaggtggcgg ctggtgcctt ccgaggcctg aagcagctta
|
||||
1021 tctacctgta cctgtcccac aacgacatcc gggtattgcg agctggagcc ttcgacgacc
|
||||
1081 tgactgaact cacttacctc tatctagacc acaacaaagt gtcggaactg ccccgggggt
|
||||
1141 tgctctctcc tctggtcaac ctcttcatct tgcaactcaa caacaacaaa atccgagagc
|
||||
1201 tgcgtgctgg agctttccag ggggccaagg acctgcgctg gctctacctg tcagaaaatg
|
||||
1261 ccctcagttc cctgcagcct ggttccctgg atgatgtgga gaacctagcc aagttccacc
|
||||
1321 tggacaagaa ccagctgtct agctacccct cagccgccct gagcaaactt cgggtggtgg
|
||||
1381 aggagctgaa gctgtctcac aaccctctga agagcatccc agacaatgcc ttccagtcct
|
||||
1441 tcggtagata tctggagacc ctctggctgg ataacaccaa cctggagaag gtaagtgccc
|
||||
1501 cagctgcagt tctcccggct ctttctagga tgccacccca ggaaccacat cgggacagca
|
||||
1561 ttccctcctc ctggactcct tgtcactcag attatcctcc aaagcaaggg gcctttgcca
|
||||
1621 cttctctctg cattgacaca cacccctccc catccttctc acccttacct ggtcccgagg
|
||||
1681 cttctcagac agggctctct tgggactcct tgtaagagat ggaactgttc tggtcccaca
|
||||
1741 aatggcctct ggggaggcat ctgccccaga ccccacggtt aacagcagac acctcttcct
|
||||
1801 gttggacttt ccacccctct cttaccagtc tcctgcaagc ctaccatttc ataaaggaag
|
||||
1861 gtttgtctac tgaggcacct tccagaagct gagagaacac aaagagtgcc tgttcccagt
|
||||
1921 gggtgctggc ctgtgttgct aaggtaaagt gagaggccag aggatgaatt ggggagcagg
|
||||
1981 taccttaagg agaactctcc cacaccctcg tcccattccc tccgtgggtc gaaacacact
|
||||
2041 caattttatt gcaacctcgg gtcgatttca aatatggacg ggaaaccaag acacggagac
|
||||
2101 ctgacttggc ttcaccgagt tgctggcttg gtgctcttgc agcccgtgaa taaacacatg
|
||||
2161 caatccctgg cacacgcccc cccaagtgtc tcctcagagc acagactgac tgtctcactg
|
||||
2221 tccagcagac agtttgttga cttttgcttt ccccagtcac cagatcccag actcctcaat
|
||||
2281 tcttcctccc ttgggatgct caggctagcc tggtcggggc tgtctaggag caaagtcctt
|
||||
2341 ttcagggaat caccctggta cacaacacag tttcagccag aaggttcggg cagagaccag
|
||||
2401 agcgaggtag agtagttttg tttgtagaga acgtcacagc ccaatctggt ctgtgagagg
|
||||
2461 tcacagatac tgagctgagg acagaacgcc aaccttatct aggtagatgg agccttctgg
|
||||
2521 gtggatcttc tttagcctgg cctcacccca gctaaatgta gaccacccta gtgacaaaac
|
||||
2581 agtggccagt agaacagcat acgaaggctg atttagaatg aggaggccaa agccgggcgt
|
||||
2641 ggtagcgcac gccttcaatc ccagtacctg ggaggcaaag gcaggcgaat ttctaagttc
|
||||
2701 gaggccagcc tggtctacag agtgagttcc aggacagcca ggactacaca gagaaaccct
|
||||
2761 gtctcacaaa aaccaaaaag aaaaaagaaa aaaaaaagaa tgaggaggcc agagagaact
|
||||
2821 ggcaccaagg ctcagaggag ggcaccaata catttcctct ggagccatcc atcctggcca
|
||||
2881 aagtgcccac agagagctcc acaaatagca tctagcagaa ggcagagcaa tgaggacacc
|
||||
2941 actgggggaa atcctgggac cagggggctg tggggagcct gggctcactg cccatcccac
|
||||
3001 caaaccctga ccaggttcac cgtgtgcccg ccccaggcac actgcccatc ccaccaaaca
|
||||
3061 ctgaccaggt tcaccgtgtg cccgcccggg cactcaaagc caagctctgc ctgctcccca
|
||||
3121 gacccagtgt ttgtgcaggg aggtgctgcc taggaggtgg gggacagggt gtggtcctgc
|
||||
3181 agctactgtt tactagctag gtgaccctgt ccaaaggact gaaccttttc cgtttacaca
|
||||
3241 ttgggaagga ccccttccct ctctctctct ctctcaaaaa aaggtgttgt ggaggatgac
|
||||
3301 atcattgtgg ctcctcaaca ggccagggcg tgtgtccagg gaaaagacaa ccctggtcct
|
||||
3361 ctgagaaggg agccaggaga ctgagttgag gtgctcactc tgcctccctc acctcacagt
|
||||
3421 tctcagatgc tgccttctcg ggtgtgacca cactgaaaca cgtccatctg gacaacaacc
|
||||
3481 gcctgaacca actgccttcc tccttcccct ttgacaacct ggagaccctc actctcacca
|
||||
3541 acaacccatg gaaatgcacc tgccagctcc gtggccttcg gcggtgagaa tattcctcca
|
||||
3601 tataaccccc agactgccgt ccacatgaca gacggtccta gagtaggaca gcctggacat
|
||||
3661 cctagtcagc tacctagcat gtcgggtact gagtggttcc cttctctcat ttgtcaaatg
|
||||
3721 aagatgacaa ctccagatat ttctatggcc atagtccatc ccggtcactg tccctttccc
|
||||
3781 aagccttccc acccagcttt tccaagccca gcaactcttt gtctctgtag gtggttggaa
|
||||
3841 gccaaggctt ctcgaccgga tgctacctgc tcctcgccag ccaagttcaa gggtcagcgg
|
||||
3901 attcgtgaca cagatgccct tcgcagctgc aaatccccga ccaagaggtc caagaaagct
|
||||
3961 ggccgccatt aaacaggtgg gggctgggta gggaggccac cacggtctac ctttggaaat
|
||||
4021 tccagatggg gtgctgctat atcccatgac accacttccg gaggagcaat cagttccctg
|
||||
4081 tcttacaaga aaaggaggga ggacaggata acctctccca tggcttggcc taggacgtcc
|
||||
4141 atgggtccct ttaatgactc tgggtgactg gaatcctaat acccatcttc tctcactata
|
||||
4201 ggtcctgatc cagccagtcc tggcgactgc cttccgctgg agagactact gacgttccct
|
||||
4261 cccatcatcc acaccttctc ctacagcctc tgcggatgca cagcgctgcc ccgcccccgc
|
||||
4321 ccccacctag gtacatcctg gcaggggcac tgggctctct atcaccatcc cagctccacc
|
||||
4381 cagtggggtc ctaggaaaga cacagaatcc ctccccagcc actgtgtctg ggctctgcca
|
||||
4441 tggctccttt gagagaagct attgtagaac ctcctaccct ctgtccatcg gagctaaagc
|
||||
4501 gcagtggtca ttgggatgac cacgttatta ccaccttcct cggttccctc tgtccctgcc
|
||||
4561 atttggaaac aaacatcagg cccctgaccc accctgattg ccagaaagaa tttcaggccc
|
||||
4621 atgccccaac tctgccagtt cctgcctgcc aggacatgct accaggatac cagtagcgct
|
||||
4681 tggctgcata tccttcctgt ttgcgctcca gatttctata aacataaatg tatgtgtgtt
|
||||
4741 ca
|
||||
//
|
Loading…
Reference in New Issue