2024-06-20 19:10:59 +00:00
|
|
|
from Bio import SeqIO
|
|
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.database import engine, SessionLocal, Base
|
|
|
|
from app.models import Sequence, Feature
|
|
|
|
import json
|
|
|
|
|
|
|
|
# Create the database tables for every model registered on Base.
# This runs as a module-level side effect whenever the file is imported or executed.
Base.metadata.create_all(bind=engine)
|
|
|
|
|
|
|
|
def get_db():
    """Yield a new ``SessionLocal`` session and close it when the generator finishes.

    The session is closed in a ``finally`` clause, so it is released when the
    generator is exhausted, closed, or an exception is thrown into it.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
|
|
|
|
|
|
|
# Feature types that are persisted; any other feature type in the record is skipped.
_STORED_FEATURE_TYPES = frozenset({
    "promoter",
    "RBS",
    "CDS",
    "protein_bind",
    "misc_feature",
    "rep_origin",
    "terminator",
})

# Placeholder Biopython puts in SeqRecord.description when none was parsed.
_UNKNOWN_DESCRIPTION = "<unknown description>"


def _first_qualifier(feature, key, default):
    """Return the first value of qualifier *key*, or *default* if missing or empty."""
    values = feature.qualifiers.get(key)
    return values[0] if values else default


def _build_qualifiers(feature):
    """Collect the type-specific qualifiers that are stored with a feature."""
    if feature.type == "CDS":
        return {
            'gene_name': _first_qualifier(feature, 'gene', 'Unknown gene'),
            'product': _first_qualifier(feature, 'product', 'Unknown product'),
        }
    if feature.type == "protein_bind":
        return {
            'binding_site': _first_qualifier(
                feature, 'bound_moiety', 'Unknown binding site'
            ),
        }
    if feature.type == "misc_feature":
        return {
            'note': _first_qualifier(feature, 'note', 'No additional information'),
        }
    if feature.type == "rep_origin":
        return {'note': "This is a replication origin."}
    # promoter, RBS and terminator carry no extra qualifiers.
    return {}


def load_genbank_to_db(file_path: str):
    """Load a single-record GenBank file into the database.

    The record is stored as one ``Sequence`` row, and each feature whose type
    is in ``_STORED_FEATURE_TYPES`` is stored as a ``Feature`` row linked to
    it. Everything is written in one transaction: on any error the session is
    rolled back and the exception is re-raised. The session is always closed.

    Args:
        file_path: Path to a GenBank file readable by ``SeqIO.read``.

    Raises:
        OSError: If the file cannot be opened.
        Exception: Any error from parsing or from the database is propagated
            after the transaction has been rolled back.
    """
    # Parse first so an unreadable or malformed file fails before a session is opened.
    with open(file_path, 'r') as file:
        record = SeqIO.read(file, "genbank")

    # The description is an attribute of the record, not an annotations entry;
    # fall back only when it is empty or still Biopython's placeholder.
    description = record.description
    if not description or description == _UNKNOWN_DESCRIPTION:
        description = 'No description'

    # Keep a handle on the generator so closing it runs get_db()'s cleanup.
    db_gen = get_db()
    db = next(db_gen)
    try:
        sequence_data = Sequence(
            name=record.name,
            description=description,
            sequence=str(record.seq),
        )
        db.add(sequence_data)
        # Flush so the row gets its primary key without ending the transaction.
        db.flush()

        for feature in record.features:
            if feature.type not in _STORED_FEATURE_TYPES:
                continue

            start = feature.location.start
            end = feature.location.end
            # NOTE(review): this is a plain start:end slice of the record, so
            # strand and compound (join) locations are not applied. Confirm
            # whether feature.extract(record.seq) is wanted instead.
            db.add(Feature(
                type=feature.type,
                location=str(feature.location),
                sequence=str(record.seq[start:end]),
                qualifiers=json.dumps(_build_qualifiers(feature)),
                sequence_id=sequence_data.id,
            ))

        db.commit()
    except Exception:
        # Undo the partially written sequence/features before propagating.
        db.rollback()
        raise
    finally:
        # Closing the generator triggers the session close inside get_db().
        db_gen.close()

    print(f"Loaded {file_path} to database.")
|
|
|
|
|
|
|
|
# Script entry point: load the bundled example plasmid when run directly.
if __name__ == "__main__":
    load_genbank_to_db("data/plasmid.gb")
|
2024-06-20 19:10:59 +00:00
|
|
|
|