sequences and api are ok

This commit is contained in:
mpabi 2024-06-20 20:19:38 +00:00
parent e790db7bb7
commit 68367f04c3
2 changed files with 24 additions and 15 deletions

View File

@ -1,7 +1,6 @@
# app/models.py
from sqlalchemy import Column, Integer, String, Text, ForeignKey
from sqlalchemy.orm import relationship
from .database import Base
from app.database import Base
class Sequence(Base):
__tablename__ = "sequences"

View File

@ -1,5 +1,3 @@
#%%
# entrypoint.py
from Bio import SeqIO
from sqlalchemy.orm import Session
from app.database import engine, SessionLocal, Base
@ -9,7 +7,6 @@ import json
# Create the tables in the database
Base.metadata.create_all(bind=engine)
#%%
def get_db():
db = SessionLocal()
try:
@ -25,25 +22,38 @@ def load_genbank_to_db(file_path: str):
# Save the sequence to the database
sequence_data = Sequence(
name=record.name,
description=record.description,
description=record.description if 'description' in record.annotations else 'No description',
sequence=str(record.seq)
)
db.add(sequence_data)
db.commit()
db.refresh(sequence_data) # Odśwież sekwencję, aby uzyskać jej ID
# Save the features to the database
for feature in record.features:
feature_data = Feature(
type=feature.type,
location=str(feature.location),
sequence=str(record.seq[feature.location.start:feature.location.end]),
qualifiers=json.dumps(feature.qualifiers),
sequence_id=sequence_data.id
)
db.add(feature_data)
if feature.type in ["promoter", "RBS","CDS", "protein_bind", "misc_feature", "rep_origin", "terminator"]:
qualifiers = {}
if feature.type == "CDS":
qualifiers['gene_name'] = feature.qualifiers.get('gene', ['Unknown gene'])[0]
qualifiers['product'] = feature.qualifiers.get('product', ['Unknown product'])[0]
elif feature.type == "protein_bind":
qualifiers['binding_site'] = feature.qualifiers.get('bound_moiety', ['Unknown binding site'])[0]
elif feature.type == "misc_feature":
qualifiers['note'] = feature.qualifiers.get('note', ['No additional information'])[0]
elif feature.type == "rep_origin":
qualifiers['note'] = "This is a replication origin."
feature_data = Feature(
type=feature.type,
location=str(feature.location),
sequence=str(record.seq[feature.location.start:feature.location.end]),
qualifiers=json.dumps(qualifiers),
sequence_id=sequence_data.id # Ustawienie poprawnego ID
)
db.add(feature_data)
db.commit()
print(f"Loaded {file_path} to database.")
if __name__ == "__main__":
load_genbank_to_db("data/pET-28+(a).gb")
load_genbank_to_db("data/plasmid.gb")