Add image metadata to the file table, and fetch more than one db row per query, because the get_job() function query was somehow completely slamming my database.
This commit is contained in:
parent
bd2abec2fc
commit
fd41dbbd4c
|
@ -0,0 +1,60 @@
|
|||
"""empty message
|
||||
|
||||
Revision ID: bada78e9a9a8
|
||||
Revises: e19fd729888d
|
||||
Create Date: 2017-11-25 03:33:37.355463
|
||||
|
||||
"""
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = 'bada78e9a9a8'
|
||||
down_revision = 'e19fd729888d'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# Patch in knowledge of the citext type, so it reflects properly.
|
||||
from sqlalchemy.dialects.postgresql.base import ischema_names
|
||||
import citext
|
||||
import queue
|
||||
import datetime
|
||||
from sqlalchemy.dialects.postgresql import ENUM
|
||||
from sqlalchemy.dialects.postgresql import JSON
|
||||
from sqlalchemy.dialects.postgresql import TSVECTOR
|
||||
ischema_names['citext'] = citext.CIText
|
||||
|
||||
|
||||
|
||||
def upgrade():
    """Apply revision bada78e9a9a8 to the ``db_files`` table.

    Performed in this order, printing a progress line before each step:

    1. Change ``db_files.id`` from INTEGER to BigInteger, keeping its
       existing sequence-backed server default.
    2. Add the nullable columns ``imgx`` and ``imgy`` (Integer) and
       ``phash`` (BigInteger).
    3. Create the non-unique ``phash_bktree_idx`` index on ``phash`` using
       the SP-GiST access method.
    """
    table = 'db_files'
    id_sequence_default = sa.text("nextval('db_files_id_seq'::regclass)")

    print("Altering column")
    op.alter_column(
        table,
        'id',
        existing_type=sa.INTEGER(),
        type_=sa.BigInteger(),
        autoincrement=True,
        existing_server_default=id_sequence_default,
    )

    print("Adding new columns")
    # (column name, SQLAlchemy type) pairs, added in this exact order.
    new_columns = (
        ('imgx', sa.Integer),
        ('imgy', sa.Integer),
        ('phash', sa.BigInteger),
    )
    for column_name, column_type in new_columns:
        op.add_column(table, sa.Column(column_name, column_type(), nullable=True))

    print("Creating phash index")
    # The indexed expression is passed as raw SQL so that the ``bktree_ops``
    # operator class is named explicitly.
    # NOTE(review): presumably ``bktree_ops`` comes from a PostgreSQL
    # extension that must already be installed -- confirm before running.
    op.create_index(
        'phash_bktree_idx',
        table,
        [sa.text('phash bktree_ops')],
        unique=False,
        postgresql_using='spgist',
    )

    print("Done")
|
||||
|
||||
|
||||
def downgrade():
    """Revert revision bada78e9a9a8 on the ``db_files`` table.

    Performed in this order:

    1. Drop the ``phash_bktree_idx`` index.
    2. Change ``db_files.id`` from BigInteger back to INTEGER, keeping its
       existing sequence-backed server default.
    3. Drop the ``phash``, ``imgy`` and ``imgx`` columns.
    """
    table = 'db_files'
    id_sequence_default = sa.text("nextval('db_files_id_seq'::regclass)")

    op.drop_index('phash_bktree_idx', table_name=table)

    # NOTE(review): presumably this fails if any stored id no longer fits in
    # INTEGER -- confirm against the data before downgrading.
    op.alter_column(
        table,
        'id',
        existing_type=sa.BigInteger(),
        type_=sa.INTEGER(),
        autoincrement=True,
        existing_server_default=id_sequence_default,
    )

    # Dropped in the reverse of the order in which upgrade() adds them.
    for column_name in ('phash', 'imgy', 'imgx'):
        op.drop_column(table, column_name)
|
|
@ -5,11 +5,13 @@ from sqlalchemy import create_engine
|
|||
from sqlalchemy.orm import sessionmaker
|
||||
from sqlalchemy.orm import scoped_session
|
||||
from sqlalchemy import Table
|
||||
from sqlalchemy import Index
|
||||
|
||||
from sqlalchemy import Column
|
||||
from sqlalchemy import Integer
|
||||
from sqlalchemy import BigInteger
|
||||
from sqlalchemy import Text
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy import Float
|
||||
from sqlalchemy import Boolean
|
||||
from sqlalchemy import DateTime
|
||||
|
@ -114,14 +116,19 @@ class Artist(Base):
|
|||
|
||||
class Files(Base):
|
||||
__tablename__ = 'db_files'
|
||||
id = Column(Integer, primary_key=True)
|
||||
id = Column(BigInteger, primary_key=True)
|
||||
|
||||
filepath = Column(citext.CIText(), nullable=False)
|
||||
fhash = Column(Text, nullable=False)
|
||||
|
||||
phash = Column(BigInteger)
|
||||
imgx = Column(Integer)
|
||||
imgy = Column(Integer)
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint('filepath'),
|
||||
UniqueConstraint('fhash'),
|
||||
Index('phash_bktree_idx', 'phash', postgresql_using="spgist")
|
||||
)
|
||||
|
||||
def tag_creator(tag):
|
||||
|
@ -204,6 +211,7 @@ class Releases(Base):
|
|||
|
||||
__table_args__ = (
|
||||
UniqueConstraint('postid', 'source'),
|
||||
Index('db_releases_source_state_id_idx', 'source', 'state', 'id')
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -43,6 +43,7 @@ class AbstractFetcher(object, metaclass=abc.ABCMeta):
|
|||
self.log = logging.getLogger(self.loggerpath)
|
||||
self.wg = util.WebRequest.WebGetRobust(logPath=self.loggerpath+".Web")
|
||||
|
||||
self.jobs_queued = []
|
||||
|
||||
|
||||
def get_job(self):
|
||||
|
@ -52,35 +53,39 @@ class AbstractFetcher(object, metaclass=abc.ABCMeta):
|
|||
while 1:
|
||||
self.log.info("Getting job")
|
||||
try:
|
||||
if not self.jobs_queued:
|
||||
raw_query = '''
|
||||
UPDATE
|
||||
db_releases
|
||||
SET
|
||||
state = 'fetching'
|
||||
WHERE
|
||||
db_releases.id in (
|
||||
SELECT
|
||||
db_releases.id
|
||||
FROM
|
||||
db_releases
|
||||
WHERE
|
||||
db_releases.state = 'new'
|
||||
AND
|
||||
source = :source
|
||||
ORDER BY
|
||||
db_releases.postid ASC
|
||||
LIMIT 500
|
||||
)
|
||||
AND
|
||||
db_releases.state = 'new'
|
||||
RETURNING
|
||||
db_releases.id;
|
||||
'''
|
||||
|
||||
raw_query = '''
|
||||
UPDATE
|
||||
db_releases
|
||||
SET
|
||||
state = 'fetching'
|
||||
WHERE
|
||||
db_releases.id in (
|
||||
SELECT
|
||||
db_releases.id
|
||||
FROM
|
||||
db_releases
|
||||
WHERE
|
||||
db_releases.state = 'new'
|
||||
AND
|
||||
source = :source
|
||||
ORDER BY
|
||||
db_releases.postid ASC
|
||||
LIMIT 1
|
||||
)
|
||||
AND
|
||||
db_releases.state = 'new'
|
||||
RETURNING
|
||||
db_releases.id;
|
||||
'''
|
||||
rids = session.execute(text(raw_query), {'source' : self.pluginkey})
|
||||
ridl = list(rids)
|
||||
self.jobs_queued = [tmp[0] for tmp in ridl]
|
||||
|
||||
rids = session.execute(text(raw_query), {'source' : self.pluginkey})
|
||||
ridl = list(rids)
|
||||
rid = ridl[0][0]
|
||||
assert self.jobs_queued
|
||||
|
||||
rid = self.jobs_queued.pop()
|
||||
|
||||
|
||||
job = db.session.query(db.Releases) \
|
||||
|
|
Loading…
Reference in New Issue