Add image metadata to the file table, and fetch more than one db row per query, because the get_job() function's query was completely slamming my database.
This commit is contained in:
parent
bd2abec2fc
commit
fd41dbbd4c
|
@ -0,0 +1,60 @@
|
||||||
|
"""empty message
|
||||||
|
|
||||||
|
Revision ID: bada78e9a9a8
|
||||||
|
Revises: e19fd729888d
|
||||||
|
Create Date: 2017-11-25 03:33:37.355463
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = 'bada78e9a9a8'
|
||||||
|
down_revision = 'e19fd729888d'
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# Patch in knowledge of the citext type, so it reflects properly.
|
||||||
|
from sqlalchemy.dialects.postgresql.base import ischema_names
|
||||||
|
import citext
|
||||||
|
import queue
|
||||||
|
import datetime
|
||||||
|
from sqlalchemy.dialects.postgresql import ENUM
|
||||||
|
from sqlalchemy.dialects.postgresql import JSON
|
||||||
|
from sqlalchemy.dialects.postgresql import TSVECTOR
|
||||||
|
ischema_names['citext'] = citext.CIText
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Apply the migration: widen db_files.id and add image-metadata columns.

    - Promotes ``db_files.id`` from INTEGER to BIGINT (sequence default kept).
    - Adds nullable ``imgx``/``imgy`` (pixel dimensions) and ``phash``
      (64-bit perceptual hash) columns.
    - Builds an SP-GiST bktree index over ``phash`` for fast hamming-distance
      lookups.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    print("Altering column")
    # Keep the existing nextval() server default while widening the PK type.
    op.alter_column(
        'db_files', 'id',
        existing_type=sa.INTEGER(),
        type_=sa.BigInteger(),
        autoincrement=True,
        existing_server_default=sa.text("nextval('db_files_id_seq'::regclass)"),
    )

    print("Adding new columns")
    for col in (
        sa.Column('imgx', sa.Integer(), nullable=True),
        sa.Column('imgy', sa.Integer(), nullable=True),
        sa.Column('phash', sa.BigInteger(), nullable=True),
    ):
        op.add_column('db_files', col)

    print("Creating phash index")
    # bktree_ops is provided by the postgres bktree extension; spgist is the
    # only access method it supports.
    op.create_index(
        'phash_bktree_idx',
        'db_files',
        [sa.text('phash bktree_ops')],
        unique=False,
        postgresql_using='spgist',
    )
    print("Done")
    # ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """Revert the migration: drop the phash index and image-metadata columns,
    then shrink ``db_files.id`` back to INTEGER (sequence default kept).
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # Drop the index before the column it covers goes away.
    op.drop_index('phash_bktree_idx', table_name='db_files')

    op.alter_column(
        'db_files', 'id',
        existing_type=sa.BigInteger(),
        type_=sa.INTEGER(),
        autoincrement=True,
        existing_server_default=sa.text("nextval('db_files_id_seq'::regclass)"),
    )

    # Remove the columns in reverse order of their creation.
    for col_name in ('phash', 'imgy', 'imgx'):
        op.drop_column('db_files', col_name)
    # ### end Alembic commands ###
|
|
@ -5,11 +5,13 @@ from sqlalchemy import create_engine
|
||||||
from sqlalchemy.orm import sessionmaker
|
from sqlalchemy.orm import sessionmaker
|
||||||
from sqlalchemy.orm import scoped_session
|
from sqlalchemy.orm import scoped_session
|
||||||
from sqlalchemy import Table
|
from sqlalchemy import Table
|
||||||
|
from sqlalchemy import Index
|
||||||
|
|
||||||
from sqlalchemy import Column
|
from sqlalchemy import Column
|
||||||
from sqlalchemy import Integer
|
from sqlalchemy import Integer
|
||||||
from sqlalchemy import BigInteger
|
from sqlalchemy import BigInteger
|
||||||
from sqlalchemy import Text
|
from sqlalchemy import Text
|
||||||
|
from sqlalchemy import text
|
||||||
from sqlalchemy import Float
|
from sqlalchemy import Float
|
||||||
from sqlalchemy import Boolean
|
from sqlalchemy import Boolean
|
||||||
from sqlalchemy import DateTime
|
from sqlalchemy import DateTime
|
||||||
|
@ -114,14 +116,19 @@ class Artist(Base):
|
||||||
|
|
||||||
class Files(Base):
    """A downloaded file on disk, keyed by path and content hash.

    ``phash``/``imgx``/``imgy`` hold image metadata (perceptual hash and
    pixel dimensions); they are nullable since non-image files have none.
    """
    __tablename__ = 'db_files'

    # BigInteger PK — the table outgrew a 32-bit id.
    id       = Column(BigInteger, primary_key=True)

    filepath = Column(citext.CIText(), nullable=False)
    fhash    = Column(Text, nullable=False)

    # Image metadata (null for non-images).
    phash    = Column(BigInteger)
    imgx     = Column(Integer)
    imgy     = Column(Integer)

    __table_args__ = (
        UniqueConstraint('filepath'),
        UniqueConstraint('fhash'),
        # SP-GiST bktree index for hamming-distance phash searches; mirrors
        # the index created by migration bada78e9a9a8.
        Index('phash_bktree_idx', 'phash', postgresql_using="spgist"),
    )
|
||||||
|
|
||||||
def tag_creator(tag):
|
def tag_creator(tag):
|
||||||
|
@ -204,6 +211,7 @@ class Releases(Base):
|
||||||
|
|
||||||
__table_args__ = (
|
__table_args__ = (
|
||||||
UniqueConstraint('postid', 'source'),
|
UniqueConstraint('postid', 'source'),
|
||||||
|
Index('db_releases_source_state_id_idx', 'source', 'state', 'id')
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,7 @@ class AbstractFetcher(object, metaclass=abc.ABCMeta):
|
||||||
self.log = logging.getLogger(self.loggerpath)
|
self.log = logging.getLogger(self.loggerpath)
|
||||||
self.wg = util.WebRequest.WebGetRobust(logPath=self.loggerpath+".Web")
|
self.wg = util.WebRequest.WebGetRobust(logPath=self.loggerpath+".Web")
|
||||||
|
|
||||||
|
self.jobs_queued = []
|
||||||
|
|
||||||
|
|
||||||
def get_job(self):
|
def get_job(self):
|
||||||
|
@ -52,35 +53,39 @@ class AbstractFetcher(object, metaclass=abc.ABCMeta):
|
||||||
while 1:
|
while 1:
|
||||||
self.log.info("Getting job")
|
self.log.info("Getting job")
|
||||||
try:
|
try:
|
||||||
|
if not self.jobs_queued:
|
||||||
|
raw_query = '''
|
||||||
|
UPDATE
|
||||||
|
db_releases
|
||||||
|
SET
|
||||||
|
state = 'fetching'
|
||||||
|
WHERE
|
||||||
|
db_releases.id in (
|
||||||
|
SELECT
|
||||||
|
db_releases.id
|
||||||
|
FROM
|
||||||
|
db_releases
|
||||||
|
WHERE
|
||||||
|
db_releases.state = 'new'
|
||||||
|
AND
|
||||||
|
source = :source
|
||||||
|
ORDER BY
|
||||||
|
db_releases.postid ASC
|
||||||
|
LIMIT 500
|
||||||
|
)
|
||||||
|
AND
|
||||||
|
db_releases.state = 'new'
|
||||||
|
RETURNING
|
||||||
|
db_releases.id;
|
||||||
|
'''
|
||||||
|
|
||||||
raw_query = '''
|
rids = session.execute(text(raw_query), {'source' : self.pluginkey})
|
||||||
UPDATE
|
ridl = list(rids)
|
||||||
db_releases
|
self.jobs_queued = [tmp[0] for tmp in ridl]
|
||||||
SET
|
|
||||||
state = 'fetching'
|
|
||||||
WHERE
|
|
||||||
db_releases.id in (
|
|
||||||
SELECT
|
|
||||||
db_releases.id
|
|
||||||
FROM
|
|
||||||
db_releases
|
|
||||||
WHERE
|
|
||||||
db_releases.state = 'new'
|
|
||||||
AND
|
|
||||||
source = :source
|
|
||||||
ORDER BY
|
|
||||||
db_releases.postid ASC
|
|
||||||
LIMIT 1
|
|
||||||
)
|
|
||||||
AND
|
|
||||||
db_releases.state = 'new'
|
|
||||||
RETURNING
|
|
||||||
db_releases.id;
|
|
||||||
'''
|
|
||||||
|
|
||||||
rids = session.execute(text(raw_query), {'source' : self.pluginkey})
|
assert self.jobs_queued
|
||||||
ridl = list(rids)
|
|
||||||
rid = ridl[0][0]
|
rid = self.jobs_queued.pop()
|
||||||
|
|
||||||
|
|
||||||
job = db.session.query(db.Releases) \
|
job = db.session.query(db.Releases) \
|
||||||
|
|
Loading…
Reference in New Issue