Compare commits


.gitignore

@@ -129,3 +129,6 @@ dmypy.json
# Pyre type checker
.pyre/

# Kate swap files
*.kate-swp

@@ -1,62 +0,0 @@
import xml.etree.ElementTree

import hashdb


class DatImportError(Exception):
    '''This error is raised when a DAT import fails.'''


# TODO: l o g g i n g
# TODO: Consider using a context object to avoid keeping large XML trees in memory.
class Dat:
    '''A Dat object processes DAT files into the data structures defined in hashdb.'''

    def __init__(self, filename):
        '''Open the given DAT file and gather metadata from it.'''
        xml_tree = xml.etree.ElementTree.parse(filename)
        self._xml_root = xml_tree.getroot()

        dat_header = self._xml_root.find('header')
        self.info = hashdb.DatInfo(name=dat_header.find('name').text,
                                   description=dat_header.find('description').text,
                                   platform=None,
                                   version=dat_header.find('version').text)

    def set_platform(self, platform_info):
        '''
        Set a platform for this DAT file.

        DAT files don't include platform metadata, but are all platform-specific.
        '''
        new_info = hashdb.DatInfo(name=self.info.name,
                                  description=self.info.description,
                                  platform=platform_info.shortcode,
                                  version=self.info.version)
        self.info = new_info

    def set_name(self, new_name):
        '''
        Override the DAT file's name.

        DAT files often have less-than-helpful names.
        '''
        new_info = hashdb.DatInfo(name=new_name,
                                  description=self.info.description,
                                  platform=self.info.platform,
                                  version=self.info.version)
        self.info = new_info

    def read_all_hashes(self):
        '''Read every hash in the DAT file and return it as a large list of RomInfo tuples.'''
        if self.info.platform is None:
            raise DatImportError('DAT platform not set.')

        rom_info_list = []
        all_rom_entries = self._xml_root.findall('.//rom')
        for rom in all_rom_entries:
            rom_info = hashdb.RomInfo(sha1sum=rom.get('sha1'),
                                      filename=rom.get('name'),
                                      platform=self.info.platform,
                                      datorigin=self.info.name)
            rom_info_list.append(rom_info)

        return rom_info_list
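For context, a minimal sketch (not from the repo) of how this now-removed Dat class was driven. The DAT path, the PlatformInfo shape, and the RomInfo fields are assumptions inferred from the calls above:

    import collections

    # Hypothetical stand-in for whatever platform object the frontend passes in;
    # set_platform() only reads a .shortcode attribute.
    PlatformInfo = collections.namedtuple('PlatformInfo', 'shortcode fullname')

    smd_dat = Dat('Sega - Mega Drive - Genesis.dat')               # parse the DAT header
    smd_dat.set_platform(PlatformInfo('smd', 'Sega Mega Drive'))   # DATs carry no platform info
    smd_dat.set_name('Sega Mega Drive / Genesis')                  # override the DAT's own name
    for rom_info in smd_dat.read_all_hashes():                     # hashdb.RomInfo tuples
        print(rom_info.sha1sum, rom_info.filename)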

lark

@@ -11,7 +11,6 @@ import os
import uuid
import xdg.BaseDirectory
#import dat
import metadata

HASH_CHUNK_SIZE = 10485760 # 10mb

@@ -20,46 +19,25 @@ SQLITE_FILENAME = 'metadata.db'
data_path = os.path.join(xdg.BaseDirectory.xdg_data_home, 'lark')


def get_sha1sum(filename):
    sha1sum = hashlib.sha1()
    with open(filename, 'rb') as file_contents:
        while True:
            chunk = file_contents.read(HASH_CHUNK_SIZE)
            if not chunk:
                break
            sha1sum.update(chunk)

    return sha1sum.hexdigest()


'''
smd_dat = dat(SMD_DAT_PATH)

# TODO: Default to '.'
# TODO: Use a proper arg parser.
search_dir = sys.argv[1]

for filename in os.listdir(search_dir):
    # TODO: Ignore or descend into directories.
    # TODO: Compare hashes
    file_path = os.path.abspath(os.path.join(search_dir, filename))
    file_sha1 = get_sha1sum(file_path)
    search_result = smd_dat.search_by_sha1(file_sha1)
    if search_result:
        rom_data = search_result[0]
        print('File %s matches database entry for %s.' % (filename, rom_data.filename))
    else:
        print('File %s is not in database.' % filename)
'''

# Test code! :D
# TODO: Write test code that doesn't depend on external resources.
SMD_DAT_PATH = '/home/lumia/Downloads/Sega - Mega Drive - Genesis (20200303-035539).dat'
TEST_HASH = 'cfbf98c36c776677290a872547ac47c53d2761d6'


def _kwargs_parse(kwargs_list):
    kwargs = {}
    for kwarg_string in kwargs_list:
        key, value = kwarg_string.split('=')
        kwargs[key] = value

    return kwargs


action_object = sys.argv[1]
action = sys.argv[2]
@@ -68,102 +46,102 @@ metadata.configure(os.path.join(data_path, SQLITE_FILENAME))
# TODO: Use a real UI library. This mess is just intended for development.
if action_object == 'platform':
    if action == 'add':
        print('add a platform')
        platform_shortcode = sys.argv[3]
        platform_name = sys.argv[4]
        platform_data = metadata.Platform(shortcode=platform_shortcode,
                                          fullname=platform_name)
        with metadata.get_db_session() as session:
            session.add(platform_data)

    elif action == 'list':
        # TODO: Filter support is exclusively limited to SQLAlchemy's filter.ilike function.
        # Figure out a good way to include other filters.
        filters = _kwargs_parse(sys.argv[3:])
        with metadata.get_db_session() as session:
            print(metadata.search(session, metadata.Platform, **filters))

    elif action == 'remove':
        constraints = sys.argv[3:]
        filters = _kwargs_parse(sys.argv[3:])
        with metadata.get_db_session() as session:
            platforms = metadata.search(session, metadata.Platform, **filters)
            for platform in platforms:
                print('Removing %s.' % platform.fullname)
                session.delete(platform)

    elif action == 'test':
        # TODO: Delete this action before merging into dev. It's just for ugly testing.
        platform_shortcode = sys.argv[3]
        platform_name = sys.argv[4]
        platform_data = metadata.Platform(shortcode=platform_shortcode,
                                          fullname=platform_name)
        with metadata.get_db_session() as session:
            print(metadata.search(session, metadata.Platform))

elif action_object == 'release-group':
    if action == 'add':
        properties = _kwargs_parse(sys.argv[3:])
        release_group = metadata.ReleaseGroup(name=properties['name'])
        with metadata.get_db_session() as session:
            if properties['platform']:
                platform = metadata.search(session, metadata.Platform,
                                           shortcode=properties['platform'])[0]
                release_group.platform = platform
            session.add(release_group)

    if action == 'list':
        # TODO: Filter support is exclusively limited to SQLAlchemy's filter.ilike function.
        # Figure out a good way to include other filters.
        print('Listing release groups.')
        filters = _kwargs_parse(sys.argv[3:])
        with metadata.get_db_session() as session:
            print(metadata.search(session, metadata.ReleaseGroup, **filters))

    elif action == 'remove':
        constraints = sys.argv[3:]
        filters = _kwargs_parse(sys.argv[3:])
        with metadata.get_db_session() as session:
            release_groups = metadata.search(session, metadata.ReleaseGroup, **filters)
            for release_group in release_groups:
                print('Removing %s.' % release_group.name)
                session.delete(release_group)

elif action_object == 'release':
    if action == 'add':
        properties = _kwargs_parse(sys.argv[3:])
        release = metadata.Release(**properties)
        with metadata.get_db_session() as session:
            if properties['release-group']:
                release_group = metadata.search(session, metadata.ReleaseGroup,
                                                name=properties['release-group'])[0]
                release.release_group = release_group
            session.add(release)

    if action == 'list':
        # TODO: Filter support is exclusively limited to SQLAlchemy's filter.ilike function.
        # Figure out a good way to include other filters.
        print('Listing releases.')
        filters = _kwargs_parse(sys.argv[3:])
        with metadata.get_db_session() as session:
            print(metadata.search(session, metadata.Release, **filters))

    elif action == 'remove':
        constraints = sys.argv[3:]
        filters = _kwargs_parse(sys.argv[3:])
        with metadata.get_db_session() as session:
            release_groups = metadata.search(session, metadata.Release, **filters)
            for release in release_groups:
                print('Removing %s.' % release.name)
                session.delete(release)

else:
    print('Unknown object.')
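For reference, _kwargs_parse() just turns the trailing key=value arguments of an invocation such as "lark platform list fullname=Nintendo" into a dict of search filters; a value containing an extra '=' would raise a ValueError from the unpacking. A quick illustration with made-up values:

    filters = _kwargs_parse(['fullname=Nintendo', 'shortcode=nes'])
    assert filters == {'fullname': 'Nintendo', 'shortcode': 'nes'}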

@@ -1,41 +1,79 @@
# Metadata definitions

## Platform
A single set of hardware and/or software that shares compatibility. For example:
- Nintendo Entertainment System
- Microsoft Windows 7
- Microsoft Windows 98

* name
    - The English name of the platform. As with release group names, this is mostly for
      hand-editing data.
* regional_names
    - A hash map with the keys being language identifiers (en-US, jp, fr, etc) and the values
      being the platform's name in the language and script it was released in.
* shortcode
    - A small, three to five letter code for the platform. This must be unique among other
      platforms.
* release groups
    - A list containing release groups for this platform.

## Release group
A container for all languages and versions of a release. For example, the Star Fox 64 release
group would contain all of the following releases:
- Star Fox 64 (U) v1.2
- Star Fox 64 (U) v1.0
- Star Fox 64 (J) v1.0
- Lylat Wars (E) v1.0

* name
    - The English name for the release group. This is largely for contributors to quickly
      understand the raw data, as frontends should ideally pull the name from a configured
      region or something.
* releases
    - A list containing all releases in this release group.

## Release
A single release of a game. For example: Star Fox 64 (U) v1.2

* name
    - The release's name in the language and script it was released in.
* region
    - The official release code for the game.
* version
    - The release version of the game. Some are version numbers, some are just sequential
      release numbers.
* disambiguation (can be empty)
    - If this release is different in a way that isn't region or version, that information goes
      here.
* images
    - A list of all images of this release.

## Image
A stored copy of a game's disk image. For example:
- Star Fox 64 (U) v1.2.z64
- Star Fox 64 (U) v1.2.v64

* format
    - The format this image is stored in. For example: bin, iso, chd, z64, v64
* sha1sum
    - The sha1sum hash of this specific image.
* dump credit
    - Where the hash came from, who dumped it, etc.
* patches (can be empty)
    - A list of patches that are intended to apply to this image.

## Patches
Unofficial patches or mods for a game, usually called romhacks.

* name
    - The patch's name in its original language.
* file sha1sum
    - The patch file's sha1sum.

# Design notes
* This spec is designed with the intent to be imported into databases while also being reasonably
  hand-editable for contributors.
* Releases have multiple images to account for different formats. The z64 and v64 images of the
  same N64 game are different files with different hashes.
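The spec above doesn't pin down a serialization. Purely as an illustration of how the pieces nest, one hand-editable entry might look like the following; the field names come from the definitions above, while the nesting, keys, and values are invented for this example:

    platform_entry = {
        'name': 'Nintendo 64',
        'shortcode': 'n64',
        'regional_names': {'en-US': 'Nintendo 64', 'jp': 'ニンテンドー64'},
        'release_groups': [{
            'name': 'Star Fox 64',
            'releases': [{
                'name': 'Star Fox 64',
                'region': 'U',
                'version': 'v1.2',
                'disambiguation': '',
                'images': [{
                    'format': 'z64',
                    'sha1sum': '<sha1 of the .z64 dump>',
                    'dump credit': '<who dumped it, where the hash came from>',
                    'patches': [],
                }],
            }],
        }],
    }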

@@ -13,129 +13,156 @@ import sqlalchemy.orm
# TODO: l o g g i n g

HASH_CHUNK_SIZE = 10485760 # 10mb

_SQLBase = sqlalchemy.ext.declarative.declarative_base()

'''
Metadata ORM classes for SQLAlchemy. For a detailed description of each piece of data, refer to
metadata/README.md
'''


class Platform(_SQLBase):
    '''SQLAlchemy ORM class for platform metadata.'''
    __tablename__ = 'platforms'

    id = sqlalchemy.Column(
        sqlalchemy.Integer, sqlalchemy.Sequence('platform_id_sequence'), primary_key=True
    )
    fullname = sqlalchemy.Column(sqlalchemy.String, nullable=False)
    shortcode = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False)
    regional_names = sqlalchemy.Column(sqlalchemy.String)

    # order_by is given as a string because ReleaseGroup is defined later in this module.
    release_groups = sqlalchemy.orm.relationship(
        'ReleaseGroup', order_by='ReleaseGroup.id', back_populates='platform'
    )

    def __repr__(self):
        return 'Platform: id: %s, fullname: %s, shortcode: %s' % (
            self.id, self.fullname, self.shortcode
        )


class ReleaseGroup(_SQLBase):
    '''SQLAlchemy ORM class for release group metadata.'''
    __tablename__ = 'release_groups'

    id = sqlalchemy.Column(
        sqlalchemy.Integer, sqlalchemy.Sequence('release_group_id_sequence'), primary_key=True
    )
    name = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False)
    platform_id = sqlalchemy.Column(sqlalchemy.Integer, sqlalchemy.ForeignKey('platforms.id'))

    platform = sqlalchemy.orm.relationship('Platform', back_populates='release_groups')
    releases = sqlalchemy.orm.relationship('Release', back_populates='release_group')
    images = sqlalchemy.orm.relationship('Image', back_populates='release_group')

    def __repr__(self):
        return 'Release Group: id: %s, name: %s, platform:%s' % (
            self.id, self.name, self.platform.fullname
        )


class Release(_SQLBase):
    '''SQLAlchemy ORM class for release metadata.'''
    __tablename__ = 'releases'

    id = sqlalchemy.Column(
        sqlalchemy.Integer, sqlalchemy.Sequence('release_id_sequence'), primary_key=True
    )
    sha1sum = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False)
    en_name = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False)
    release_group_id = sqlalchemy.Column(
        sqlalchemy.Integer, sqlalchemy.ForeignKey('release_groups.id')
    )

    release_group = sqlalchemy.orm.relationship('ReleaseGroup', back_populates='releases')

    def __repr__(self):
        return 'Release: id: %s, en_name: %s' % (self.id, self.en_name)


class Image(_SQLBase):
    '''SQLAlchemy ORM class for ROM image metadata.'''
    __tablename__ = 'images'

    id = sqlalchemy.Column(
        sqlalchemy.Integer, sqlalchemy.Sequence('image_id_sequence'), primary_key=True
    )
    sha1sum = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False)
    format = sqlalchemy.Column(sqlalchemy.String, nullable=False)
    region = sqlalchemy.Column(sqlalchemy.String, nullable=False)
    version = sqlalchemy.Column(sqlalchemy.String, nullable=False)
    disambiguation = sqlalchemy.Column(sqlalchemy.String)
    release_group_id = sqlalchemy.Column(
        sqlalchemy.Integer, sqlalchemy.ForeignKey('release_groups.id')
    )

    # Pairs with ReleaseGroup.images above so the two sides of the relationship stay in sync.
    release_group = sqlalchemy.orm.relationship('ReleaseGroup', back_populates='images')

    def __repr__(self):
        return (
            'ROM Image: id: %s, sha1sum: %s, release-group: %s, region: %s, version: %s, '
            'disambiguation: %s' % (
                self.id, self.sha1sum, self.release_group.name, self.region, self.version,
                self.disambiguation
            )
        )


class hashdb:
    _engine = None
    # Shared session factory. expire_on_commit=False keeps objects returned by search()
    # usable after their session is committed and closed.
    _session_maker = sqlalchemy.orm.sessionmaker(expire_on_commit=False)

    # TODO: db_path's default should be set here, not in frontend.
    def __init__(self, db_path):
        '''
        Configure and initialize the database for the entire module.
        Currently, only SQLite is supported.

        db_path: Path for the SQLite database
        '''
        self._engine = sqlalchemy.create_engine('sqlite:///%s' % db_path)

        _SQLBase.metadata.create_all(self._engine)
        self._session_maker.configure(bind=self._engine)

    def search(self, table_object, **constraints):
        '''
        Search the database for entries matching the given constraints.

        table_object: SQLAlchemy ORM table object, defined in the file above
        constraints: key-value pairs to match against specific fields in the database

        Note: Currently, only the query.ilike method is supported. This is intended to
        eventually support the entire range of available filters.
        '''
        with self._get_db_session() as session:
            # TODO: Consider making this return data recursively on items that reference other
            # tables.
            query = session.query(table_object)

            for key, value in constraints.items():
                query = query.filter(getattr(table_object, key).ilike('%%%s%%' % value))

            item_list = []
            for item in query:
                item_list.append(item)

            return item_list

    def import_json(self, json_file):
        '''
        Import metadata from a json file.
        '''
        # json files can be large, but not too large for ram
        # do not exit on invalid metadata, log the error and skip the object
        pass

    @contextlib.contextmanager
    def _get_db_session(self):
        '''Get a SQLAlchemy database session with a proper context object.'''
        session = self._session_maker()
        try:
            yield session
        except:
            # TODO: Decide which exceptions to handle/eat here and which ones belong in UI.
            # This one is okay to put off until you start really building UI.
            session.rollback()
            raise
        else:
            session.commit()
        finally:
            session.close()
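A usage sketch of the new hashdb class; the database path and the search value are examples, and it assumes the frontend is updated to this class-based API rather than the old module-level configure()/search() calls:

    import metadata

    # Opens (and creates, if missing) the SQLite database behind the given path.
    db = metadata.hashdb('/tmp/lark-test/metadata.db')

    # Constraints are matched with SQLAlchemy's ilike(), so partial, case-insensitive
    # values work: 'nint' matches 'Nintendo Entertainment System'.
    for platform in db.search(metadata.Platform, fullname='nint'):
        print(platform)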

@@ -8,17 +8,11 @@ directory structure.
## Planned features
* Validate ROM images.
* Rename/move ROM files
* Maintain a database of present ROMs
* A nice, Beets-like interface
* Grouping ROMs in archive files

## Known issues
* This probably isn't terribly efficient. It's Python parsing JSON into an SQLite database and I
  only know pretty basic database design.
