Compare commits

..

9 Commits

3
.gitignore vendored

@ -129,3 +129,6 @@ dmypy.json
# Pyre type checker # Pyre type checker
.pyre/ .pyre/
# Kate swap files
*.kate-swp

@ -1,62 +0,0 @@
import xml.etree.ElementTree
import hashdb
class DatImportError(Exception):
    '''
    Signals that importing a DAT file could not be completed.

    Carries no extra state; the message passed to the constructor describes the failure.
    '''
# TODO: l o g g i n g
# TODO: Consider using a context object to avoid keeping large XML trees in memory.
class Dat:
    '''A Dat object processes DAT files into the data structures defined in hashdb.'''

    def __init__(self, filename):
        '''
        Open the given DAT file and gather metadata from it.

        filename: Location of the DAT file; passed straight to xml.etree.ElementTree.parse.

        Raises DatImportError when the <header> element, or its <name>, <description> or
        <version> child, is missing. XML syntax errors raised by ElementTree propagate
        unchanged.
        '''
        xml_tree = xml.etree.ElementTree.parse(filename)
        # The whole tree stays referenced so read_all_hashes can walk it later.
        self._xml_root = xml_tree.getroot()

        dat_header = self._xml_root.find('header')
        if dat_header is None:
            raise DatImportError('DAT file has no header element.')

        # The platform is unknown until set_platform is called; DAT files don't carry it.
        self.info = hashdb.DatInfo(name=self._header_text(dat_header, 'name'),
                                   description=self._header_text(dat_header, 'description'),
                                   platform=None,
                                   version=self._header_text(dat_header, 'version'))

    @staticmethod
    def _header_text(dat_header, tag):
        '''Return the text of the header's child element, or raise DatImportError if absent.'''
        element = dat_header.find(tag)
        if element is None:
            raise DatImportError('DAT header has no %s element.' % tag)
        # An element that exists but is empty yields None, same as a direct .text lookup.
        return element.text

    def _updated_info(self, **overrides):
        '''Build a new DatInfo from the current one with the given fields replaced.'''
        fields = {
            'name': self.info.name,
            'description': self.info.description,
            'platform': self.info.platform,
            'version': self.info.version,
        }
        fields.update(overrides)
        return hashdb.DatInfo(**fields)

    def set_platform(self, platform_info):
        '''
        Set a platform for this DAT file.

        DAT files don't include platform metadata, but are all platform-specific.

        platform_info: Object whose shortcode attribute is stored as the DAT's platform.
        '''
        self.info = self._updated_info(platform=platform_info.shortcode)

    def set_name(self, new_name):
        '''
        Override the DAT file's name.

        DAT files often have less-than-helpful names.

        new_name: Replacement name; also used as datorigin for hashes read afterwards.
        '''
        self.info = self._updated_info(name=new_name)

    def read_all_hashes(self):
        '''
        Read every hash in the DAT file and return it as a large list of RomInfo tuples.

        Every <rom> element anywhere under the root contributes one entry built from its
        sha1 and name attributes, tagged with this DAT's platform and name.

        Raises DatImportError if no platform has been set yet.
        '''
        if self.info.platform is None:
            raise DatImportError('DAT platform not set.')

        return [
            hashdb.RomInfo(sha1sum=rom.get('sha1'),
                           filename=rom.get('name'),
                           platform=self.info.platform,
                           datorigin=self.info.name)
            for rom in self._xml_root.findall('.//rom')
        ]

22
lark

@ -11,7 +11,6 @@ import os
import uuid import uuid
import xdg.BaseDirectory import xdg.BaseDirectory
#import dat
import metadata import metadata
HASH_CHUNK_SIZE = 10485760 # 10mb HASH_CHUNK_SIZE = 10485760 # 10mb
@ -30,28 +29,7 @@ def get_sha1sum(filename):
return sha1sum.hexdigest() return sha1sum.hexdigest()
'''
smd_dat = dat(SMD_DAT_PATH)
# TODO: Default to '.'
# TODO: Use a proper arg parser.
search_dir = sys.argv[1]
for filename in os.listdir(search_dir):
# TODO: Ignore or descend into directories.
# TODO: Compare hashes
file_path = os.path.abspath(os.path.join(search_dir, filename))
file_sha1 = get_sha1sum(file_path)
search_result = smd_dat.search_by_sha1(file_sha1)
if search_result:
rom_data = search_result[0]
print('File %s matches database entry for %s.' % (filename, rom_data.filename))
else:
print('File %s is not in database.' % filename)
'''
# Test code! :D
# TODO: Write test code that doesn't depend on external resources. # TODO: Write test code that doesn't depend on external resources.
SMD_DAT_PATH = '/home/lumia/Downloads/Sega - Mega Drive - Genesis (20200303-035539).dat'
TEST_HASH = 'cfbf98c36c776677290a872547ac47c53d2761d6'
def _kwargs_parse(kwargs_list): def _kwargs_parse(kwargs_list):
kwargs = {} kwargs = {}

@ -1,41 +1,79 @@
# Metadata definitions # Metadata definitions
## Platform
A single set of hardware and/or software that shares compatibility. For example:
- Nintendo Entertainment System
- Microsoft Windows 7
- Microsoft Windows 98
* name
- The English name of the platform. As with release group names, this is mostly for
hand-editing data.
* regional_names
- A hash map with the keys being language identifiers (en-US, jp, fr, etc) and the values
being the platform's name in the language and script it was released in.
* shortcode
- A small, three to five letter code for the platform. This must be unique among other
platforms.
* release groups
- A list containing release groups for this platform.
## Release group
A container for all languages and versions of a release. For example, the Star Fox 64 release
group would contain all of the following releases:
- Star Fox 64 (U) v1.2
- Star Fox 64 (U) v1.0
- Star Fox 64 (J) v1.0
- Lylat Wars (E) v1.0
* name
- The English name for the release group. This is largely for contributors to quickly
understand the raw data, as frontends should ideally pull the name from a configured
region or something.
* releases
- A list containing all releases in this release group.
## Release ## Release
- A single release of a game. For example: Star Fox 64 (U) v1.2 A single release of a game. For example: Star Fox 64 (U) v1.2
* UUID * name
* sha1sum - The release's name in the language and script it was released in.
* format (For now this is just the file extension)
* region * region
- The official release code for the game.
* version * version
* disambiguation - The release version of the game. Some are version numbers, some are just sequential
* release group release numbers.
* disambiguation (can be empty)
- If this release is different in a way that isn't region or version, that information goes
here.
* images
- A list of all images of this release.
## Image
A stored copy of a game's disk image. For example:
- Star Fox 64 (U) v1.2.z64
- Star Fox 64 (U) v1.2.v64
## Release group * format
- A container for all versions of a release. - The format this image is stored in. For example: bin, iso, chd, z64, v64
For example, the Star Fox 64 release group would contain: * sha1sum
Star Fox 64 (U) v1.2 - The sha1sum hash of this specific image.
Star Fox 64 (U) v1.0 * dump credit
Star Fox 64 (J) v1.0 - Where the hash came from, who dumped it, etc
etc. * patches (can be empty)
- A list of patches that are intended to apply to this image.
* UUID
* name
* platform
## Platform ## Patches
- A single set of hardware and/or software that shares compatibility Unofficial patches or mods for a game, usually called romhacks.
Examples:
Nintendo Entertainment System
Microsoft Windows 7
Microsoft Windows 98
* UUID
* name * name
* shortcode - The patch's name in its original language.
* file sha1sum
- The patch file's sha1sum.
# Design notes
* This spec is designed with the intent to be imported into databases while also being reasonably
hand-editable for contributors.
## DAT credits * Releases have multiple images to account for different formats. The z64 and v64 images of the
- A large list of hashes imported from other sources. same N64 game are different files with different hashes.
* DAT name
* DAT website
* DAT version
* image UUID list

@ -13,101 +13,122 @@ import sqlalchemy.orm
# TODO: l o g g i n g # TODO: l o g g i n g
HASH_CHUNK_SIZE = 10485760 # 10mb HASH_CHUNK_SIZE = 10485760 # 10mb
_db_session_maker = sqlalchemy.orm.sessionmaker()
_engine = None
_configured = False
# TODO: Support DAT credit, DAT filenames, and checking DAT completeness.
#DatData = collections.namedtuple('DatData', 'UUID, name, website, version, image_list')
class MetadataDBSessionException(Exception):
'''This exception is raised when something goes wrong with a database session.'''
def _uuidgen():
return str(uuid.uuid4())
_SQLBase = sqlalchemy.ext.declarative.declarative_base() _SQLBase = sqlalchemy.ext.declarative.declarative_base()
class Release(_SQLBase): '''
'''SQLAlchemy ORM class for ROM image metadata.''' Metadata ORM classes for SQLAlchemy. For a detailed description of each piece of data, refer to
__tablename__ = 'images' metadata/README.md
id = sqlalchemy.Column(sqlalchemy.Integer, sqlalchemy.Sequence('image_id_sequence'), '''
primary_key=True)
uuid = sqlalchemy.Column(sqlalchemy.String, nullable=False, default=_uuidgen)
sha1sum = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False)
format = sqlalchemy.Column(sqlalchemy.String, nullable=False)
region = sqlalchemy.Column(sqlalchemy.String)
version = sqlalchemy.Column(sqlalchemy.String, nullable=False)
disambiguation = sqlalchemy.Column(sqlalchemy.String)
release_group_id = sqlalchemy.Column(sqlalchemy.Integer,
sqlalchemy.ForeignKey('release_groups.id'))
release_group = sqlalchemy.orm.relationship('ReleaseGroup', back_populates='releases') class Platform(_SQLBase):
'''SQLAlchemy ORM class for platform metadata.'''
__tablename__ = 'platforms'
id = sqlalchemy.Column(
sqlalchemy.Integer, sqlalchemy.Sequence('platform_id_sequence'), primary_key=True
)
fullname = sqlalchemy.Column(sqlalchemy.String, nullable=False)
shortcode = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False)
regional_names = sqlalchemy.Column(sqlalchemy.String)
release_groups = sqlalchemy.orm.relationship(
'ReleaseGroup', order_by=ReleaseGroup.id, back_populates='platform'
)
def __repr__(self): def __repr__(self):
return ('ROM Image: id: %s, uuid: %s, sha1sum: %s, release-group: %s, region: %s, ' return 'Platform: id: %s, fullname: %s, shortcode: %s' % (
'version: %s, disambiguation: %s' % ( self.id, self.fullname, self.shortcode
self.id, self.uuid, self.sha1sum, self.release_group.name, self.region, )
self.version, self.disambiguation))
class ReleaseGroup(_SQLBase): class ReleaseGroup(_SQLBase):
'''SQLAlchemy ORM class for release group metadata.''' '''SQLAlchemy ORM class for release group metadata.'''
__tablename__ = 'release_groups' __tablename__ = 'release_groups'
id = sqlalchemy.Column(sqlalchemy.Integer, sqlalchemy.Sequence('image_id_sequence'), id = sqlalchemy.Column(
primary_key=True) sqlalchemy.Integer, sqlalchemy.Sequence('release_group_id_sequence'), primary_key=True
uuid = sqlalchemy.Column(sqlalchemy.String, nullable=False, default=_uuidgen) )
name = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False) name = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False)
platform_id = sqlalchemy.Column(sqlalchemy.Integer, sqlalchemy.ForeignKey('platforms.id')) platform_id = sqlalchemy.Column(sqlalchemy.Integer, sqlalchemy.ForeignKey('platforms.id'))
platform = sqlalchemy.orm.relationship('Platform', back_populates='release_groups') releases = sqlalchemy.orm.relationship('Release', back_populates='release_group')
images = sqlalchemy.orm.relationship('Release', back_populates='release_group')
def __repr__(self): def __repr__(self):
return 'Release Group: id: %s, uuid: %s, name: %s, platform:%s' % (self.id, self.uuid, return 'Release Group: id: %s, name: %s, platform:%s' % (
self.name, self.platform.fullname) self.id, self.name, self.platform.fullname
)
class Release(_SQLBase):
'''SQLAlchemy ORM class for release metadata.'''
__tablename__ = 'releases'
id = sqlalchemy.Column(
sqlalchemy.Integer, sqlalchemy.Sequence('release_id_sequence'), primary_key=True
)
sha1sum = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False)
en_name = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False)
release_group_id = sqlalchemy.Column(
sqlalchemy.Integer, sqlalchemy.ForeignKey('release_groups.id')
)
class Platform(_SQLBase): release_group = sqlalchemy.orm.relationship('ReleaseGroup', back_populates='releases')
'''SQLAlchemy ORM class for platform metadata.'''
__tablename__ = 'platforms'
id = sqlalchemy.Column(sqlalchemy.Integer, sqlalchemy.Sequence('platform_id_sequence'),
primary_key=True)
uuid = sqlalchemy.Column(sqlalchemy.String, nullable=False, default=_uuidgen)
fullname = sqlalchemy.Column(sqlalchemy.String, nullable=False)
shortcode = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False)
release_groups = sqlalchemy.orm.relationship('ReleaseGroup', order_by=ReleaseGroup.id,
back_populates='platform')
def __repr__(self): def __repr__(self):
return 'Platform: id: %s, uuid: %s, fullname: %s, shortcode: %s' % (self.id, self.uuid, return ('Release: id: %s, en_name: %s' % (self.id, self.en_name )
self.fullname, self.shortcode)
class Image(_SQLBase):
'''SQLAlchemy ORM class for ROM image metadata.'''
__tablename__ = 'images'
id = sqlalchemy.Column(
sqlalchemy.Integer, sqlalchemy.Sequence('image_id_sequence'), primary_key=True
)
sha1sum = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False)
format = sqlalchemy.Column(sqlalchemy.String, nullable=False)
region = sqlalchemy.Column(sqlalchemy.String, nullable=False)
version = sqlalchemy.Column(sqlalchemy.String, nullable=False)
disambiguation = sqlalchemy.Column(sqlalchemy.String)
release_group_id = sqlalchemy.Column(
sqlalchemy.Integer,sqlalchemy.ForeignKey('release_groups.id')
)
def configure(db_path): release_group = sqlalchemy.orm.relationship('ReleaseGroup', back_populates='releases')
def __repr__(self):
return (
'ROM Image: id: %s, sha1sum: %s, release-group: %s, region: %s, version: %s, '
'disambiguation: %s' % (
self.id, self.sha1sum, self.release_group.name, self.region, self.version,
self.disambiguation
)
)
class hashdb:
_engine = None
# TODO: db_path's default should be set here, not in frontend.
def __init__(self, db_path):
''' '''
Configure and initialize the database for the entire module. Configure and initialize the database for the entire module.
Currently, only SQLite is supported. Currently, only SQLite is supported.
db_path: Path for the SQLite database db_path: Path for the SQLite database
''' '''
_engine = sqlalchemy.create_engine('sqlite:///%s' % db_path) self._engine = sqlalchemy.create_engine('sqlite:///%s' % db_path)
_SQLBase.metadata.create_all(_engine) _SQLBase.metadata.create_all(self._engine)
_db_session_maker.configure(bind=_engine) self._session_maker.configure(bind=self._engine)
_configured = True
# TODO: Passing the session object is a little clunky. Maybe there's a way to infer it somehow? def search(self, table_object, **constraints):
def search(session, table_object, **constraints):
''' '''
Search the database for entries matching the given constraints. Search the database for entries matching the given constraints.
session: SQLAlchemy session, presumably from get_db_session
table_object: SQLAlchemy ORM table object, defined in the file above table_object: SQLAlchemy ORM table object, defined in the file above
constraints: key-value pairs to match against specific fields in the database constraints: key-value pairs to match against specific fields in the database
Note: Currently, only the query.ilike method is supported. This is intended to Note: Currently, only the query.ilike method is supported. This is intended to
eventually support the entire range of available filters. eventually support the entire range of available filters.
''' '''
with self._get_db_session() as session:
# TODO: Consider making this return data recursively on items that reference other
# tables.
query = session.query(table_object) query = session.query(table_object)
for key, value in constraints.items(): for key, value in constraints.items():
@ -119,14 +140,20 @@ def search(session, table_object, **constraints):
return item_list return item_list
@contextlib.contextmanager def import_json(self, json_file):
def get_db_session(): '''
Import metadata from a json file.
'''
# json files can be large, but not too large for ram
# do not exit on invalid metadata, log the error and skip the object
pass
@contextlib.contextmanager
def _get_db_session():
'''Get a SQLAlchemy database session with a proper context object. ''' '''Get a SQLAlchemy database session with a proper context object. '''
# TODO: There's probably a more reliable way of knowing whether the database was configured.
if not _configured:
raise MetadataDBSessionException('Tried to get session without configuring a database.')
session = _db_session_maker() session = sqlalchemy.orm.sessionmaker()
try: try:
yield session yield session

@ -8,17 +8,11 @@ directory structure.
## Planned features ## Planned features
* Validate ROM images. * Validate ROM images.
* Import DAT files
* Rename/move ROM files * Rename/move ROM files
* Maintain a database of present ROMs * Maintain a database of present ROMs
* A nice, Beets-like interface * A nice, Beets-like interface
* Grouping ROMS in archive files * Grouping ROMS in archive files
## Known issues ## Known issues
* This probably isn't terribly efficient. It's Python parsing XML into an SQLite database and I only * This probably isn't terribly efficient. It's Python parsing JSON into an SQLite database and I
know pretty basic database design. only know pretty basic database design.
* Python's `xml.etree` module has a couple of known security issues[1]. Stick to importing DATs from
known places and it shouldn't be an issue.
[1] - https://docs.python.org/3/library/xml.html#xml-vulnerabilities

Loading…
Cancel
Save