diff options
| -rw-r--r-- | .gitignore | 162 | ||||
| -rw-r--r-- | LICENSE | 21 | ||||
| -rw-r--r-- | README.md | 15 | ||||
| -rwxr-xr-x | countdistros.py | 59 | ||||
| -rwxr-xr-x | countdistros_pichart.py | 20 | ||||
| -rw-r--r-- | requirements.txt | bin | 0 -> 352 bytes |
6 files changed, 277 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a1f6de2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,162 @@ +reports_piiremoved.json + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 seth + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..6672034 --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +# countDistros + +these are just python scripts that scrape through [ProtonDB](https://www.protondb.com/) data (published [here](https://github.com/bdefore/protondb-data)), count how many times a distro is reported as being used, and can (optionally) display a pie chart showing the results. + +## examples + +to view plain text results: +```sh +python countdistros.py reports.json +``` + +to show a pie chart: +```sh +python countdistros_pichart.py reports.json +``` diff --git a/countdistros.py b/countdistros.py new file mode 100755 index 0000000..ccdb19c --- /dev/null +++ b/countdistros.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +import json +from multiprocessing import Manager, Process +import re +import sys + +# array of distro names (EDIT THIS AND PROBABLY USE CAPITALIZATION) +arr = [ 'Ubuntu', 'Linux Mint', 'Pop!_OS', 'Debian', 'Arch Linux', 'Manjaro Linux', 'Fedora' ] + +def get_count(distro, db, data): + # print how many times distro appears in report + # by matching its' name to data using regex + rg = re.compile(distro) + count = 0 + for item in db: + if re.search(rg, item['systemInfo']['os']): + count += 1 + data[distro] = str(count) + + +def get_data(filename='reports_piiremoved.json'): + # use default file name if file is not supplied + if len(sys.argv) > 1: + filename = sys.argv[1] + else: + print('defaulting to ' + filename + ' since no argument was supplied') + + # try to load file as json + db = None + try: + with open(filename) as file: + db = json.load(file) + except: + err = filename + ' doesn\'t exist or isn\'t valid json!' + print(err) + sys.exit(1) + + # get distro count asynchronously :trollfig: + data = Manager().dict() + procs = [] + for distro in arr: + proc = Process(target=get_count, args=(distro, db, data)) + procs.append(proc) + proc.start() + + for proc in procs: + proc.join() + + return data + + +def print_data(): + data = get_data() + for key in data: + print(key + ': ' + data[key]) + + +if __name__ == "__main__": + print_data() diff --git a/countdistros_pichart.py b/countdistros_pichart.py new file mode 100755 index 0000000..ed66d71 --- /dev/null +++ b/countdistros_pichart.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python + +import countdistros as countd +import matplotlib.pyplot as plt + +def run(): + data = countd.get_data() + + labels = [] + values = [] + for x, y in data.items(): + labels.append(x) + values.append(y) + + plt.pie(values, labels=labels) + plt.axis('equal') + plt.show() + +if __name__ == '__main__': + run() diff --git a/requirements.txt b/requirements.txt Binary files differnew file mode 100644 index 0000000..1e66fed --- /dev/null +++ b/requirements.txt |
