-
Notifications
You must be signed in to change notification settings - Fork 138
shouldi: deptree: Create dependency tree of project #596
Description
https://github.com/intel/dffml/commits/shouldi_dep_tree
The idea behind the work done so far in the above branch is to produce the full dependency tree for a given Python package. We'll want this to be the `shouldi deptree` command (as opposed to `shouldi install`).
It's currently in a state where it can grab package names out of `setup.py` files. It also needs to be able to grab them out of `setup.cfg` files and `requirements.txt` files.
Another thing that's missing is the version number of each package. Right now the latest version of each package is downloaded. We need to check whether the package was pinned to a version and, if so, download that version (for example, for `example_package==0.3.1` we'd download version 0.3.1 of `example_package`). Come up with reasonable ways to handle all of the following cases: https://stackoverflow.com/a/50842265/12310488
There is also a new output operation that needs to be made for this work: the `Tree` output operation. It's not working at the moment. What it should do is output a `dict` similar to the way config structures work with their use of `plugin` and `config` (you'll see what I mean when you read the existing code).
- References
- https://pip.pypa.io/en/stable/reference/requirements-file-format/
- https://github.com/intel/dffml/blob/alice/docs/tutorials/rolling_alice/0001_coach_alice/0001_down_the_dependency_rabbit_hole_again.md
- https://github.com/pdxjohnny/use-cases/blob/openssf_metrics/openssf_metrics.md
- Example files
dffml/examples/shouldi/tests/test_dep_tree.py
Lines 1 to 169 in 0a2e053
import sys | |
import pathlib | |
import argparse | |
import unittest.mock | |
import importlib.util | |
from dffml.base import config | |
from dffml.util.os import chdir | |
from dffml.df.base import opimp_in, op | |
from dffml.df.types import Input, DataFlow | |
from dffml.service.dev import SetupPyKWArg | |
from dffml.operation.output import GetMulti | |
from dffml.df.memory import MemoryOrchestrator | |
from dffml.util.asynctestcase import AsyncTestCase | |
from shouldi.pypi import * | |
def remove_package_versions(packages):
    """
    Reduce requirement strings to bare distribution names.

    Each entry is stripped of surrounding whitespace and everything after the
    leading name is discarded: version specifiers (``==``, ``>=``, ``<``,
    ``~=``, ``!=``, comma separated combinations of them), extras
    (``name[extra]``) and environment markers (``name; python_version<"3"``).

    An entry that does not start with a name (an empty string, an option such
    as ``-e .``) is returned stripped but otherwise unchanged.

    :param packages: iterable of requirement strings
    :return: list of names, one per input entry, in input order
    """
    # Function-scope import so this function does not rely on the file's
    # import header providing ``re``.
    import re

    # A name starts and ends with a letter or digit and may contain ".", "_"
    # and "-" in between. Anchoring on the name (instead of splitting on one
    # specifier character) is what makes "foo<2,>=1" and "foo~=1.0" come out
    # as "foo".
    name_pattern = re.compile(r"[A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?")

    no_versions = []
    for package in packages:
        requirement = package.strip()
        found = name_pattern.match(requirement)
        no_versions.append(found.group(0) if found else requirement)
    return no_versions
# Keyword arguments shared by every ``package_deps_*`` operation below: each
# one takes the "directory" output of ``pypi_package_contents`` as ``src`` and
# emits values under the "package" input definition of ``pypi_package_json``.
PACKAGE_DEPS_KWARGS = {
    "inputs": {"src": pypi_package_contents.op.outputs["directory"]},
    "outputs": {"package": pypi_package_json.op.inputs["package"]},
    "expand": ["package"],
}
@op(**PACKAGE_DEPS_KWARGS)
async def package_deps_setup_py(src: str):
    """
    Collect the names of the packages listed in ``install_requires`` of a
    ``setup.py`` found beneath ``src``.

    When several ``setup.py`` files exist, the one closest to ``src`` is used
    (ties broken by path string) so the choice does not depend on directory
    listing order.

    :param src: directory to search
    :return: ``{"package": [names...]}``, or ``None`` when no ``setup.py``
        exists beneath ``src``
    """
    # rglob() already searches recursively, so the bare file name is enough
    candidates = list(pathlib.Path(src).rglob("setup.py"))
    if not candidates:
        return
    setup_py_path = min(
        candidates, key=lambda path: (len(path.parts), str(path))
    )
    deps = SetupPyKWArg.get_kwargs(str(setup_py_path)).get(
        "install_requires", []
    )
    package_names = remove_package_versions(deps)
    # Debug output kept from the original implementation
    print(src, package_names)
    return {"package": package_names}
@op(**PACKAGE_DEPS_KWARGS)
async def package_deps_setup_cfg(src: str):
    """
    Collect the names of the packages listed under ``install_requires`` in the
    ``[options]`` section of a ``setup.cfg`` found beneath ``src``.

    When several ``setup.cfg`` files exist, the one closest to ``src`` is used
    (ties broken by path string). ``file:`` directives are not followed.

    :param src: directory to search
    :return: ``{"package": [names...]}``; the list is empty when there is no
        ``setup.cfg``, no ``[options]`` section or no ``install_requires``
    :raises configparser.Error: if the file is not parseable
    """
    # Function-scope import: configparser is not among the file-level imports
    import configparser

    candidates = list(pathlib.Path(src).rglob("setup.cfg"))
    if not candidates:
        return {"package": []}
    setup_cfg_path = min(
        candidates, key=lambda path: (len(path.parts), str(path))
    )
    # Interpolation is disabled so a literal "%" in a value cannot raise
    parser = configparser.ConfigParser(interpolation=None)
    parser.read(str(setup_cfg_path), encoding="utf-8")
    raw = parser.get("options", "install_requires", fallback="")
    if "\n" in raw:
        # One requirement per line; anything after ";" is an environment
        # marker and is dropped here
        entries = [line.split(";", 1)[0] for line in raw.splitlines()]
    else:
        # Single-line values are treated as a semicolon separated list
        # NOTE(review): intended to follow setuptools' "list-semi" handling
        # of install_requires - confirm against the setuptools docs
        entries = raw.split(";")
    deps = []
    for entry in entries:
        entry = entry.strip()
        if entry and not entry.startswith("file:"):
            deps.append(entry)
    return {"package": remove_package_versions(deps)}
@op(**PACKAGE_DEPS_KWARGS)
async def package_deps_requirements_txt(src: str):
    """
    Collect the names of the packages listed in a ``requirements.txt`` found
    beneath ``src``.

    When several ``requirements.txt`` files exist, the one closest to ``src``
    is used (ties broken by path string). Blank lines, comments, option lines
    (starting with ``-``), local paths and bare URLs are skipped; referenced
    files (``-r other.txt``) are not followed.

    :param src: directory to search
    :return: ``{"package": [names...]}``; the list is empty when there is no
        ``requirements.txt`` or it lists no named requirements
    """
    candidates = list(pathlib.Path(src).rglob("requirements.txt"))
    if not candidates:
        return {"package": []}
    requirements_path = min(
        candidates, key=lambda path: (len(path.parts), str(path))
    )
    text = requirements_path.read_text(encoding="utf-8", errors="replace")
    deps = []
    # A backslash at the end of a line continues it on the next line
    for line in text.replace("\\\n", "").splitlines():
        # Drop trailing comments (a "#" preceded by a space), then whitespace
        line = line.split(" #", 1)[0].strip()
        if not line or line.startswith(("#", "-", ".", "/")):
            continue
        # Cut off environment markers (";") and direct references ("@ url")
        requirement = line.split(";", 1)[0].split("@", 1)[0].strip()
        if not requirement or "://" in requirement:
            continue
        deps.append(requirement)
    return {"package": remove_package_versions(deps)}
# The sub-flow is assembled from whatever opimp_in() finds in this module
# NOTE(review): presumably that includes the operations pulled in by
# ``from shouldi.pypi import *`` as well as GetMulti - confirm
SUBFLOW = DataFlow.auto(*opimp_in(sys.modules[__name__]))
# Seed the GetMulti spec with the name of the package definition
SUBFLOW.seed.append(
    Input(
        definition=GetMulti.op.inputs["spec"],
        value=[pypi_package_json.op.inputs["package"].name],
    )
)
# Package names produced inside the sub-flow must not re-trigger the whole
# sub-flow, otherwise version numbers and directories get crossed. Only seed
# values are accepted for the "package" input of pypi_package_json.
SUBFLOW.flow["pypi_package_json"].inputs["package"] = ["seed"]
SUBFLOW.update_by_origin()
def create_parent_flow():
    """
    Build the parent dataflow that runs SUBFLOW once per package.

    This function exists so that shouldi_dataflow_as_operation doesn't end up
    in the subflow when we grab from sys.modules[__name__]

    :return: the configured parent ``DataFlow``
    """

    @config
    class ShouldIDataFlowAsOperationConfig:
        # Dataflow that shouldi_dataflow_as_operation runs for each package
        dataflow: DataFlow

    @op(
        inputs={"package": pypi_package_json.op.inputs["package"]},
        outputs={"package": pypi_package_json.op.inputs["package"]},
        expand=["package"],
        config_cls=ShouldIDataFlowAsOperationConfig,
    )
    async def shouldi_dataflow_as_operation(self, package: str):
        """
        Run the configured dataflow for ``package`` and return the package
        names it reports, without ``package`` itself and without duplicates
        (first occurrence wins, order preserved).
        """
        package_definition = self.parent.op.inputs["package"]
        async with self.octx.parent(self.config.dataflow) as octx:
            async for ctx, result in octx.run(
                {
                    package: [
                        Input(value=package, definition=package_definition)
                    ]
                }
            ):
                packages = result[package_definition.name]
                # Remove input package from list and deduplicate;
                # dict.fromkeys keeps the first occurrence of each name
                packages = list(
                    dict.fromkeys(pkg for pkg in packages if pkg != package)
                )
                return {"package": packages}

    dataflow = DataFlow.auto(shouldi_dataflow_as_operation, GetMulti)
    # Seed the GetMulti spec with the name of the package definition
    dataflow.seed.append(
        Input(
            value=[pypi_package_json.op.inputs["package"].name],
            definition=GetMulti.op.inputs["spec"],
        )
    )
    # Each invocation of the operation runs SUBFLOW
    dataflow.configs[
        "shouldi_dataflow_as_operation"
    ] = ShouldIDataFlowAsOperationConfig(dataflow=SUBFLOW)
    # Additionally accept seed values for the operation's "package" input
    dataflow.flow["shouldi_dataflow_as_operation"].inputs["package"].append(
        "seed"
    )
    dataflow.update_by_origin()
    return dataflow


DATAFLOW = create_parent_flow()
class TestOperations(AsyncTestCase):
    """
    Runs DATAFLOW for a fixed set of package names and prints the results.
    """

    async def test_run(self):
        """
        Run every package in ``check`` through DATAFLOW, one context per
        package name, printing each context's results and the flow.
        """
        check = {"shouldi": [], "dffml-config-yaml": []}
        async with MemoryOrchestrator.withconfig(
            {}
        ) as orchestrator, orchestrator(DATAFLOW) as octx:
            # One context per package, keyed by the package name
            inputs = {
                package_name: [
                    Input(
                        value=package_name,
                        definition=pypi_package_json.op.inputs["package"],
                    ),
                ]
                for package_name in check
            }
            async for ctx, results in octx.run(inputs):
                ctx_str = (await ctx.handle()).as_string()
                with self.subTest(package=ctx_str):
                    print(ctx_str, results)
                    print(DATAFLOW.flow)
                    # NOTE(review): this continue makes the assertion below
                    # unreachable; it is preserved as found
                    continue
                    self.assertEqual(
                        check[ctx_str],
                        results[pypi_package_json.op.inputs["package"].name],
                    )