Update tools to build packages from hydrated dependencies (#1958)

* Update tools to build packages with dependencies hydrated

Hydrated build is a new approach where all the dependencies are hydrated from pre-built RPMs. This provides two advantages.
 1. Faster build. As no package needs to wait for its dependencies' builds to complete, all packages will be fired to build in parallel (in practice with a slight delay, as the pre-built nodes are fired first).
 2. Failure of any dependency build will not cause failure to the current package, as the dependency is fetched from cached RPMs.

Introduced a new flag, HYDRATED_BUILD, to enable this feature. It is a prerequisite to run `make hydrate-toolchain` and `make hydrate-rpms` with the appropriate tar files before using this option.

This is achieved by replacing each BUILD-to-RUN node edge with a BUILD-to-PREBUILT node edge (the PREBUILT node being a clone of the RUN node), provided a corresponding RPM has already been hydrated.

* Fix go fmt check failures

* Fix typos

* Introduce graphscrubber

graphscrubber preprocesses the build graph before it is handed to the scheduler.
Currently it only performs hydrated-build related work. Ideally, all the
graph optimization related jobs should be moved from the scheduler to
graphscrubber.

* go fmt tidy

* Remove unnecessary functions from graphscrubber

* Fix Make rule to build graphscrubber before using it

* Use inline hydrated-build flag

* Update documentation for HYDRATED_BUILD flag

* Update changes recommended in PR discussion

* go tidy

* Remove unnecessary arguments from graphPreprocessor

* Update toolkit/tools/graphPreprocessor/graphPreprocessor.go

Co-authored-by: Pawel Winogrodzki <pawelwi@microsoft.com>

* Update toolkit/tools/graphPreprocessor/graphPreprocessor.go

Co-authored-by: Pawel Winogrodzki <pawelwi@microsoft.com>

* Update toolkit/tools/graphPreprocessor/graphPreprocessor.go

Co-authored-by: Pawel Winogrodzki <pawelwi@microsoft.com>

* retrigger checks

* Update toolkit/scripts/pkggen.mk

Co-authored-by: Daniel McIlvaney <damcilva@microsoft.com>

* Update toolkit/scripts/pkggen.mk

Co-authored-by: Daniel McIlvaney <damcilva@microsoft.com>

* Update toolkit/scripts/pkggen.mk

Co-authored-by: Daniel McIlvaney <damcilva@microsoft.com>

* Update toolkit/scripts/pkggen.mk

Co-authored-by: Daniel McIlvaney <damcilva@microsoft.com>

* retrigger checks

* retrigger checks

Co-authored-by: Pawel Winogrodzki <pawelwi@microsoft.com>
Co-authored-by: Daniel McIlvaney <damcilva@microsoft.com>
This commit is contained in:
Bala 2022-02-01 04:06:00 +00:00 committed by GitHub
parent daaf7a8549
commit 8ab9ef14a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 117 additions and 5 deletions

View File

@ -45,6 +45,7 @@ NUM_OF_ANALYTICS_RESULTS ?= 10
CLEANUP_PACKAGE_BUILDS ?= y
USE_PACKAGE_BUILD_CACHE ?= y
REBUILD_DEP_CHAINS ?= y
HYDRATED_BUILD ?= n
# Folder defines
toolkit_root := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))

View File

@ -52,6 +52,9 @@
- [`REFRESH_WORKER_CHROOT=...`](#refresh_worker_chroot)
- [`REFRESH_WORKER_CHROOT=`**`n`**](#refresh_worker_chrootn)
- [`REFRESH_WORKER_CHROOT=`**`y`** *(default)*](#refresh_worker_chrooty-default)
- [`HYDRATED_BUILD=...`](#hydrated_build)
- [`HYDRATED_BUILD=`**`y`**](#hydrated_buildy)
- [`HYDRATED_BUILD=`**`n`** *(default)*](#hydrated_buildn-default)
- [All Build Targets](#all-build-targets)
- [Reproducing a Build](#reproducing-a-build)
- [Build Summaries](#build-summaries)
@ -490,6 +493,16 @@ sudo make hydrate-rpms PACKAGE_ARCHIVE=./rpms.tar.gz
> - at least one of the RPM packages mentioned in the manifest file, or
> - the script responsible for building the chroot.
#### `HYDRATED_BUILD=...`
##### `HYDRATED_BUILD=`**`y`**
> If set, every dependency RUN node is replaced with a PreBuilt node when its RPMs have already been hydrated. If a dependency package then fails to build, the packages depending on it are not blocked, because their dependency is satisfied by the hydrated RPM. This also applies to the packages listed in REBUILD_PACKAGES.
##### `HYDRATED_BUILD=`**`n`** *(default)*
> Normal build. No hydrated RPMs will be used.
## All Build Targets
These are the useful build targets:

View File

@ -31,6 +31,7 @@ validate-pkggen-config = $(STATUS_FLAGS_DIR)/validate-image-config-pkggen.flag
specs_file = $(PKGBUILD_DIR)/specs.json
graph_file = $(PKGBUILD_DIR)/graph.dot
cached_file = $(PKGBUILD_DIR)/cached_graph.dot
preprocessed_file = $(PKGBUILD_DIR)/preprocessed_graph.dot
built_file = $(PKGBUILD_DIR)/built_graph.dot
logging_command = --log-file=$(LOGS_DIR)/pkggen/workplan/$(notdir $@).log --log-level=$(LOG_LEVEL)
@ -123,6 +124,13 @@ $(cached_file): $(graph_file) $(go-graphpkgfetcher) $(chroot_worker) $(pkggen_lo
--output=$(cached_file) && \
touch $@
# Run graphPreprocessor on the cached graph and write the result to the
# preprocessed graph file. The --hydrated-build flag is passed only when
# HYDRATED_BUILD is set to 'y'.
$(preprocessed_file): $(cached_file) $(go-graphPreprocessor)
$(go-graphPreprocessor) \
--input=$(cached_file) \
$(if $(filter y,$(HYDRATED_BUILD)),--hydrated-build) \
$(logging_command) \
--output=$@ && \
touch $@
######## PACKAGE BUILD ########
pkggen_archive = $(OUT_DIR)/rpms.tar.gz
@ -156,9 +164,9 @@ $(RPMS_DIR):
@touch $@
endif
$(STATUS_FLAGS_DIR)/build-rpms.flag: $(cached_file) $(chroot_worker) $(go-scheduler) $(go-pkgworker) $(depend_STOP_ON_PKG_FAIL) $(CONFIG_FILE) $(depend_CONFIG_FILE)
$(STATUS_FLAGS_DIR)/build-rpms.flag: $(preprocessed_file) $(chroot_worker) $(go-scheduler) $(go-pkgworker) $(depend_STOP_ON_PKG_FAIL) $(CONFIG_FILE) $(depend_CONFIG_FILE)
$(go-scheduler) \
--input="$(cached_file)" \
--input="$(preprocessed_file)" \
--output="$(built_file)" \
--workers="$(CONCURRENT_PACKAGE_BUILDS)" \
--work-dir="$(CHROOT_DIR)" \

View File

@ -19,6 +19,7 @@ go_tool_list = \
grapher \
graphpkgfetcher \
graphanalytics \
graphPreprocessor \
imageconfigvalidator \
imagepkgfetcher \
imager \

View File

@ -0,0 +1,89 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
package main
import (
"os"
"gopkg.in/alecthomas/kingpin.v2"
"microsoft.com/pkggen/internal/exe"
"microsoft.com/pkggen/internal/logger"
"microsoft.com/pkggen/internal/pkggraph"
)
// Command-line definition of the graphPreprocessor tool. The flags are
// registered on app in the order they are declared here.
var (
// app is the kingpin application that owns every flag below.
app = kingpin.New("graphPreprocessor", "Update the graph for the build requested")
// inputGraphFile is the path of the graph file read by main.
inputGraphFile = exe.InputFlag(app, "Input graph file having full build graph")
// outputGraphFile is the path main writes the resulting graph to.
outputGraphFile = exe.OutputFlag(app, "Output file to export the scrubbed graph to")
// hydratedBuild enables replaceRunNodesWithPrebuiltNodes when set.
hydratedBuild = app.Flag("hydrated-build", "Build individual packages with dependencies Hydrated").Bool()
// logFile and logLevel are handed to logger.InitBestEffort in main.
logFile = exe.LogFileFlag(app)
logLevel = exe.LogLevelFlag(app)
)
// replaceRunNodesWithPrebuiltNodes rewires pkgGraph for a hydrated build.
//
// For every node of type TypeRun whose SRPM is reported as prebuilt by
// pkggraph.IsSRPMPrebuilt, a clone of the node is created and marked as an
// up-to-date TypePreBuilt node. Every edge from a non-goal parent to the run
// node is then removed and replaced with an edge from that parent to the
// clone. Edges coming from goal nodes keep pointing at the original run node.
//
// It returns the first error reported by AddEdge; the graph may already be
// partially rewired when that happens.
func replaceRunNodesWithPrebuiltNodes(pkgGraph *pkggraph.PkgGraph) (err error) {
	for _, node := range pkgGraph.AllNodes() {
		if node.Type != pkggraph.TypeRun {
			continue
		}

		// Only the boolean verdict is needed; the second result is not used here.
		isPrebuilt, _ := pkggraph.IsSRPMPrebuilt(node.SrpmPath, pkgGraph, nil)
		if !isPrebuilt {
			continue
		}

		preBuiltNode := pkgGraph.CloneNode(node)
		preBuiltNode.State = pkggraph.StateUpToDate
		preBuiltNode.Type = pkggraph.TypePreBuilt

		// Snapshot the parents before touching any edge. The loop below removes
		// and adds edges, and the iterator returned by To may not stay valid
		// once the graph has been mutated.
		var parents []*pkggraph.PkgNode
		parentNodes := pkgGraph.To(node.ID())
		for parentNodes.Next() {
			parents = append(parents, parentNodes.Node().(*pkggraph.PkgNode))
		}

		for _, parentNode := range parents {
			// Goal nodes keep their edge to the original run node.
			if parentNode.Type == pkggraph.TypeGoal {
				continue
			}

			pkgGraph.RemoveEdge(parentNode.ID(), node.ID())

			logger.Log.Debugf("Adding a 'PreBuilt' node '%s' with id %d. For '%s'", preBuiltNode.FriendlyName(), preBuiltNode.ID(), parentNode.FriendlyName())
			err = pkgGraph.AddEdge(parentNode, preBuiltNode)
			if err != nil {
				logger.Log.Errorf("Adding edge failed for %v -> %v", parentNode, preBuiltNode)
				return err
			}
		}
	}

	return nil
}
// main parses the command line, reads the input graph, applies the
// hydrated-build rewrite when --hydrated-build is set, and writes the
// resulting graph to the output file. Any failure is reported through
// logger.Log.Panicf.
func main() {
	app.Version(exe.ToolkitVersion)
	kingpin.MustParse(app.Parse(os.Args[1:]))
	logger.InitBestEffort(*logFile, *logLevel)

	scrubbedGraph := pkggraph.NewPkgGraph()

	err := pkggraph.ReadDOTGraphFile(scrubbedGraph, *inputGraphFile)
	if err != nil {
		logger.Log.Panicf("Failed to read graph from file, %s. Error: %s", *inputGraphFile, err)
	}

	if *hydratedBuild {
		logger.Log.Debugf("Nodes before replacing prebuilt nodes: %d", len(scrubbedGraph.AllNodes()))
		err = replaceRunNodesWithPrebuiltNodes(scrubbedGraph)
		// The node count is logged even on failure to help diagnose a partial rewrite.
		logger.Log.Debugf("Nodes after replacing prebuilt nodes: %d", len(scrubbedGraph.AllNodes()))
		if err != nil {
			logger.Log.Panicf("Failed to replace run nodes with preBuilt nodes. Error: %s", err)
		}
	}

	err = pkggraph.WriteDOTGraphFile(scrubbedGraph, *outputGraphFile)
	if err != nil {
		logger.Log.Panicf("Failed to write preprocessed graph to file, %s. Error: %s", *outputGraphFile, err)
	}
}

View File

@ -1203,9 +1203,9 @@ func (g *PkgGraph) MakeDAG() (err error) {
}
}
// cloneNode creates a clone of the input node with a new, unique ID.
// CloneNode creates a clone of the input node with a new, unique ID.
// The clone doesn't have any edges attached to it.
func (g *PkgGraph) cloneNode(pkgNode *PkgNode) (newNode *PkgNode) {
func (g *PkgGraph) CloneNode(pkgNode *PkgNode) (newNode *PkgNode) {
newNode = &PkgNode{
nodeID: g.NewNode().ID(),
VersionedPkg: pkgNode.VersionedPkg,
@ -1299,7 +1299,7 @@ func (g *PkgGraph) fixPrebuiltSRPMsCycle(trimmedCycle []*PkgNode) (err error) {
logger.Log.Debugf("Cycle contains pre-built SRPM '%s'. Replacing edges from build nodes associated with '%s' with an edge to a new 'PreBuilt' node.",
currentNode.SrpmPath, previousNode.SrpmPath)
preBuiltNode := g.cloneNode(currentNode)
preBuiltNode := g.CloneNode(currentNode)
preBuiltNode.State = StateUpToDate
preBuiltNode.Type = TypePreBuilt