Skip to content

Commit 04ee66c

Browse files
committed
build,cache: support pulling/pushing cache layers to/from remote sources
Following commit * Initiates `cacheKey` or `layerKey` for intermediate images generated for layers. * Allows end users to upload cached layers with `cacheKey` to remote sources using `--cache-to`. `--cache-to` is an optional flag to be used with `buildah build` which publishes cached layers to remote sources. * Allows end users to use cached layers from `remote` sources with `--cache-from`. `--cache-from` is an optional flag to be used with `buildah build` and it pulls cached layers from remote sources in a step by step manner only if there is a valid cache hit. Example * Populate cache source or use cached layers if already present ```bash buildah build -t test --layers --cache-to registry/myrepo/cache --cache-from registry/myrepo/cache . ``` Future: * `cacheKey` or `layerKey` model is only being used when working with remote sources; however, local cache lookup can also be optimized if it is altered to use the `cacheKey` model instead of iterating through all the images in local storage. As discussed here References: * Feature is quite similar to `kaniko`'s `--cache-repo`: https://github.com/GoogleContainerTools/kaniko#--cache-repo Closes: issues#620 Signed-off-by: Aditya R <[email protected]>
1 parent 1191961 commit 04ee66c

File tree

7 files changed

+345
-6
lines changed

7 files changed

+345
-6
lines changed

define/build.go

+6
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,12 @@ type BuildOptions struct {
136136
RuntimeArgs []string
137137
// TransientMounts is a list of mounts that won't be kept in the image.
138138
TransientMounts []string
139+
// CacheFrom specifies any remote repository which can be treated as
140+
// potential cache source.
141+
CacheFrom string
142+
// CacheTo specifies any remote repository which can be treated as
143+
// potential cache destination.
144+
CacheTo string
139145
// Compression specifies the type of compression which is applied to
140146
// layer blobs. The default is to not use compression, but
141147
// archive.Gzip is recommended.

docs/buildah-build.1.md

+31-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,37 @@ The value of `[name]` is matched with the following priority order:
9696

9797
**--cache-from**
9898

99-
Images to utilise as potential cache sources. Buildah does not currently support --cache-from so this is a NOOP.
99+
Repository to utilise as a potential cache source. When specified, buildah will try to look for
100+
cache images in specified repository and will attempt to pull cache images instead of actually
101+
executing and computing STEPS. Buildah will only attempt to pull cache images if they are considered
102+
as valid cache hits.
103+
104+
Use `--cache-to` to populate a remote repository with cache content.
105+
106+
Example
107+
108+
```bash
109+
# populate a cache and also consult it
110+
buildah build -t test --layers --cache-to registry/myrepo/cache --cache-from registry/myrepo/cache .
111+
```
112+
113+
Note: option is ignored unless `--layers` is specified.
114+
115+
**--cache-to**
116+
117+
Set this flag to specify a remote repository that will be used to store cache images. Buildah will attempt to
118+
push newly built cache images to the remote repository.
119+
120+
Note: Use `--cache-from` in order to use cache content in a remote repository.
121+
122+
Example
123+
124+
```bash
125+
# populate a cache and also consult it
126+
buildah build -t test --layers --cache-to registry/myrepo/cache --cache-from registry/myrepo/cache .
127+
```
128+
129+
Note: option is ignored unless `--layers` is specified.
100130

101131
**--cap-add**=*CAP\_xxx*
102132

imagebuildah/executor.go

+4
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ var builtinAllowedBuildArgs = map[string]bool{
5858
// interface. It coordinates the entire build by using one or more
5959
// StageExecutors to handle each stage of the build.
6060
type Executor struct {
61+
cacheFrom string
62+
cacheTo string
6163
containerSuffix string
6264
logger *logrus.Logger
6365
stages map[string]*StageExecutor
@@ -212,6 +214,8 @@ func newExecutor(logger *logrus.Logger, logPrefix string, store storage.Store, o
212214
}
213215

214216
exec := Executor{
217+
cacheFrom: options.CacheFrom,
218+
cacheTo: options.CacheTo,
215219
containerSuffix: options.ContainerSuffix,
216220
logger: logger,
217221
stages: make(map[string]*StageExecutor),

imagebuildah/stage_executor.go

+205
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package imagebuildah
22

33
import (
44
"context"
5+
"crypto/sha256"
56
"fmt"
67
"io"
78
"os"
@@ -26,6 +27,7 @@ import (
2627
"github.com/containers/image/v5/manifest"
2728
is "github.com/containers/image/v5/storage"
2829
"github.com/containers/image/v5/transports"
30+
"github.com/containers/image/v5/transports/alltransports"
2931
"github.com/containers/image/v5/types"
3032
"github.com/containers/storage"
3133
"github.com/containers/storage/pkg/chrootarchive"
@@ -945,6 +947,18 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,
945947
s.log(commitMessage)
946948
}
947949
}
950+
logCachePulled := func(src string) {
951+
if !s.executor.quiet {
952+
cacheHitMessage := "--> Cache pulled from remote"
953+
fmt.Fprintf(s.executor.out, "%s %s\n", cacheHitMessage, src)
954+
}
955+
}
956+
logCachePush := func(src string) {
957+
if !s.executor.quiet {
958+
cacheHitMessage := "--> Pushing cache"
959+
fmt.Fprintf(s.executor.out, "%s %s\n", cacheHitMessage, src)
960+
}
961+
}
948962
logCacheHit := func(cacheID string) {
949963
if !s.executor.quiet {
950964
cacheHitMessage := "--> Using cache"
@@ -1145,6 +1159,8 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,
11451159
var (
11461160
commitName string
11471161
cacheID string
1162+
cacheKey string
1163+
cachePulled bool
11481164
err error
11491165
rebase bool
11501166
addedContentSummary string
@@ -1156,6 +1172,15 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,
11561172
commitName = s.output
11571173
}
11581174

1175+
// If --cache-from or --cache-to is specified make sure to populate
1176+
// cacheKey since it will be used either while pulling or pushing the
1177+
// cache images.
1178+
if s.executor.cacheFrom != "" || s.executor.cacheTo != "" {
1179+
cacheKey, err = s.generateCacheKey(ctx, node, addedContentSummary, s.stepRequiresLayer(step))
1180+
if err != nil {
1181+
return "", nil, fmt.Errorf("failed while generating cache key: %w", err)
1182+
}
1183+
}
11591184
// Check if there's already an image based on our parent that
11601185
// has the same change that we're about to make, so far as we
11611186
// can tell.
@@ -1168,6 +1193,24 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,
11681193
if err != nil {
11691194
return "", nil, fmt.Errorf("error checking if cached image exists from a previous build: %w", err)
11701195
}
1196+
// All best-effort attempts to find the cache in local storage have failed, so try pulling
1197+
// cache from remote repo if `--cache-from` was configured.
1198+
if cacheID == "" && s.executor.cacheFrom != "" {
1199+
src := fmt.Sprintf("%s:%s", s.executor.cacheFrom, cacheKey)
1200+
// only attempt to use cache again if pulling was successful
1201+
// otherwise do nothing and attempt to run the step, err != nil
1202+
// is ignored and will be automatically logged for --log-level debug
1203+
if id, err := s.pullCache(ctx, src); id != "" && err == nil {
1204+
logCachePulled(src)
1205+
cacheID, err = s.intermediateImageExists(ctx, node, addedContentSummary, s.stepRequiresLayer(step))
1206+
if err != nil {
1207+
return "", nil, fmt.Errorf("error checking if cached image exists from a previous build: %w", err)
1208+
}
1209+
if cacheID != "" {
1210+
cachePulled = true
1211+
}
1212+
}
1213+
}
11711214
}
11721215

11731216
// If we didn't find a cache entry, or we need to add content
@@ -1216,6 +1259,18 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,
12161259
}
12171260
}
12181261

1262+
// Small wrapper around s.pushCache to prevent duplication of code
1263+
pushCache := func() error {
1264+
if s.executor.cacheTo != "" {
1265+
destSpec := fmt.Sprintf("%s:%s", s.executor.cacheTo, cacheKey)
1266+
logCachePush(destSpec)
1267+
if err = s.pushCache(ctx, imgID, destSpec); err != nil {
1268+
return err
1269+
}
1270+
}
1271+
return nil
1272+
}
1273+
12191274
// Note: If the build has squash, we must try to re-use as many layers as possible if cache is found.
12201275
// So only perform commit if its the lastInstruction of lastStage.
12211276
if cacheID != "" {
@@ -1231,6 +1286,14 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,
12311286
return "", nil, err
12321287
}
12331288
}
1289+
if !cachePulled {
1290+
// Try to push this cache to remote repository only
1291+
// if cache was present on local storage and not
1292+
// pulled from remote source while processing this step.
1293+
if err = pushCache(); err != nil {
1294+
return "", nil, err
1295+
}
1296+
}
12341297
} else {
12351298
// We're not going to find any more cache hits, so we
12361299
// can stop looking for them.
@@ -1246,6 +1309,12 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,
12461309
if err != nil {
12471310
return "", nil, fmt.Errorf("error committing container for step %+v: %w", *step, err)
12481311
}
1312+
// Following step is just built and was not used from cache so
1313+
// check if --cache-to was specified if yes then attempt pushing
1314+
// this cache to remote repo and fail accordingly
1315+
if err = pushCache(); err != nil {
1316+
return "", nil, err
1317+
}
12491318
}
12501319

12511320
// Create a squashed version of this image
@@ -1521,6 +1590,142 @@ func (s *StageExecutor) tagExistingImage(ctx context.Context, cacheID, output st
15211590
return img.ID, ref, nil
15221591
}
15231592

1593+
// generateCacheKey returns a computed digest for the current STEP
1594+
// running its history and diff against a hash algorithm and this
1595+
// generated CacheKey is further used by buildah to lock and decide
1596+
// tag for the intermeidate image which can be pushed and pulled to/from
1597+
// the remote repository.
1598+
func (s *StageExecutor) generateCacheKey(ctx context.Context, currNode *parser.Node, addedContentDigest string, buildAddsLayer bool) (string, error) {
1599+
hash := sha256.New()
1600+
var baseHistory []v1.History
1601+
var diffIDs []digest.Digest
1602+
var manifestType string
1603+
var err error
1604+
if s.builder.FromImageID != "" {
1605+
manifestType, baseHistory, diffIDs, err = s.executor.getImageTypeAndHistoryAndDiffIDs(ctx, s.builder.FromImageID)
1606+
if err != nil {
1607+
return "", fmt.Errorf("error getting history of base image %q: %w", s.builder.FromImageID, err)
1608+
}
1609+
for i := 0; i < len(diffIDs); i++ {
1610+
fmt.Fprintln(hash, diffIDs[i].String())
1611+
}
1612+
}
1613+
createdBy := s.getCreatedBy(currNode, addedContentDigest)
1614+
fmt.Fprintf(hash, "%t", buildAddsLayer)
1615+
fmt.Fprintln(hash, createdBy)
1616+
fmt.Fprintln(hash, manifestType)
1617+
for _, element := range baseHistory {
1618+
fmt.Fprintln(hash, element.CreatedBy)
1619+
fmt.Fprintln(hash, element.Author)
1620+
fmt.Fprintln(hash, element.Comment)
1621+
fmt.Fprintln(hash, element.Created)
1622+
fmt.Fprintf(hash, "%t", element.EmptyLayer)
1623+
fmt.Fprintln(hash)
1624+
}
1625+
return fmt.Sprintf("%x", hash.Sum(nil)), nil
1626+
}
1627+
1628+
// pushCache takes the image id of intermediate image and attempts
1629+
// to perform push at the remote repository with cacheKey as the tag.
1630+
// Returns error if fails otherwise returns nil.
1631+
func (s *StageExecutor) pushCache(ctx context.Context, src, destSpec string) error {
1632+
logrus.Debugf("trying to push cache to dest: %+v from src:%+v", destSpec, src)
1633+
dest, err := alltransports.ParseImageName(destSpec)
1634+
// Add the docker:// transport to see if user neglected it while
1635+
// specifying --cache-to `<dest>`.
1636+
if err != nil {
1637+
destTransport := strings.Split(destSpec, ":")[0]
1638+
if t := transports.Get(destTransport); t != nil {
1639+
return err
1640+
}
1641+
1642+
if strings.Contains(destSpec, "://") {
1643+
return err
1644+
}
1645+
1646+
destSpec = "docker://" + destSpec
1647+
dest2, err2 := alltransports.ParseImageName(destSpec)
1648+
if err2 != nil {
1649+
return err
1650+
}
1651+
dest = dest2
1652+
logrus.Debugf("Assuming docker:// as the transport method for cache DESTINATION: %s", destSpec)
1653+
}
1654+
options := buildah.PushOptions{
1655+
Compression: s.executor.compression,
1656+
SignaturePolicyPath: s.executor.signaturePolicyPath,
1657+
Store: s.executor.store,
1658+
SystemContext: s.executor.systemContext,
1659+
BlobDirectory: s.executor.blobDirectory,
1660+
SignBy: s.executor.signBy,
1661+
MaxRetries: s.executor.maxPullPushRetries,
1662+
RetryDelay: s.executor.retryPullPushDelay,
1663+
}
1664+
ref, digest, err := buildah.Push(ctx, src, dest, options)
1665+
if err != nil {
1666+
return fmt.Errorf("failed pushing cache to %q: %w", dest, err)
1667+
}
1668+
logrus.Debugf("successfully pushed cache to dest: %+v with ref:%+v and digest: %v", dest, ref, digest)
1669+
return nil
1670+
}
1671+
1672+
// pullCache takes the image source of the cache assuming tag
1673+
// already points to the valid cacheKey and pulls the image to
1674+
// local storage only if it was not already present on local storage
1675+
// or a newer version of cache was found in the upstream repo. If new
1676+
// image was pulled function returns image id otherwise returns empty
1677+
// string "" or error if any error was encontered while pulling the cache.
1678+
func (s *StageExecutor) pullCache(ctx context.Context, src string) (string, error) {
1679+
logrus.Debugf("trying to pull cache from remote repo: %+v", src)
1680+
options := buildah.PullOptions{
1681+
SignaturePolicyPath: s.executor.signaturePolicyPath,
1682+
Store: s.executor.store,
1683+
SystemContext: s.executor.systemContext,
1684+
BlobDirectory: s.executor.blobDirectory,
1685+
MaxRetries: s.executor.maxPullPushRetries,
1686+
RetryDelay: s.executor.retryPullPushDelay,
1687+
AllTags: false,
1688+
ReportWriter: nil,
1689+
PullPolicy: define.PullIfNewer,
1690+
}
1691+
// Get list of images in local storage before we pull image.
1692+
// We need this to ensure if we actually pulled any image or not
1693+
// and reason for this is explained below in the same function.
1694+
//
1695+
// NOTE, TODO: This logic can be removed once Pull API supports notifying
1696+
// back if image was actually pulled or not.
1697+
images, err := s.executor.store.Images()
1698+
if err != nil {
1699+
return "", fmt.Errorf("failed while getting images from local storage: %w", err)
1700+
}
1701+
id, err := buildah.Pull(ctx, src, options)
1702+
if err != nil {
1703+
logrus.Debugf("failed pulling cache from source %s: %v", src, err)
1704+
return "", fmt.Errorf("failed while pulling cache from %q: %w", src, err)
1705+
}
1706+
if id != "" {
1707+
// Reason for this logic:
1708+
//
1709+
// pullCache eventually returns pulled image ID back to the caller
1710+
// and here buildah will produce a log with image ID notifying to
1711+
// end-users if anything was actually pulled from remote repo in the
1712+
// the build output by calling `logCachePulled()`.
1713+
//
1714+
// Since following build output is part of stdout lets not return wrong
1715+
// info and further we are using build output to verify test-cases so
1716+
// don't return any id, since image was already present on local and buildah
1717+
// will eventually figure out to use valid cache source from local storage.
1718+
for _, image := range images {
1719+
if id == image.ID {
1720+
logrus.Debugf("cache was not pulled from:%s since id:%s is already present on local storage", src, id)
1721+
return "", nil
1722+
}
1723+
}
1724+
}
1725+
logrus.Debugf("successfully pulled cache from repo %s: %s", src, id)
1726+
return id, nil
1727+
}
1728+
15241729
// intermediateImageExists returns true if an intermediate image of currNode exists in the image store from a previous build.
15251730
// It verifies this by checking the parent of the top layer of the image and the history.
15261731
func (s *StageExecutor) intermediateImageExists(ctx context.Context, currNode *parser.Node, addedContentDigest string, buildAddsLayer bool) (string, error) {

pkg/cli/build.go

+2-4
Original file line numberDiff line numberDiff line change
@@ -233,10 +233,6 @@ func GenBuildOptions(c *cobra.Command, inputArgs []string, iopts BuildOptions) (
233233
return options, nil, nil, errors.New("'rm' and 'force-rm' can only be set with either 'layers' or 'no-cache'")
234234
}
235235

236-
if c.Flag("cache-from").Changed {
237-
logrus.Debugf("build --cache-from not enabled, has no effect")
238-
}
239-
240236
if c.Flag("compress").Changed {
241237
logrus.Debugf("--compress option specified but is ignored")
242238
}
@@ -300,6 +296,8 @@ func GenBuildOptions(c *cobra.Command, inputArgs []string, iopts BuildOptions) (
300296
Args: args,
301297
BlobDirectory: iopts.BlobCache,
302298
BuildOutput: iopts.BuildOutput,
299+
CacheFrom: iopts.CacheFrom,
300+
CacheTo: iopts.CacheTo,
303301
CNIConfigDir: iopts.CNIConfigDir,
304302
CNIPluginPath: iopts.CNIPlugInPath,
305303
CPPFlags: iopts.CPPFlags,

pkg/cli/common.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ type BudResults struct {
5454
BuildArg []string
5555
BuildContext []string
5656
CacheFrom string
57+
CacheTo string
5758
CertDir string
5859
Compress bool
5960
Creds string
@@ -197,7 +198,8 @@ func GetBudFlags(flags *BudResults) pflag.FlagSet {
197198
fs.StringArrayVar(&flags.OCIHooksDir, "hooks-dir", []string{}, "set the OCI hooks directory path (may be set multiple times)")
198199
fs.StringArrayVar(&flags.BuildArg, "build-arg", []string{}, "`argument=value` to supply to the builder")
199200
fs.StringArrayVar(&flags.BuildContext, "build-context", []string{}, "`argument=value` to supply additional build context to the builder")
200-
fs.StringVar(&flags.CacheFrom, "cache-from", "", "images to utilise as potential cache sources. The build process does not currently support caching so this is a NOOP.")
201+
fs.StringVar(&flags.CacheFrom, "cache-from", "", "remote repository to utilise as potential cache source.")
202+
fs.StringVar(&flags.CacheTo, "cache-to", "", "remote repository to utilise as potential cache destination.")
201203
fs.StringVar(&flags.CertDir, "cert-dir", "", "use certificates at the specified path to access the registry")
202204
fs.BoolVar(&flags.Compress, "compress", false, "this is a legacy option, which has no effect on the image")
203205
fs.StringArrayVar(&flags.CPPFlags, "cpp-flag", []string{}, "set additional flag to pass to C preprocessor (cpp)")
@@ -276,6 +278,7 @@ func GetBudFlagsCompletions() commonComp.FlagCompletions {
276278
flagCompletion["build-arg"] = commonComp.AutocompleteNone
277279
flagCompletion["build-context"] = commonComp.AutocompleteNone
278280
flagCompletion["cache-from"] = commonComp.AutocompleteNone
281+
flagCompletion["cache-to"] = commonComp.AutocompleteNone
279282
flagCompletion["cert-dir"] = commonComp.AutocompleteDefault
280283
flagCompletion["cpp-flag"] = commonComp.AutocompleteNone
281284
flagCompletion["creds"] = commonComp.AutocompleteNone

0 commit comments

Comments
 (0)