镜像导入是由image/tarexport/load.go#tarexporter.Load()完成的
以下代码参考github.com/docker/docker版本v0.0.0-20181129155816-baab736a3649
主要是注册镜像信息以及解包镜像tar流到新root
导出和保存的区别在于
- 导出(export): 仅导出文件结构
- 保存(save): 保存镜像历史和元数据
这意味着导出的内容不包含USER、EXPOSE等由Dockerfile指令产生的镜像元数据和构建历史,因此无法把镜像(连同其配置与历史)原样迁移到另一台机器上
go
// Load imports the images contained in the tar stream inTar.
//
// The stream is unpacked into a temporary directory, the manifest file is
// decoded, and for every manifest entry the layers are registered, the image
// config is stored, and the entry's repo tags are applied. When no manifest
// file exists the work is delegated to legacyLoad. Progress is reported on
// outStream unless quiet is set. The first error encountered is returned.
func (l *tarexporter) Load(inTar io.ReadCloser, outStream io.Writer, quiet bool) error {
var progressOutput progress.Output
if !quiet {
progressOutput = streamformatter.NewJSONProgressOutput(outStream, false)
}
outStream = streamformatter.NewStdoutWriter(outStream)
// 1. Create the temporary "docker-import-" directory; it is removed when Load returns.
tmpDir, err := ioutil.TempDir("", "docker-import-")
if err != nil {
return err
}
defer os.RemoveAll(tmpDir)
// 2. Unpack the tar stream into the temporary directory.
if err := chrootarchive.Untar(inTar, tmpDir, nil); err != nil {
return err
}
// 3. Open the manifest file and decode it.
manifestPath, err := safePath(tmpDir, manifestFileName)
if err != nil {
return err
}
manifestFile, err := os.Open(manifestPath)
if err != nil {
// No manifest file in the archive: fall back to the legacy load path.
if os.IsNotExist(err) {
return l.legacyLoad(tmpDir, outStream, progressOutput)
}
return err
}
defer manifestFile.Close()
var manifest []manifestItem
if err := json.NewDecoder(manifestFile).Decode(&manifest); err != nil {
return err
}
var parentLinks []parentLink
var imageIDsStr string
var imageRefCount int
// 4. Walk the manifest entries and parse each one into an image.
for _, m := range manifest {
configPath, err := safePath(tmpDir, m.Config)
if err != nil {
return err
}
config, err := ioutil.ReadFile(configPath)
if err != nil {
return err
}
img, err := image.NewFromJSON(config)
if err != nil {
return err
}
if err := checkCompatibleOS(img.OS); err != nil {
return err
}
// Work on a copy of the image's RootFS with DiffIDs cleared; the diff IDs
// are appended back one at a time as the layers are processed below.
rootFS := *img.RootFS
rootFS.DiffIDs = nil
// Fail if the number of rootFS diff IDs differs from the number of layers listed in the manifest.
if expected, actual := len(m.Layers), len(img.RootFS.DiffIDs); expected != actual {
return fmt.Errorf("invalid manifest, layers length mismatch: expected %d, got %d", expected, actual)
}
// On Windows, validate the platform, defaulting to windows if not present.
// Note: from here to the end of this loop iteration the local os shadows the os package.
os := img.OS
if os == "" {
os = runtime.GOOS
}
if runtime.GOOS == "windows" {
if (os != "windows") && (os != "linux") {
return fmt.Errorf("configuration for this image has an unsupported operating system: %s", os)
}
}
// 5. Register the layers.
for i, diffID := range img.RootFS.DiffIDs {
layerPath, err := safePath(tmpDir, m.Layers[i])
if err != nil {
return err
}
// r is rootFS plus the current diff ID; its chain ID is used to look the layer up.
r := rootFS
r.Append(diffID)
newLayer, err := l.lss[os].Get(r.ChainID())
if err != nil {
// The layer is not registered yet, so register it from the archive.
newLayer, err = l.loadLayer(layerPath, rootFS, diffID.String(), os, m.LayerSources[diffID], progressOutput)
if err != nil {
return err
}
}
// Deferred inside the loop: every layer reference stays held until Load returns.
defer layer.ReleaseAndLog(l.lss[os], newLayer)
// Fail if the diff ID from the image config differs from the diff ID of the layer obtained from the store.
if expected, actual := diffID, newLayer.DiffID(); expected != actual {
return fmt.Errorf("invalid diffID for layer %d: expected %q, got %q", i, expected, actual)
}
rootFS.Append(diffID)
}
// 6. Store the image configuration in the image store.
imgID, err := l.is.Create(config)
if err != nil {
return err
}
imageIDsStr += fmt.Sprintf("Loaded image ID: %s\n", imgID)
// Reset for every manifest entry, so after the loop the counter only
// reflects the tags of the last entry.
imageRefCount = 0
for _, repoTag := range m.RepoTags {
named, err := reference.ParseNormalizedNamed(repoTag)
if err != nil {
return err
}
ref, ok := named.(reference.NamedTagged)
if !ok {
return fmt.Errorf("invalid tag %q", repoTag)
}
// Record the tag reference for the loaded image ID.
l.setLoadedTag(ref, imgID.Digest(), outStream)
outStream.Write([]byte(fmt.Sprintf("Loaded image: %s\n", reference.FamiliarString(ref))))
imageRefCount++
}
parentLinks = append(parentLinks, parentLink{imgID, m.Parent})
l.loggerImgEvent.LogImageEvent(imgID.String(), imgID.String(), "load")
}
// Apply the parent relationships collected above; entries with an empty parent ID are skipped.
for _, p := range validatedParentLinks(parentLinks) {
if p.parentID != "" {
if err := l.setParentID(p.id, p.parentID); err != nil {
return err
}
}
}
// When the last manifest entry carried no tags, print the accumulated image IDs instead.
if imageRefCount == 0 {
outStream.Write([]byte(imageIDsStr))
}
return nil
}
Untar
主要过程是将tar流解包到新root
untar操作实际由chrootarchive/archive_unix.go untar()执行
go
// untar is the entry-point for docker-untar on re-exec. This is not used on
// Windows as it does not support chroot, hence no point sandboxing through
// chroot and rexec.
//
// It decodes the tar options from file descriptor 3, switches root to the
// directory given as the first command-line argument, unpacks stdin into "/"
// and then exits with status 0. Any failure is passed to fatal.
func untar() {
	runtime.LockOSThread()
	flag.Parse()

	// The options are read from the pipe handed over via "ExtraFiles" (fd 3).
	var options *archive.TarOptions
	optionsPipe := os.NewFile(3, "options")
	decodeErr := json.NewDecoder(optionsPipe).Decode(&options)
	if decodeErr != nil {
		fatal(decodeErr)
	}

	// Per the original notes (the chroot helper is not visible here): on Linux
	// this chroot is implemented with pivot_root rather than chroot. pivot_root
	// needs a new root and an old root; the old root must be a subdirectory of
	// the new root and is where the current rootfs lives after the call. The
	// old root is removed afterwards and is no longer reachable from the new
	// root, similar to how libcontainer sets up a container rootfs.
	// The temporary directory created by the caller is the new root here.
	newRoot := flag.Arg(0)
	chrootErr := chroot(newRoot)
	if chrootErr != nil {
		fatal(chrootErr)
	}

	// Unpack the tar stream into the new root.
	unpackErr := archive.Unpack(os.Stdin, "/", options)
	if unpackErr != nil {
		fatal(unpackErr)
	}

	// fully consume stdin in case it is zero padded
	_, flushErr := flush(os.Stdin)
	if flushErr != nil {
		fatal(flushErr)
	}

	os.Exit(0)
}
loadLayer
注册镜像层以及加载层tar流到对应目录下
image/tarexport/load.go#tarexporter.loadLayer()
go
// loadLayer registers the layer stored in the tar file filename with the
// layer store selected by os.
//
// rootFS supplies the chain ID passed to the store as the parent chain, id is
// only used (truncated) as the progress label, and foreignSrc is forwarded
// when the store implements layer.DescribableStore. When progressOutput is
// non-nil, read progress is reported against the file size.
func (l *tarexporter) loadLayer(filename string, rootFS image.RootFS, id string, os string, foreignSrc distribution.Descriptor, progressOutput progress.Output) (layer.Layer, error) {
	// We use system.OpenSequential to use sequential file access on Windows, avoiding
	// depleting the standby list. On Linux, this equates to a regular os.Open.
	rawTar, err := system.OpenSequential(filename)
	if err != nil {
		logrus.Debugf("Error reading embedded tar: %v", err)
		return nil, err
	}
	defer rawTar.Close()

	// Read straight from the file unless progress reporting was requested,
	// in which case the file is wrapped in a progress reader.
	var source io.Reader = rawTar
	if progressOutput != nil {
		info, statErr := rawTar.Stat()
		if statErr != nil {
			logrus.Debugf("Error statting file: %v", statErr)
			return nil, statErr
		}
		source = progress.NewProgressReader(rawTar, progressOutput, info.Size(), stringid.TruncateID(id), "Loading layer")
	}

	inflated, err := archive.DecompressStream(source)
	if err != nil {
		return nil, err
	}
	defer inflated.Close()

	// Prefer the descriptor-aware registration when the store offers it.
	describable, ok := l.lss[os].(layer.DescribableStore)
	if ok {
		return describable.RegisterWithDescriptor(inflated, rootFS.ChainID(), foreignSrc)
	}

	// Otherwise register the layer tar stream under the chain ID of rootFS.
	return l.lss[os].Register(inflated, rootFS.ChainID())
}
go
// registerWithDescriptor registers the layer read from ts on top of the layer
// identified by parent and returns a reference to it.
//
// An empty parent means the new layer has no parent. ErrLayerDoesNotExist is
// returned when a non-empty parent is unknown, and ErrMaxDepthExceeded when
// the parent's depth has already reached maxLayerDepth. If a layer with the
// resulting chain ID is already present, the freshly created resources are
// cleaned up and a reference to the existing layer is returned instead.
func (ls *layerStore) registerWithDescriptor(ts io.Reader, parent ChainID, descriptor distribution.Descriptor) (Layer, error) {
// err is used to hold the error which will always trigger
// cleanup of creates sources but may not be an error returned
// to the caller (already exists).
var err error
var pid string
var p *roLayer
// 1. Look up the parent layer for the given chain ID.
if string(parent) != "" {
p = ls.get(parent)
if p == nil {
return nil, ErrLayerDoesNotExist
}
pid = p.cacheID
// Release parent chain if error
defer func() {
if err != nil {
ls.layerL.Lock()
ls.releaseLayer(p)
ls.layerL.Unlock()
}
}()
if p.depth() >= maxLayerDepth {
err = ErrMaxDepthExceeded
return nil, err
}
}
// 2. Build the new read-only layer record with a random cache ID.
layer := &roLayer{
parent: p,
cacheID: stringid.GenerateRandomID(),
referenceCount: 1,
layerStore: ls,
references: map[Layer]struct{}{},
descriptor: descriptor,
}
// 3. Have the graph driver create the layer's on-disk structure
// (the surrounding notes assume the overlay2 driver).
if err = ls.driver.Create(layer.cacheID, pid, nil); err != nil {
return nil, err
}
// tx is new here but err is the function-level err declared above, so the
// deferred cleanups keep observing it.
tx, err := ls.store.StartTransaction()
if err != nil {
return nil, err
}
// On any error recorded in err, remove the driver layer and cancel the metadata transaction.
defer func() {
if err != nil {
logrus.Debugf("Cleaning up layer %s: %v", layer.cacheID, err)
if err := ls.driver.Remove(layer.cacheID); err != nil {
logrus.Errorf("Error cleaning up cache layer %s: %v", layer.cacheID, err)
}
if err := tx.Cancel(); err != nil {
logrus.Errorf("Error canceling metadata transaction %q: %s", tx.String(), err)
}
}
}()
// 4. Apply the layer tar stream to the new layer through applyTar.
if err = ls.applyTar(tx, ts, pid, layer); err != nil {
return nil, err
}
// 5. Without a parent the chain ID is the layer's own diffID; otherwise it
// is derived from the parent's chain ID and this layer's diffID.
if layer.parent == nil {
layer.chainID = ChainID(layer.diffID)
} else {
layer.chainID = createChainIDFromParent(layer.parent.chainID, layer.diffID)
}
// 6. Persist the layer metadata through the transaction (per the original
// notes: diffID, size, cacheID, descriptor, parent, os — see storeLayer).
if err = storeLayer(tx, layer); err != nil {
return nil, err
}
ls.layerL.Lock()
defer ls.layerL.Unlock()
if existingLayer := ls.getWithoutLock(layer.chainID); existingLayer != nil {
// Set error for cleanup, but do not return the error
err = errors.New("layer already exists")
return existingLayer.getReference(), nil
}
if err = tx.Commit(layer.chainID); err != nil {
return nil, err
}
ls.layerMap[layer.chainID] = layer
return layer.getReference(), nil
}
driver.Create
为镜像层创建diff目录、指向diff目录的符号链接以及link文件;若存在父层,还会创建work目录,并根据父层的lower信息写入当前层的lower文件
go
// create builds the on-disk layout for the layer id underneath the driver's
// home directory.
//
// It creates the layer directory and its "diff" directory, a symlink to the
// diff directory under linkDir, and a "link" file holding the symlink's name.
// When parent is non-empty it additionally creates the "work" directory and,
// if the parent yields a non-empty lower string, writes it to lowerFile.
// Storage options in opts may set a quota on the layer directory. If an error
// is returned after the layer directory was created, the directory is removed.
func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts) (retErr error) {
	layerDir := d.dir(id)

	// 1. Resolve the root UID/GID from the driver's ID maps.
	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
	if err != nil {
		return err
	}
	owner := idtools.Identity{UID: rootUID, GID: rootGID}

	// 2. Create the parent directory and the layer directory, owned by that identity.
	if err = idtools.MkdirAllAndChown(path.Dir(layerDir), 0700, owner); err != nil {
		return err
	}
	if err = idtools.MkdirAndChown(layerDir, 0700, owner); err != nil {
		return err
	}

	// Clean up on failure: retErr is the named result, so every error
	// returned below triggers removal of the layer directory.
	defer func() {
		if retErr != nil {
			os.RemoveAll(layerDir)
		}
	}()

	// 3. Parse the storage options, if any were supplied.
	if opts != nil && len(opts.StorageOpt) > 0 {
		parsed := &Driver{}
		if err = d.parseStorageOpt(opts.StorageOpt, parsed); err != nil {
			return err
		}
		// 4. Set container disk quota limit when a size was requested.
		if parsed.options.quota.Size > 0 {
			if err = d.quotaCtl.SetQuota(layerDir, parsed.options.quota); err != nil {
				return err
			}
		}
	}

	// 5. Create the layer's diff directory.
	if err = idtools.MkdirAndChown(path.Join(layerDir, "diff"), 0755, owner); err != nil {
		return err
	}

	// 6. Create the symlink pointing at the diff directory.
	lid := generateID(idLength)
	linkTarget := path.Join("..", id, "diff")
	linkPath := path.Join(d.home, linkDir, lid)
	if err = os.Symlink(linkTarget, linkPath); err != nil {
		return err
	}

	// 7. Record the link ID in the layer's link file.
	if err = ioutil.WriteFile(path.Join(layerDir, "link"), []byte(lid), 0644); err != nil {
		return err
	}

	// 8. Without a parent there is nothing more to set up.
	if parent == "" {
		return nil
	}

	// 9. Create the work directory (used internally by overlay2, per the original notes).
	if err = idtools.MkdirAndChown(path.Join(layerDir, "work"), 0700, owner); err != nil {
		return err
	}

	// 10. Obtain the lower string for the parent and write it to this layer's lower file.
	lower, err := d.getLower(parent)
	if err != nil {
		return err
	}
	if lower == "" {
		return nil
	}
	return ioutil.WriteFile(path.Join(layerDir, lowerFile), []byte(lower), 0666)
}
applyTar
将层tar流解包到层挂载点
go
// ApplyDiff unpacks the layer tar stream diff into the filesystem of layer id
// and returns the size reported by ApplyUncompressedLayer.
//
// The layer is obtained through the wrapped ProtoDriver's Get and handed back
// with Put when the function returns. The parent argument is not used by this
// implementation.
func (gdw *NaiveDiffDriver) ApplyDiff(id, parent string, diff io.Reader) (size int64, err error) {
	proto := gdw.ProtoDriver

	// Mount the root filesystem so we can apply the diff/layer.
	// Get returns the mount point of the layered filesystem referenced by id.
	mounted, err := proto.Get(id, "")
	if err != nil {
		return size, err
	}
	defer proto.Put(id)

	mountPath := mounted.Path()
	tarOpts := &archive.TarOptions{
		UIDMaps: gdw.uidMaps,
		GIDMaps: gdw.gidMaps,
	}

	began := time.Now().UTC()
	logrus.WithField("id", id).Debug("Start untar layer")

	// Unpack the layer tar stream into the layer's mount point.
	size, err = ApplyUncompressedLayer(mountPath, diff, tarOpts)
	if err != nil {
		return size, err
	}

	logrus.WithField("id", id).Debugf("Untar time: %vs", time.Now().UTC().Sub(began).Seconds())
	return size, err
}
Create
创建就是在缓存中添加镜像信息,保存配置
go
// Create validates the image configuration in config, writes it to the
// backing store and registers the image in the in-memory index.
//
// It returns the image ID derived from the digest produced by is.fs.Set. An
// error is returned when config is not valid JSON, has no rootfs section,
// lists more non-empty history entries than rootfs layers, cannot be written,
// targets an unsupported operating system, or references a layer chain that
// the layer store cannot provide. If the image is already indexed, its ID is
// returned without further work.
func (is *store) Create(config []byte) (ID, error) {
	var img Image
	if err := json.Unmarshal(config, &img); err != nil {
		return "", err
	}

	// RootFS is left nil when the config has no "rootfs" key; reject such a
	// config here instead of hitting a nil-pointer dereference below.
	if img.RootFS == nil {
		return "", errors.New("invalid image JSON, no RootFS key")
	}

	// The history must not describe more non-empty layers than the rootfs
	// actually contains.
	nonEmptyLayers := 0
	for _, h := range img.History {
		if !h.EmptyLayer {
			nonEmptyLayers++
		}
	}
	if nonEmptyLayers > len(img.RootFS.DiffIDs) {
		return "", errors.New("too many non-empty layers in History section")
	}

	// Write the raw configuration to the backing store; the returned digest
	// identifies the image.
	dgst, err := is.fs.Set(config)
	if err != nil {
		return "", err
	}
	imageID := IDFromDigest(dgst)

	is.Lock()
	defer is.Unlock()

	// Already present in the in-memory image index: nothing more to do.
	if _, exists := is.images[imageID]; exists {
		return imageID, nil
	}

	// Take a reference to the image's top read-only layer, if it has layers.
	layerID := img.RootFS.ChainID()
	var l layer.Layer
	if layerID != "" {
		if !system.IsOSSupported(img.OperatingSystem()) {
			return "", system.ErrNotSupportedOperatingSystem
		}
		l, err = is.lss[img.OperatingSystem()].Get(layerID)
		if err != nil {
			return "", errors.Wrapf(err, "failed to get layer %s", layerID)
		}
	}

	is.images[imageID] = &imageMeta{
		layer:    l,
		children: make(map[ID]struct{}),
	}

	// Index the image digest; undo the image entry if that fails.
	if err := is.digestSet.Add(imageID.Digest()); err != nil {
		delete(is.images, imageID)
		return "", err
	}

	return imageID, nil
}
创建容器时是如何使用image的?
- 从缓存获取镜像配置进行校验以及合并容器配置
- 以镜像chainID作为容器挂载层(也是读写层)的parent
- 复制镜像目录内容到容器目录