Skip to content
This repository was archived by the owner on Mar 27, 2024. It is now read-only.

Commit f6af597

Browse files
authored
Merge pull request #291 from nkubala/pip_fix
Use top_level.txt when analyzing pip modules
2 parents 6d827eb + ad1af11 commit f6af597

File tree

3 files changed

+37
-26
lines changed

3 files changed

+37
-26
lines changed

differs/pip_diff.go

+35-9
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ limitations under the License.
1717
package differs
1818

1919
import (
20+
"bufio"
2021
"io/ioutil"
22+
"os"
2123
"path/filepath"
2224
"regexp"
2325
"strings"
@@ -92,18 +94,42 @@ func (a PipAnalyzer) getPackages(image pkgutil.Image) (map[string]map[string]uti
9294
packageName := packageMatch[1]
9395
version := packageMatch[2][:len(packageMatch[2])-1]
9496

95-
// Retrieves size for actual package/script corresponding to each dist-info metadata directory
96-
// by taking the file entry alphabetically before it (for a package) or after it (for a script)
97+
// First, try to use "top_level.txt".
98+
// Many egg packages contain a "top_level.txt" file describing the directories containing the
99+
// required code. Combining the sizes of each of these directories should give the total size.
97100
var size int64
98-
if i-1 >= 0 && contents[i-1].Name() == packageName {
99-
packagePath := filepath.Join(pythonPath, packageName)
100-
size = pkgutil.GetSize(packagePath)
101-
} else if i+1 < len(contents) && contents[i+1].Name() == packageName+".py" {
102-
size = contents[i+1].Size()
101+
topLevelReader, err := os.Open(filepath.Join(pythonPath, fileName, "top_level.txt"))
102+
if err == nil {
103+
scanner := bufio.NewScanner(topLevelReader)
104+
scanner.Split(bufio.ScanLines)
105+
for scanner.Scan() {
106+
// check if directory exists first, then retrieve size
107+
contentPath := filepath.Join(pythonPath, scanner.Text())
108+
if _, err := os.Stat(contentPath); err == nil {
109+
size = size + pkgutil.GetSize(contentPath)
110+
} else if _, err := os.Stat(contentPath + ".py"); err == nil {
111+
// sometimes the top level content is just a single python file; try this too
112+
size = size + pkgutil.GetSize(contentPath+".py")
113+
}
114+
}
103115
} else {
104-
logrus.Errorf("Could not find Python package %s for corresponding metadata info", packageName)
105-
continue
116+
// if we didn't find a top_level.txt, we'll try the previous alphabetical directory entry heuristic
117+
logrus.Infof("unable to use top_level.txt: falling back to previous alphabetical directory entry heuristic...")
118+
119+
// Retrieves size for actual package/script corresponding to each dist-info metadata directory
120+
// by taking the file entry alphabetically before it (for a package) or after it (for a script)
121+
// var size int64
122+
if i-1 >= 0 && contents[i-1].Name() == packageName {
123+
packagePath := filepath.Join(pythonPath, packageName)
124+
size = pkgutil.GetSize(packagePath)
125+
} else if i+1 < len(contents) && contents[i+1].Name() == packageName+".py" {
126+
size = contents[i+1].Size()
127+
} else {
128+
logrus.Errorf("Could not find Python package %s for corresponding metadata info", packageName)
129+
continue
130+
}
106131
}
132+
107133
currPackage := util.PackageInfo{Version: version, Size: size}
108134
mapPath := strings.Replace(pythonPath, path, "", 1)
109135
addToMap(packages, packageName, mapPath, currPackage)

tests/integration_test.go

-15
Original file line numberDiff line numberDiff line change
@@ -141,14 +141,6 @@ func TestDiffAndAnalysis(t *testing.T) {
141141
differFlags: []string{"--type=apt", "--no-cache"},
142142
expectedFile: "apt_diff_expected.json",
143143
},
144-
// {
145-
// description: "rpm differ",
146-
// subcommand: "diff",
147-
// imageA: rpmBase,
148-
// imageB: rpmModified,
149-
// differFlags: []string{"--type=rpm"},
150-
// expectedFile: "rpm_diff_expected.json",
151-
// },
152144
{
153145
description: "node differ",
154146
subcommand: "diff",
@@ -204,13 +196,6 @@ func TestDiffAndAnalysis(t *testing.T) {
204196
differFlags: []string{"--type=apt", "--no-cache"},
205197
expectedFile: "apt_analysis_expected.json",
206198
},
207-
// {
208-
// description: "rpm analysis",
209-
// subcommand: "analyze",
210-
// imageA: rpmModified,
211-
// differFlags: []string{"--type=rpm"},
212-
// expectedFile: "rpm_analysis_expected.json",
213-
// },
214199
{
215200
description: "file sorted analysis",
216201
subcommand: "analyze",

tests/pip_analysis_expected.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"Name": "configobj",
88
"Path": "/usr/lib/python2.7/dist-packages",
99
"Version": "5.0.6",
10-
"Size": 89613
10+
"Size": 136871
1111
},
1212
{
1313
"Name": "mercurial",
@@ -37,7 +37,7 @@
3737
"Name": "setuptools",
3838
"Path": "/usr/local/lib/python3.6/site-packages",
3939
"Version": "36.0.1",
40-
"Size": 837337
40+
"Size": 1282800
4141
},
4242
{
4343
"Name": "six",

0 commit comments

Comments
 (0)