@@ -87,53 +87,97 @@ func (a PipAnalyzer) getPackages(image pkgutil.Image) (map[string]map[string]uti
87
87
for i := 0 ; i < len (contents ); i ++ {
88
88
c := contents [i ]
89
89
fileName := c .Name ()
90
- // check if package
91
- packageDir := regexp .MustCompile ("^([a-z|A-Z|0-9|_]+)-(([0-9]+?\\ .){2,3})(dist-info|egg-info)$" )
92
- packageMatch := packageDir .FindStringSubmatch (fileName )
93
- if len (packageMatch ) != 0 {
94
- packageName := packageMatch [1 ]
95
- version := packageMatch [2 ][:len (packageMatch [2 ])- 1 ]
96
-
97
- // First, try and use the "top_level.txt",
98
- // Many egg packages contains a "top_level.txt" file describing the directories containing the
99
- // required code. Combining the sizes of each of these directories should give the total size.
100
- var size int64
101
- topLevelReader , err := os .Open (filepath .Join (pythonPath , fileName , "top_level.txt" ))
102
- if err == nil {
103
- scanner := bufio .NewScanner (topLevelReader )
104
- scanner .Split (bufio .ScanLines )
105
- for scanner .Scan () {
106
- // check if directory exists first, then retrieve size
107
- contentPath := filepath .Join (pythonPath , scanner .Text ())
108
- if _ , err := os .Stat (contentPath ); err == nil {
109
- size = size + pkgutil .GetSize (contentPath )
110
- } else if _ , err := os .Stat (contentPath + ".py" ); err == nil {
111
- // sometimes the top level content is just a single python file; try this too
112
- size = size + pkgutil .GetSize (contentPath + ".py" )
113
- }
90
+ var metadata * os.File
91
+ var err error
92
+ if strings .HasSuffix (fileName , "egg-info" ) {
93
+ // egg directory (metadata is read from PKG-INFO)
94
+ metadata , err = os .Open (filepath .Join (pythonPath , fileName , "PKG-INFO" ))
95
+ if err != nil {
96
+ logrus .Debugf ("unable to open PKG-INFO for egg %s" , fileName )
97
+ }
98
+ } else if strings .HasSuffix (fileName , "dist-info" ) {
99
+ // wheel directory (metadata is read from METADATA)
100
+ metadata , err = os .Open (filepath .Join (pythonPath , fileName , "METADATA" ))
101
+ if err != nil {
102
+ logrus .Debugf ("unable to open METADATA for wheel %s" , fileName )
103
+ }
104
+ } else {
105
+ // no match
106
+ continue
107
+ }
108
+
109
+ var line , packageName , version string
110
+ if metadata == nil {
111
+ // unable to open metadata file: try reading the package itself
112
+ mPath := filepath .Join (pythonPath , fileName )
113
+ metadata , err = os .Open (mPath )
114
+ fInfo , _ := os .Stat (mPath )
115
+ if err != nil || fInfo .IsDir () {
116
+ // if this also doesn't work, the package doesn't have the correct metadata structure
117
+ // try and parse the name using a regex anyway
118
+ logrus .Debugf ("failed to locate package metadata: attempting to infer package name" )
119
+ packageDir := regexp .MustCompile ("^([a-z|A-Z|0-9|_]+)-(([0-9]+?\\ .){2,3})(dist-info|egg-info)$" )
120
+ packageMatch := packageDir .FindStringSubmatch (fileName )
121
+ if len (packageMatch ) != 0 {
122
+ packageName = packageMatch [1 ]
123
+ version = packageMatch [2 ][:len (packageMatch [2 ])- 1 ]
114
124
}
115
- } else {
116
- // if we didn't find a top_level.txt, we'll try the previous alphabetical directory entry heuristic
117
- logrus .Infof ("unable to use top_level.txt: falling back to previous alphabetical directory entry heuristic..." )
118
-
119
- // Retrieves size for actual package/script corresponding to each dist-info metadata directory
120
- // by taking the file entry alphabetically before it (for a package) or after it (for a script)
121
- // var size int64
122
- if i - 1 >= 0 && contents [i - 1 ].Name () == packageName {
123
- packagePath := filepath .Join (pythonPath , packageName )
124
- size = pkgutil .GetSize (packagePath )
125
- } else if i + 1 < len (contents ) && contents [i + 1 ].Name () == packageName + ".py" {
126
- size = contents [i + 1 ].Size ()
127
- } else {
128
- logrus .Errorf ("Could not find Python package %s for corresponding metadata info" , packageName )
129
- continue
125
+ }
126
+ }
127
+
128
+ if metadata != nil {
129
+ scanner := bufio .NewScanner (metadata )
130
+ scanner .Split (bufio .ScanLines )
131
+ for scanner .Scan () {
132
+ line = scanner .Text ()
133
+ if strings .HasPrefix (line , "Name" ) {
134
+ packageName = strings .Split (line , ": " )[1 ]
135
+ // next line is always the version
136
+ scanner .Scan ()
137
+ version = strings .Split (scanner .Text (), ": " )[1 ]
138
+ break
130
139
}
131
140
}
141
+ }
132
142
133
- currPackage := util.PackageInfo {Version : version , Size : size }
134
- mapPath := strings .Replace (pythonPath , path , "" , 1 )
135
- addToMap (packages , packageName , mapPath , currPackage )
143
+ // First, try and use the "top_level.txt",
144
+ // Many egg packages contain a "top_level.txt" file describing the directories containing the
145
+ // required code. Combining the sizes of each of these directories should give the total size.
146
+ var size int64
147
+ topLevelReader , err := os .Open (filepath .Join (pythonPath , fileName , "top_level.txt" ))
148
+ if err == nil {
149
+ scanner := bufio .NewScanner (topLevelReader )
150
+ scanner .Split (bufio .ScanLines )
151
+ for scanner .Scan () {
152
+ // check if directory exists first, then retrieve size
153
+ contentPath := filepath .Join (pythonPath , scanner .Text ())
154
+ if _ , err := os .Stat (contentPath ); err == nil {
155
+ size = size + pkgutil .GetSize (contentPath )
156
+ } else if _ , err := os .Stat (contentPath + ".py" ); err == nil {
157
+ // sometimes the top level content is just a single python file; try this too
158
+ size = size + pkgutil .GetSize (contentPath + ".py" )
159
+ }
160
+ }
161
+ } else {
162
+ logrus .Debugf ("unable to use top_level.txt: falling back to alphabetical directory entry heuristic..." )
163
+
164
+ // Retrieves size for actual package/script corresponding to each dist-info metadata directory
165
+ // by examining the file entries directly before and after it
166
+ if i - 1 >= 0 && strings .Contains (contents [i - 1 ].Name (), packageName ) {
167
+ packagePath := filepath .Join (pythonPath , contents [i - 1 ].Name ())
168
+ size = pkgutil .GetSize (packagePath )
169
+ } else if i + 1 < len (contents ) && strings .Contains (contents [i + 1 ].Name (), packageName ) {
170
+ packagePath := filepath .Join (pythonPath , contents [i + 1 ].Name ())
171
+ size = pkgutil .GetSize (packagePath )
172
+ } else {
173
+ logrus .Errorf ("failed to locate python package for corresponding package metadata %s" , packageName )
174
+ continue
175
+ }
136
176
}
177
+
178
+ currPackage := util.PackageInfo {Version : version , Size : size }
179
+ mapPath := strings .Replace (pythonPath , path , "" , 1 )
180
+ addToMap (packages , packageName , mapPath , currPackage )
137
181
}
138
182
}
139
183
0 commit comments