@@ -17,7 +17,9 @@ limitations under the License.
17
17
package differs
18
18
19
19
import (
20
+ "bufio"
20
21
"io/ioutil"
22
+ "os"
21
23
"path/filepath"
22
24
"regexp"
23
25
"strings"
@@ -92,18 +94,42 @@ func (a PipAnalyzer) getPackages(image pkgutil.Image) (map[string]map[string]uti
92
94
packageName := packageMatch [1 ]
93
95
version := packageMatch [2 ][:len (packageMatch [2 ])- 1 ]
94
96
95
- // Retrieves size for actual package/script corresponding to each dist-info metadata directory
96
- // by taking the file entry alphabetically before it (for a package) or after it (for a script)
97
+ // First, try to use the "top_level.txt" file.
98
+ // Many egg packages contain a "top_level.txt" file describing the directories containing the
99
+ // required code. Combining the sizes of each of these directories should give the total size.
97
100
var size int64
98
- if i - 1 >= 0 && contents [i - 1 ].Name () == packageName {
99
- packagePath := filepath .Join (pythonPath , packageName )
100
- size = pkgutil .GetSize (packagePath )
101
- } else if i + 1 < len (contents ) && contents [i + 1 ].Name () == packageName + ".py" {
102
- size = contents [i + 1 ].Size ()
101
+ topLevelReader , err := os .Open (filepath .Join (pythonPath , fileName , "top_level.txt" ))
102
+ if err == nil {
103
+ scanner := bufio .NewScanner (topLevelReader )
104
+ scanner .Split (bufio .ScanLines )
105
+ for scanner .Scan () {
106
+ // check if directory exists first, then retrieve size
107
+ contentPath := filepath .Join (pythonPath , scanner .Text ())
108
+ if _ , err := os .Stat (contentPath ); err == nil {
109
+ size = size + pkgutil .GetSize (contentPath )
110
+ } else if _ , err := os .Stat (contentPath + ".py" ); err == nil {
111
+ // sometimes the top level content is just a single python file; try this too
112
+ size = size + pkgutil .GetSize (contentPath + ".py" )
113
+ }
114
+ }
103
115
} else {
104
- logrus .Errorf ("Could not find Python package %s for corresponding metadata info" , packageName )
105
- continue
116
+ // if we didn't find a top_level.txt, we'll try the previous alphabetical directory entry heuristic
117
+ logrus .Infof ("unable to use top_level.txt: falling back to previous alphabetical directory entry heuristic..." )
118
+
119
+ // Retrieves size for actual package/script corresponding to each dist-info metadata directory
120
+ // by taking the file entry alphabetically before it (for a package) or after it (for a script)
121
+ // var size int64
122
+ if i - 1 >= 0 && contents [i - 1 ].Name () == packageName {
123
+ packagePath := filepath .Join (pythonPath , packageName )
124
+ size = pkgutil .GetSize (packagePath )
125
+ } else if i + 1 < len (contents ) && contents [i + 1 ].Name () == packageName + ".py" {
126
+ size = contents [i + 1 ].Size ()
127
+ } else {
128
+ logrus .Errorf ("Could not find Python package %s for corresponding metadata info" , packageName )
129
+ continue
130
+ }
106
131
}
132
+
107
133
currPackage := util.PackageInfo {Version : version , Size : size }
108
134
mapPath := strings .Replace (pythonPath , path , "" , 1 )
109
135
addToMap (packages , packageName , mapPath , currPackage )
0 commit comments