Skip to content

Commit 62faf2e

Browse files
Added LangSmith API Key detector (#4251)
* Added LangSmith API Key detector * added default in engine
1 parent 20e0233 commit 62faf2e

File tree

6 files changed

+314
-2
lines changed

6 files changed

+314
-2
lines changed

pkg/detectors/langsmith/langsmith.go

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
package langsmith
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"io"
7+
"net/http"
8+
9+
regexp "github.com/wasilibs/go-re2"
10+
11+
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
12+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
13+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
14+
)
15+
16+
// Scanner detects LangSmith API keys and can optionally verify them over HTTP.
type Scanner struct {
17+
// client is the HTTP client used for verification. FromData falls back to
// defaultClient when it is nil.
client *http.Client
18+
}
19+
20+
// Ensure the Scanner satisfies the interface at compile time.
21+
var _ detectors.Detector = (*Scanner)(nil)
22+
23+
var (
24+
// defaultClient is the package-level HTTP client used for verification when
// a Scanner has no client of its own.
defaultClient = common.SaneHttpClient()
25+
// keyPat matches LangSmith keys: the "lsv2_" prefix, a "pt" or "sk" type tag,
// 32 lowercase hex characters, an underscore, and 10 more lowercase hex
// characters. The \b word boundaries on both ends reduce false positives from
// longer surrounding tokens; the whole key is captured as group 1.
26+
keyPat = regexp.MustCompile(`\b(lsv2_(?:pt|sk)_[a-f0-9]{32}_[a-f0-9]{10})\b`) // personal api token and service keys
27+
)
28+
29+
// Keywords are used for efficiently pre-filtering chunks.
30+
// Use identifiers in the secret preferably, or the provider name.
31+
func (s Scanner) Keywords() []string {
32+
return []string{"lsv2_pt_", "lsv2_sk_"}
33+
}
34+
35+
// FromData will find and optionally verify Langsmith secrets in a given set of bytes.
36+
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
37+
dataStr := string(data)
38+
39+
uniqueAPIKeys := make(map[string]struct{})
40+
for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) {
41+
uniqueAPIKeys[match[1]] = struct{}{}
42+
}
43+
44+
for apiKey := range uniqueAPIKeys {
45+
s1 := detectors.Result{
46+
DetectorType: detectorspb.DetectorType_LangSmith,
47+
Raw: []byte(apiKey),
48+
}
49+
50+
if verify {
51+
client := s.client
52+
if client == nil {
53+
client = defaultClient
54+
}
55+
56+
isVerified, verificationErr := verifyMatch(ctx, client, apiKey)
57+
s1.Verified = isVerified
58+
s1.SetVerificationError(verificationErr, apiKey)
59+
}
60+
61+
results = append(results, s1)
62+
}
63+
64+
return
65+
}
66+
67+
// verifyMatch reports whether apiKey is accepted by the LangSmith API.
//
// It sends a GET request to the api-key endpoint with the key in the
// X-API-Key header and interprets the response status:
//   - 200: the key is live (true, nil)
//   - 401 or 403: the key was rejected (false, nil)
//   - anything else: the result is indeterminate (false, non-nil error)
//
// Failures to build or send the request are returned as errors so that callers
// can tell "could not verify" apart from "verified as invalid".
func verifyMatch(ctx context.Context, client *http.Client, apiKey string) (bool, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.smith.langchain.com/api/v1/api-key", http.NoBody)
	if err != nil {
		// Surface the failure instead of silently reporting the key as invalid.
		return false, err
	}

	req.Header.Set("X-API-Key", apiKey)

	res, err := client.Do(req)
	if err != nil {
		return false, err
	}
	defer func() {
		// Drain and close the body so the transport can reuse the connection.
		_, _ = io.Copy(io.Discard, res.Body)
		_ = res.Body.Close()
	}()

	switch res.StatusCode {
	case http.StatusOK:
		return true, nil
	case http.StatusUnauthorized, http.StatusForbidden:
		return false, nil
	default:
		return false, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode)
	}
}
93+
94+
func (s Scanner) Type() detectorspb.DetectorType {
95+
return detectorspb.DetectorType_LangSmith
96+
}
97+
98+
func (s Scanner) Description() string {
99+
return "LangSmith is a unified observability & evals platform where teams can debug, test, and monitor AI app performance — whether building with LangChain or not"
100+
}
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
//go:build detectors
2+
// +build detectors
3+
4+
package langsmith
5+
6+
import (
7+
"context"
8+
"fmt"
9+
"testing"
10+
"time"
11+
12+
"github.com/google/go-cmp/cmp"
13+
"github.com/google/go-cmp/cmp/cmpopts"
14+
15+
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
16+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
17+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
18+
)
19+
20+
func TestLangsmith_FromChunk(t *testing.T) {
21+
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
22+
defer cancel()
23+
testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5")
24+
if err != nil {
25+
t.Fatalf("could not get test secrets from GCP: %s", err)
26+
}
27+
secret := testSecrets.MustGetField("LANGSMITH")
28+
inactiveSecret := testSecrets.MustGetField("LANGSMITH_INACTIVE")
29+
30+
type args struct {
31+
ctx context.Context
32+
data []byte
33+
verify bool
34+
}
35+
tests := []struct {
36+
name string
37+
s Scanner
38+
args args
39+
want []detectors.Result
40+
wantErr bool
41+
wantVerificationErr bool
42+
}{
43+
{
44+
name: "found, verified",
45+
s: Scanner{},
46+
args: args{
47+
ctx: context.Background(),
48+
data: []byte(fmt.Sprintf("You can find a langsmith secret %s within", secret)),
49+
verify: true,
50+
},
51+
want: []detectors.Result{
52+
{
53+
DetectorType: detectorspb.DetectorType_LangSmith,
54+
Verified: true,
55+
},
56+
},
57+
wantErr: false,
58+
wantVerificationErr: false,
59+
},
60+
{
61+
name: "found, unverified",
62+
s: Scanner{},
63+
args: args{
64+
ctx: context.Background(),
65+
data: []byte(fmt.Sprintf("You can find a langsmith secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation
66+
verify: true,
67+
},
68+
want: []detectors.Result{
69+
{
70+
DetectorType: detectorspb.DetectorType_LangSmith,
71+
Verified: false,
72+
},
73+
},
74+
wantErr: false,
75+
wantVerificationErr: false,
76+
},
77+
{
78+
name: "not found",
79+
s: Scanner{},
80+
args: args{
81+
ctx: context.Background(),
82+
data: []byte("You cannot find the secret within"),
83+
verify: true,
84+
},
85+
want: nil,
86+
wantErr: false,
87+
wantVerificationErr: false,
88+
},
89+
}
90+
for _, tt := range tests {
91+
t.Run(tt.name, func(t *testing.T) {
92+
got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
93+
if (err != nil) != tt.wantErr {
94+
t.Errorf("Langsmith.FromData() error = %v, wantErr %v", err, tt.wantErr)
95+
return
96+
}
97+
for i := range got {
98+
if len(got[i].Raw) == 0 {
99+
t.Fatalf("no raw secret present: \n %+v", got[i])
100+
}
101+
if (got[i].VerificationError() != nil) != tt.wantVerificationErr {
102+
t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError())
103+
}
104+
}
105+
ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError", "primarySecret")
106+
if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {
107+
t.Errorf("Langsmith.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
108+
}
109+
})
110+
}
111+
}
112+
113+
func BenchmarkFromData(benchmark *testing.B) {
114+
ctx := context.Background()
115+
s := Scanner{}
116+
for name, data := range detectors.MustGetBenchmarkData() {
117+
benchmark.Run(name, func(b *testing.B) {
118+
b.ResetTimer()
119+
for n := 0; n < b.N; n++ {
120+
_, err := s.FromData(ctx, false, data)
121+
if err != nil {
122+
b.Fatal(err)
123+
}
124+
}
125+
})
126+
}
127+
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
package langsmith
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
"github.com/google/go-cmp/cmp"
8+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
9+
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
10+
)
11+
12+
func TestLangsmith_Pattern(t *testing.T) {
13+
d := Scanner{}
14+
ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d})
15+
tests := []struct {
16+
name string
17+
input string
18+
want []string
19+
}{
20+
{
21+
name: "typical pattern",
22+
input: "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",
23+
want: []string{"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"},
24+
},
25+
{
26+
name: "finds all matches",
27+
input: `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 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`,
28+
want: []string{"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", "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"},
29+
},
30+
{
31+
name: "invalid pattern",
32+
input: "lsv2_pt_1e0430d40fc14d3fj03397b9e6z46289_2b9036edd2",
33+
want: []string{},
34+
},
35+
}
36+
37+
for _, test := range tests {
38+
t.Run(test.name, func(t *testing.T) {
39+
matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input))
40+
if len(matchedDetectors) == 0 {
41+
t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input)
42+
return
43+
}
44+
45+
results, err := d.FromData(context.Background(), false, []byte(test.input))
46+
if err != nil {
47+
t.Errorf("error = %v", err)
48+
return
49+
}
50+
51+
if len(results) != len(test.want) {
52+
if len(results) == 0 {
53+
t.Errorf("did not receive result")
54+
} else {
55+
t.Errorf("expected %d results, only received %d", len(test.want), len(results))
56+
}
57+
return
58+
}
59+
60+
actual := make(map[string]struct{}, len(results))
61+
for _, r := range results {
62+
if len(r.RawV2) > 0 {
63+
actual[string(r.RawV2)] = struct{}{}
64+
} else {
65+
actual[string(r.Raw)] = struct{}{}
66+
}
67+
}
68+
expected := make(map[string]struct{}, len(test.want))
69+
for _, v := range test.want {
70+
expected[v] = struct{}{}
71+
}
72+
73+
if diff := cmp.Diff(expected, actual); diff != "" {
74+
t.Errorf("%s diff: (-want +got)\n%s", test.name, diff)
75+
}
76+
})
77+
}
78+
}

pkg/engine/defaults/defaults.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ import (
406406
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/kucoin"
407407
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/kylas"
408408
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/langfuse"
409+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/langsmith"
409410
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/languagelayer"
410411
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/larksuite"
411412
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/larksuiteapikey"
@@ -1263,6 +1264,7 @@ func buildDetectorList() []detectors.Detector {
12631264
&kucoin.Scanner{},
12641265
&kylas.Scanner{},
12651266
&langfuse.Scanner{},
1267+
&langsmith.Scanner{},
12661268
&languagelayer.Scanner{},
12671269
&larksuite.Scanner{},
12681270
&larksuiteapikey.Scanner{},

pkg/pb/detectorspb/detectors.pb.go

Lines changed: 6 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

proto/detectors.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,7 @@ enum DetectorType {
10371037
AzureAppConfigConnectionString = 1025;
10381038
DeepSeek = 1026;
10391039
StripePaymentIntent = 1027;
1040+
LangSmith = 1028;
10401041
}
10411042

10421043
message Result {

0 commit comments

Comments
 (0)